| | | |
|---|---|---|
| author | Unit 193 <unit193@ubuntu.com> | 2019-11-10 22:14:10 -0500 |
| committer | Unit 193 <unit193@ubuntu.com> | 2019-11-10 22:14:10 -0500 |
| commit | 0c73e982fa596da07f23b377621ab894a9e64884 (patch) | |
| tree | 96f6a40a5656c15a2ec7217a8a1efcff5827bcbb | |
| parent | 40f5fe6edef268632d3bc484e85e5b37bad67bff (diff) | |
| download | gallery-dl-0c73e982fa596da07f23b377621ab894a9e64884.tar.bz2, gallery-dl-0c73e982fa596da07f23b377621ab894a9e64884.tar.xz, gallery-dl-0c73e982fa596da07f23b377621ab894a9e64884.tar.zst | |
New upstream version 1.11.1 (upstream/1.11.1)
100 files changed, 4955 insertions, 5622 deletions
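
The summary above can be re-derived with plain git from the parent and commit hashes listed in the table. The sketch below assumes an existing local clone of this packaging repository; the snapshot step only approximates the download links above, and exact archive naming may differ.

```sh
# Per-file statistics for the upstream import commit
git show --stat 0c73e982fa596da07f23b377621ab894a9e64884

# The same diffstat, comparing the import against its parent explicitly
git diff --stat 40f5fe6edef268632d3bc484e85e5b37bad67bff \
                0c73e982fa596da07f23b377621ab894a9e64884

# Rebuild a snapshot tarball comparable to the .tar.bz2 download link
git archive --prefix=gallery-dl-0c73e982fa596da07f23b377621ab894a9e64884/ \
            0c73e982fa596da07f23b377621ab894a9e64884 \
    | bzip2 > gallery-dl-0c73e982fa596da07f23b377621ab894a9e64884.tar.bz2
```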
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 2e257a8..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,75 +0,0 @@
-archive/
-
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-env/
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*,cover
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Manpages
-gallery-dl.1
-gallery-dl.conf.5
-
-# Bash completion
-gallery-dl.bash_completion
-
-# Snap packaging specific
-/snap/.snapcraft/
-/parts/
-/stage/
-/prime/
-
-/*.snap
-/*_source.tar.bz2
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 4b3a2cd..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,44 +0,0 @@
-language: python
-python:
-  - "3.4"
-  - "3.5"
-  - "3.6"
-  - "pypy3"
-env:
-  - GALLERYDL_TESTS=core
-matrix:
-  include:
-    - python: "3.7"
-      dist: xenial
-    - python: "3.8-dev"
-      dist: xenial
-    - python: "3.6"
-      env: GALLERYDL_TESTS=results
-    - language: minimal
-      dist: xenial
-      env: GALLERYDL_TESTS=snap
-      addons:
-        snaps:
-          - name: snapcraft
-            classic: true
-      install:
-        - true
-      script:
-        - sudo apt update
-        - snapcraft --destructive-mode
-        - sudo snap try
-        - snap run gallery-dl --verbose https://twitter.com/ubuntu/status/1121001597092364288
-
-git:
-  depth: 3
-  quiet: true
-branches:
-  only:
-    - master
-    - /^v\d+\.\d+\.\d+(-\S*)?$/
-    - /^test(-\w+)+$/
-
-install:
-  - pip install -r requirements.txt pyOpenSSL
-script:
-  - ./scripts/run_tests.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index 4a57394..0000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,757 +0,0 @@
-# Changelog
-
-## 1.10.6 - 2019-10-11
-### Additions
-- `--exec` command-line option to specify a command to run after each file download ([#421](https://github.com/mikf/gallery-dl/issues/421))
-### Changes
-- Include titles in `gfycat` default filenames ([#434](https://github.com/mikf/gallery-dl/issues/434))
-### Fixes
-- Fetch working download URLs for `deviantart` ([#436](https://github.com/mikf/gallery-dl/issues/436))
-- Various fixes and improvements for `yaplog` blogs ([#443](https://github.com/mikf/gallery-dl/issues/443))
-- Fix image URL generation for `hitomi` galleries
-- Miscellaneous fixes for `behance` and `xvideos`
-
-## 1.10.5 - 2019-09-28
-### Additions
-- `instagram.highlights` option to include highlighted stories when downloading user profiles ([#329](https://github.com/mikf/gallery-dl/issues/329))
-- Support for `/user/` URLs on `reddit` ([#350](https://github.com/mikf/gallery-dl/issues/350))
-- Support for `imgur` user profiles and favorites ([#420](https://github.com/mikf/gallery-dl/issues/420))
-- Additional metadata fields on `nijie`([#423](https://github.com/mikf/gallery-dl/issues/423))
-### Fixes
-- Improve handling of private `deviantart` artworks ([#414](https://github.com/mikf/gallery-dl/issues/414)) and 429 status codes ([#424](https://github.com/mikf/gallery-dl/issues/424))
-- Prevent fatal errors when trying to open download-archive files
([#417](https://github.com/mikf/gallery-dl/issues/417)) -- Detect and ignore unavailable videos on `weibo` ([#427](https://github.com/mikf/gallery-dl/issues/427)) -- Update the `scope` of new `reddit` refresh-tokens ([#428](https://github.com/mikf/gallery-dl/issues/428)) -- Fix inconsistencies with the `reddit.comments` option ([#429](https://github.com/mikf/gallery-dl/issues/429)) -- Extend URL patterns for `hentaicafe` manga and `pixiv` artworks -- Improve detection of unavailable albums on `luscious` and `imgbb` -- Miscellaneous fixes for `tsumino` - -## 1.10.4 - 2019-09-08 -### Additions -- Support for - - `lineblog` - https://www.lineblog.me/ ([#404](https://github.com/mikf/gallery-dl/issues/404)) - - `fuskator` - https://fuskator.com/ ([#407](https://github.com/mikf/gallery-dl/issues/407)) -- `ugoira` option for `danbooru` to download pre-rendered ugoira animations ([#406](https://github.com/mikf/gallery-dl/issues/406)) -### Fixes -- Download the correct files from `twitter` replies ([#403](https://github.com/mikf/gallery-dl/issues/403)) -- Prevent crash when trying to use unavailable downloader modules ([#405](https://github.com/mikf/gallery-dl/issues/405)) -- Fix `pixiv` authentication ([#411](https://github.com/mikf/gallery-dl/issues/411)) -- Improve `exhentai` image limit checks -- Miscellaneous fixes for `hentaicafe`, `simplyhentai`, `tumblr` - -## 1.10.3 - 2019-08-30 -### Additions -- Provide `filename` metadata for all `deviantart` files ([#392](https://github.com/mikf/gallery-dl/issues/392), [#400](https://github.com/mikf/gallery-dl/issues/400)) -- Implement a `ytdl.outtmpl` option to let youtube-dl handle filenames by itself ([#395](https://github.com/mikf/gallery-dl/issues/395)) -- Support `seiga` mobile URLs ([#401](https://github.com/mikf/gallery-dl/issues/401)) -### Fixes -- Extract more than the first 32 posts from `piczel` galleries ([#396](https://github.com/mikf/gallery-dl/issues/396)) -- Fix filenames of archives created with `--zip` ([#397](https://github.com/mikf/gallery-dl/issues/397)) -- Skip unavailable images and videos on `flickr` ([#398](https://github.com/mikf/gallery-dl/issues/398)) -- Fix filesystem paths on Windows with Python 3.6 and lower ([#402](https://github.com/mikf/gallery-dl/issues/402)) - -## 1.10.2 - 2019-08-23 -### Additions -- Support for `instagram` stories and IGTV ([#371](https://github.com/mikf/gallery-dl/issues/371), [#373](https://github.com/mikf/gallery-dl/issues/373)) -- Support for individual `imgbb` images ([#363](https://github.com/mikf/gallery-dl/issues/363)) -- `deviantart.quality` option to set the JPEG compression quality for newer images ([#369](https://github.com/mikf/gallery-dl/issues/369)) -- `enumerate` option for `extractor.skip` ([#306](https://github.com/mikf/gallery-dl/issues/306)) -- `adjust-extensions` option to control filename extension adjustments -- `path-remove` option to remove control characters etc. 
from filesystem paths -### Changes -- Rename `restrict-filenames` to `path-restrict` -- Adjust `pixiv` metadata and default filename format ([#366](https://github.com/mikf/gallery-dl/issues/366)) - - Set `filename` to `"{category}_{user[id]}_{id}{suffix}.{extension}"` to restore the old default -- Improve and optimize directory and filename generation -### Fixes -- Allow the `classify` post-processor to handle files with unknown filename extension ([#138](https://github.com/mikf/gallery-dl/issues/138)) -- Fix rate limit handling for OAuth APIs ([#368](https://github.com/mikf/gallery-dl/issues/368)) -- Fix artwork and scraps extraction on `deviantart` ([#376](https://github.com/mikf/gallery-dl/issues/376), [#392](https://github.com/mikf/gallery-dl/issues/392)) -- Distinguish between `imgur` album and gallery URLs ([#380](https://github.com/mikf/gallery-dl/issues/380)) -- Prevent crash when using `--ugoira-conv` ([#382](https://github.com/mikf/gallery-dl/issues/382)) -- Handle multi-image posts on `patreon` ([#383](https://github.com/mikf/gallery-dl/issues/383)) -- Miscellaneous fixes for `*reactor`, `simplyhentai` - -## 1.10.1 - 2019-08-02 -## Fixes -- Use the correct domain for exhentai.org input URLs - -## 1.10.0 - 2019-08-01 -### Warning -- Prior to version 1.10.0 all cache files were created world readable (mode `644`) - leading to possible sensitive information disclosure on multi-user systems -- It is recommended to restrict access permissions of already existing files - (`/tmp/.gallery-dl.cache`) with `chmod 600` -- Windows users should not be affected -### Additions -- Support for - - `vsco` - https://vsco.co/ ([#331](https://github.com/mikf/gallery-dl/issues/331)) - - `imgbb` - https://imgbb.com/ ([#361](https://github.com/mikf/gallery-dl/issues/361)) - - `adultempire` - https://www.adultempire.com/ ([#340](https://github.com/mikf/gallery-dl/issues/340)) -- `restrict-filenames` option to create Windows-compatible filenames on any platform ([#348](https://github.com/mikf/gallery-dl/issues/348)) -- `forward-cookies` option to control cookie forwarding to youtube-dl ([#352](https://github.com/mikf/gallery-dl/issues/352)) -### Changes -- The default cache file location on non-Windows systems is now - - `$XDG_CACHE_HOME/gallery-dl/cache.sqlite3` or - - `~/.cache/gallery-dl/cache.sqlite3` -- New cache files are created with mode `600` -- `exhentai` extractors will always use `e-hentai.org` as domain -### Fixes -- Better handling of `exhentai` image limits and errors ([#356](https://github.com/mikf/gallery-dl/issues/356), [#360](https://github.com/mikf/gallery-dl/issues/360)) -- Try to prevent ZIP file corruption ([#355](https://github.com/mikf/gallery-dl/issues/355)) -- Miscellaneous fixes for `behance`, `ngomik` - -## 1.9.0 - 2019-07-19 -### Additions -- Support for - - `erolord` - http://erolord.com/ ([#326](https://github.com/mikf/gallery-dl/issues/326)) -- Add login support for `instagram` ([#195](https://github.com/mikf/gallery-dl/issues/195)) -- Add `--no-download` and `extractor.*.download` disable file downloads ([#220](https://github.com/mikf/gallery-dl/issues/220)) -- Add `-A/--abort` to specify the number of consecutive download skips before aborting -- Interpret `-1` as infinite retries ([#300](https://github.com/mikf/gallery-dl/issues/300)) -- Implement custom log message formats per log-level ([#304](https://github.com/mikf/gallery-dl/issues/304)) -- Implement an `mtime` post-processor that sets file modification times according to metadata fields 
([#332](https://github.com/mikf/gallery-dl/issues/332)) -- Implement a `twitter.content` option to enable tweet text extraction ([#333](https://github.com/mikf/gallery-dl/issues/333), [#338](https://github.com/mikf/gallery-dl/issues/338)) -- Enable `date-min/-max/-format` options for `tumblr` ([#337](https://github.com/mikf/gallery-dl/issues/337)) -### Changes -- Set file modification times according to their `Last-Modified` header when downloading ([#236](https://github.com/mikf/gallery-dl/issues/236), [#277](https://github.com/mikf/gallery-dl/issues/277)) - - Use `--no-mtime` or `downloader.*.mtime` to disable this behavior -- Duplicate download URLs are no longer silently ignored (controllable with `extractor.*.image-unique`) -- Deprecate `--abort-on-skip` -### Fixes -- Retry downloads on OpenSSL exceptions ([#324](https://github.com/mikf/gallery-dl/issues/324)) -- Ignore unavailable pins on `sexcom` instead of raising an exception ([#325](https://github.com/mikf/gallery-dl/issues/325)) -- Use Firefox's SSL/TLS ciphers to prevent Cloudflare CAPTCHAs ([#342](https://github.com/mikf/gallery-dl/issues/342)) -- Improve folder name matching on `deviantart` ([#343](https://github.com/mikf/gallery-dl/issues/343)) -- Forward cookies to `youtube-dl` to allow downloading private videos -- Miscellaneous fixes for `35photo`, `500px`, `newgrounds`, `simplyhentai` - -## 1.8.7 - 2019-06-28 -### Additions -- Support for - - `vanillarock` - https://vanilla-rock.com/ ([#254](https://github.com/mikf/gallery-dl/issues/254)) - - `nsfwalbum` - https://nsfwalbum.com/ ([#287](https://github.com/mikf/gallery-dl/issues/287)) -- `artist` and `tags` metadata for `hentaicafe` ([#238](https://github.com/mikf/gallery-dl/issues/238)) -- `description` metadata for `instagram` ([#310](https://github.com/mikf/gallery-dl/issues/310)) -- Format string option to replace a substring with another - `R<old>/<new>/` ([#318](https://github.com/mikf/gallery-dl/issues/318)) -### Changes -- Delete empty archives created by the `zip` post-processor ([#316](https://github.com/mikf/gallery-dl/issues/316)) -### Fixes -- Handle `hitomi` Game CG galleries correctly ([#321](https://github.com/mikf/gallery-dl/issues/321)) -- Miscellaneous fixes for `deviantart`, `hitomi`, `pururin`, `kissmanga`, `keenspot`, `mangoxo`, `imagefap` - -## 1.8.6 - 2019-06-14 -### Additions -- Support for - - `slickpic` - https://www.slickpic.com/ ([#249](https://github.com/mikf/gallery-dl/issues/249)) - - `xhamster` - https://xhamster.com/ ([#281](https://github.com/mikf/gallery-dl/issues/281)) - - `pornhub` - https://www.pornhub.com/ ([#282](https://github.com/mikf/gallery-dl/issues/282)) - - `8muses` - https://www.8muses.com/ ([#305](https://github.com/mikf/gallery-dl/issues/305)) -- `extra` option for `deviantart` to download Sta.sh content linked in description texts ([#302](https://github.com/mikf/gallery-dl/issues/302)) -### Changes -- Detect `directlink` URLs with upper case filename extensions ([#296](https://github.com/mikf/gallery-dl/issues/296)) -### Fixes -- Improved error handling for `tumblr` API calls ([#297](https://github.com/mikf/gallery-dl/issues/297)) -- Fixed extraction of `livedoor` blogs ([#301](https://github.com/mikf/gallery-dl/issues/301)) -- Fixed extraction of special `deviantart` Sta.sh items ([#307](https://github.com/mikf/gallery-dl/issues/307)) -- Fixed pagination for specific `keenspot` comics - -## 1.8.5 - 2019-06-01 -### Additions -- Support for - - `keenspot` - http://keenspot.com/ 
([#223](https://github.com/mikf/gallery-dl/issues/223)) - - `sankakucomplex` - https://www.sankakucomplex.com ([#258](https://github.com/mikf/gallery-dl/issues/258)) -- `folders` option for `deviantart` to add a list of containing folders to each file ([#276](https://github.com/mikf/gallery-dl/issues/276)) -- `captcha` option for `kissmanga` and `readcomiconline` to control CAPTCHA handling ([#279](https://github.com/mikf/gallery-dl/issues/279)) -- `filename` metadata for files downloaded with youtube-dl ([#291](https://github.com/mikf/gallery-dl/issues/291)) -### Changes -- Adjust `wallhaven` extractors to new page layout: - - use API and add `api-key` option - - removed traditional login support -- Provide original filenames for `patreon` downloads ([#268](https://github.com/mikf/gallery-dl/issues/268)) -- Use e-hentai.org or exhentai.org depending on input URL ([#278](https://github.com/mikf/gallery-dl/issues/278)) -### Fixes -- Fix pagination over `sankaku` popular listings ([#265](https://github.com/mikf/gallery-dl/issues/265)) -- Fix folder and collection extraction on `deviantart` ([#271](https://github.com/mikf/gallery-dl/issues/271)) -- Detect "AreYouHuman" redirects on `readcomiconline` ([#279](https://github.com/mikf/gallery-dl/issues/279)) -- Miscellaneous fixes for `hentainexus`, `livedoor`, `ngomik` - -## 1.8.4 - 2019-05-17 -### Additions -- Support for - - `patreon` - https://www.patreon.com/ ([#226](https://github.com/mikf/gallery-dl/issues/226)) - - `hentainexus` - https://hentainexus.com/ ([#256](https://github.com/mikf/gallery-dl/issues/256)) -- `date` metadata fields for `pixiv` ([#248](https://github.com/mikf/gallery-dl/issues/248)), `instagram` ([#250](https://github.com/mikf/gallery-dl/issues/250)), `exhentai`, and `newgrounds` -### Changes -- Improved `flickr` metadata and video extraction ([#246](https://github.com/mikf/gallery-dl/issues/246)) -### Fixes -- Download original GIF animations from `deviantart` ([#242](https://github.com/mikf/gallery-dl/issues/242)) -- Ignore missing `edge_media_to_comment` fields on `instagram` ([#250](https://github.com/mikf/gallery-dl/issues/250)) -- Fix serialization of `datetime` objects for `--write-metadata` ([#251](https://github.com/mikf/gallery-dl/issues/251), [#252](https://github.com/mikf/gallery-dl/issues/252)) -- Allow multiple post-processor command-line options at once ([#253](https://github.com/mikf/gallery-dl/issues/253)) -- Prevent crash on `booru` sites when no tags are available ([#259](https://github.com/mikf/gallery-dl/issues/259)) -- Fix extraction on `instagram` after `rhx_gis` field removal ([#266](https://github.com/mikf/gallery-dl/issues/266)) -- Avoid Cloudflare CAPTCHAs for Python interpreters built against OpenSSL < 1.1.1 -- Miscellaneous fixes for `luscious` - -## 1.8.3 - 2019-05-04 -### Additions -- Support for - - `plurk` - https://www.plurk.com/ ([#212](https://github.com/mikf/gallery-dl/issues/212)) - - `sexcom` - https://www.sex.com/ ([#147](https://github.com/mikf/gallery-dl/issues/147)) -- `--clear-cache` -- `date` metadata fields for `deviantart`, `twitter`, and `tumblr` ([#224](https://github.com/mikf/gallery-dl/issues/224), [#232](https://github.com/mikf/gallery-dl/issues/232)) -### Changes -- Standalone executables are now built using PyInstaller: - - uses the latest CPython interpreter (Python 3.7.3) - - available on several platforms (Windows, Linux, macOS) - - includes the `certifi` CA bundle, `youtube-dl`, and `pyOpenSSL` on Windows -### Fixes -- Patch `urllib3`'s default list of SSL/TLS 
ciphers to prevent Cloudflare CAPTCHAs ([#227](https://github.com/mikf/gallery-dl/issues/227)) - (Windows users need to install `pyOpenSSL` for this to take effect) -- Provide fallback URLs for `twitter` images ([#237](https://github.com/mikf/gallery-dl/issues/237)) -- Send `Referer` headers when downloading from `hitomi` ([#239](https://github.com/mikf/gallery-dl/issues/239)) -- Updated login procedure on `mangoxo` - -## 1.8.2 - 2019-04-12 -### Additions -- Support for - - `pixnet` - https://www.pixnet.net/ ([#177](https://github.com/mikf/gallery-dl/issues/177)) - - `wikiart` - https://www.wikiart.org/ ([#179](https://github.com/mikf/gallery-dl/issues/179)) - - `mangoxo` - https://www.mangoxo.com/ ([#184](https://github.com/mikf/gallery-dl/issues/184)) - - `yaplog` - https://yaplog.jp/ ([#190](https://github.com/mikf/gallery-dl/issues/190)) - - `livedoor` - http://blog.livedoor.jp/ ([#190](https://github.com/mikf/gallery-dl/issues/190)) -- Login support for `mangoxo` ([#184](https://github.com/mikf/gallery-dl/issues/184)) and `twitter` ([#214](https://github.com/mikf/gallery-dl/issues/214)) -### Changes -- Increased required `Requests` version to 2.11.0 -### Fixes -- Improved image quality on `reactor` sites ([#210](https://github.com/mikf/gallery-dl/issues/210)) -- Support `imagebam` galleries with more than 100 images ([#219](https://github.com/mikf/gallery-dl/issues/219)) -- Updated Cloudflare bypass code - -## 1.8.1 - 2019-03-29 -### Additions -- Support for: - - `35photo` - https://35photo.pro/ ([#162](https://github.com/mikf/gallery-dl/issues/162)) - - `500px` - https://500px.com/ ([#185](https://github.com/mikf/gallery-dl/issues/185)) -- `instagram` extractor for hashtags ([#202](https://github.com/mikf/gallery-dl/issues/202)) -- Option to get more metadata on `deviantart` ([#189](https://github.com/mikf/gallery-dl/issues/189)) -- Man pages and bash completion ([#150](https://github.com/mikf/gallery-dl/issues/150)) -- Snap improvements ([#197](https://github.com/mikf/gallery-dl/issues/197), [#199](https://github.com/mikf/gallery-dl/issues/199), [#207](https://github.com/mikf/gallery-dl/issues/207)) -### Changes -- Better FFmpeg arguments for `--ugoira-conv` -- Adjusted metadata for `luscious` albums -### Fixes -- Proper handling of `instagram` multi-image posts ([#178](https://github.com/mikf/gallery-dl/issues/178), [#201](https://github.com/mikf/gallery-dl/issues/201)) -- Fixed `tumblr` avatar URLs when not using OAuth1.0 ([#193](https://github.com/mikf/gallery-dl/issues/193)) -- Miscellaneous fixes for `exhentai`, `komikcast` - -## 1.8.0 - 2019-03-15 -### Additions -- Support for: - - `weibo` - https://www.weibo.com/ - - `pururin` - https://pururin.io/ ([#174](https://github.com/mikf/gallery-dl/issues/174)) - - `fashionnova` - https://www.fashionnova.com/ ([#175](https://github.com/mikf/gallery-dl/issues/175)) - - `shopify` sites in general ([#175](https://github.com/mikf/gallery-dl/issues/175)) -- Snap packaging ([#169](https://github.com/mikf/gallery-dl/issues/169), [#170](https://github.com/mikf/gallery-dl/issues/170), [#187](https://github.com/mikf/gallery-dl/issues/187), [#188](https://github.com/mikf/gallery-dl/issues/188)) -- Automatic Cloudflare DDoS protection bypass -- Extractor and Job information for logging format strings -- `dynastyscans` image and search extractors ([#163](https://github.com/mikf/gallery-dl/issues/163)) -- `deviantart` scraps extractor ([#168](https://github.com/mikf/gallery-dl/issues/168)) -- `artstation` extractor for artwork listings 
([#172](https://github.com/mikf/gallery-dl/issues/172)) -- `smugmug` video support and improved image format selection ([#183](https://github.com/mikf/gallery-dl/issues/183)) -### Changes -- More metadata for `nhentai` galleries -- Combined `myportfolio` extractors into one -- Renamed `name` metadata field to `filename` and removed the original `filename` field -- Simplified and improved internal data structures -- Optimized creation of child extractors -### Fixes -- Filter empty `tumblr` URLs ([#165](https://github.com/mikf/gallery-dl/issues/165)) -- Filter ads and improve connection speed on `hentaifoundry` -- Show proper error messages if `luscious` galleries are unavailable -- Miscellaneous fixes for `mangahere`, `ngomik`, `simplyhentai`, `imgspice` -### Removals -- `seaotterscans` - -## 1.7.0 - 2019-02-05 -- Added support for: - - `photobucket` - http://photobucket.com/ ([#117](https://github.com/mikf/gallery-dl/issues/117)) - - `hentaifox` - https://hentaifox.com/ ([#160](https://github.com/mikf/gallery-dl/issues/160)) - - `tsumino` - https://www.tsumino.com/ ([#161](https://github.com/mikf/gallery-dl/issues/161)) -- Added the ability to dynamically generate extractors based on a user's config file for - - [`mastodon`](https://github.com/tootsuite/mastodon) instances ([#144](https://github.com/mikf/gallery-dl/issues/144)) - - [`foolslide`](https://github.com/FoolCode/FoOlSlide) based sites - - [`foolfuuka`](https://github.com/FoolCode/FoolFuuka) based archives -- Added an extractor for `behance` collections ([#157](https://github.com/mikf/gallery-dl/issues/157)) -- Added login support for `luscious` ([#159](https://github.com/mikf/gallery-dl/issues/159)) and `tsumino` ([#161](https://github.com/mikf/gallery-dl/issues/161)) -- Added an option to stop downloading if the `exhentai` image limit is exceeded ([#141](https://github.com/mikf/gallery-dl/issues/141)) -- Fixed extraction issues for `behance` and `mangapark` - -## 1.6.3 - 2019-01-18 -- Added `metadata` post-processor to write image metadata to an external file ([#135](https://github.com/mikf/gallery-dl/issues/135)) -- Added option to reverse chapter order of manga extractors ([#149](https://github.com/mikf/gallery-dl/issues/149)) -- Added authentication support for `danbooru` ([#151](https://github.com/mikf/gallery-dl/issues/151)) -- Added tag metadata for `exhentai` and `hbrowse` galleries -- Improved `*reactor` extractors ([#148](https://github.com/mikf/gallery-dl/issues/148)) -- Fixed extraction issues for `nhentai` ([#156](https://github.com/mikf/gallery-dl/issues/156)), `pinterest`, `mangapark` - -## 1.6.2 - 2019-01-01 -- Added support for: - - `instagram` - https://www.instagram.com/ ([#134](https://github.com/mikf/gallery-dl/issues/134)) -- Added support for multiple items on sta.sh pages ([#113](https://github.com/mikf/gallery-dl/issues/113)) -- Added option to download `tumblr` avatars ([#137](https://github.com/mikf/gallery-dl/issues/137)) -- Changed defaults for visited post types and inline media on `tumblr` -- Improved inline extraction of `tumblr` posts ([#133](https://github.com/mikf/gallery-dl/issues/133), [#137](https://github.com/mikf/gallery-dl/issues/137)) -- Improved error handling and retry behavior of all API calls -- Improved handling of missing fields in format strings ([#136](https://github.com/mikf/gallery-dl/issues/136)) -- Fixed hash extraction for unusual `tumblr` URLs ([#129](https://github.com/mikf/gallery-dl/issues/129)) -- Fixed image subdomains for `hitomi` galleries 
([#142](https://github.com/mikf/gallery-dl/issues/142)) -- Fixed and improved miscellaneous issues for `kissmanga` ([#20](https://github.com/mikf/gallery-dl/issues/20)), `luscious`, `mangapark`, `readcomiconline` - -## 1.6.1 - 2018-11-28 -- Added support for: - - `joyreactor` - http://joyreactor.cc/ ([#114](https://github.com/mikf/gallery-dl/issues/114)) - - `pornreactor` - http://pornreactor.cc/ ([#114](https://github.com/mikf/gallery-dl/issues/114)) - - `newgrounds` - https://www.newgrounds.com/ ([#119](https://github.com/mikf/gallery-dl/issues/119)) -- Added extractor for search results on `luscious` ([#127](https://github.com/mikf/gallery-dl/issues/127)) -- Fixed filenames of ZIP archives ([#126](https://github.com/mikf/gallery-dl/issues/126)) -- Fixed extraction issues for `gfycat`, `hentaifoundry` ([#125](https://github.com/mikf/gallery-dl/issues/125)), `mangafox` - -## 1.6.0 - 2018-11-17 -- Added support for: - - `wallhaven` - https://alpha.wallhaven.cc/ - - `yuki` - https://yuki.la/ -- Added youtube-dl integration and video downloads for `twitter` ([#99](https://github.com/mikf/gallery-dl/issues/99)), `behance`, `artstation` -- Added per-extractor options for network connections (`retries`, `timeout`, `verify`) -- Added a `--no-check-certificate` command-line option -- Added ability to specify the number of skipped downloads before aborting/exiting ([#115](https://github.com/mikf/gallery-dl/issues/115)) -- Added extractors for scraps, favorites, popular and recent images on `hentaifoundry` ([#110](https://github.com/mikf/gallery-dl/issues/110)) -- Improved login procedure for `pixiv` to avoid unwanted emails on each new login -- Improved album metadata and error handling for `flickr` ([#109](https://github.com/mikf/gallery-dl/issues/109)) -- Updated default User-Agent string to Firefox 62 ([#122](https://github.com/mikf/gallery-dl/issues/122)) -- Fixed `twitter` API response handling when logged in ([#123](https://github.com/mikf/gallery-dl/issues/123)) -- Fixed issue when converting Ugoira using H.264 -- Fixed miscellaneous issues for `2chan`, `deviantart`, `fallenangels`, `flickr`, `imagefap`, `pinterest`, `turboimagehost`, `warosu`, `yuki` ([#112](https://github.com/mikf/gallery-dl/issues/112)) - -## 1.5.3 - 2018-09-14 -- Added support for: - - `hentaicafe` - https://hentai.cafe/ ([#101](https://github.com/mikf/gallery-dl/issues/101)) - - `bobx` - http://www.bobx.com/dark/ -- Added black-/whitelist options for post-processor modules -- Added support for `tumblr` inline videos ([#102](https://github.com/mikf/gallery-dl/issues/102)) -- Fixed extraction of `smugmug` albums without owner ([#100](https://github.com/mikf/gallery-dl/issues/100)) -- Fixed issues when using default config values with `reddit` extractors ([#104](https://github.com/mikf/gallery-dl/issues/104)) -- Fixed pagination for user favorites on `sankaku` ([#106](https://github.com/mikf/gallery-dl/issues/106)) -- Fixed a crash when processing `deviantart` journals ([#108](https://github.com/mikf/gallery-dl/issues/108)) - -## 1.5.2 - 2018-08-31 -- Added support for `twitter` timelines ([#96](https://github.com/mikf/gallery-dl/issues/96)) -- Added option to suppress FFmpeg output during ugoira conversions -- Improved filename formatter performance -- Improved inline image quality on `tumblr` ([#98](https://github.com/mikf/gallery-dl/issues/98)) -- Fixed image URLs for newly released `mangadex` chapters -- Fixed a smaller issue with `deviantart` journals -- Replaced `subapics` with `ngomik` - -## 1.5.1 - 2018-08-17 -- 
Added support for: - - `piczel` - https://piczel.tv/ -- Added support for related pins on `pinterest` -- Fixed accessing "offensive" galleries on `exhentai` ([#97](https://github.com/mikf/gallery-dl/issues/97)) -- Fixed extraction issues for `mangadex`, `komikcast` and `behance` -- Removed original-image functionality from `tumblr`, since "raw" images are no longer accessible - -## 1.5.0 - 2018-08-03 -- Added support for: - - `behance` - https://www.behance.net/ - - `myportfolio` - https://www.myportfolio.com/ ([#95](https://github.com/mikf/gallery-dl/issues/95)) -- Added custom format string options to handle long strings ([#92](https://github.com/mikf/gallery-dl/issues/92), [#94](https://github.com/mikf/gallery-dl/issues/94)) - - Slicing: `"{field[10:40]}"` - - Replacement: `"{field:L40/too long/}"` -- Improved frame rate handling for ugoira conversions -- Improved private access token usage on `deviantart` -- Fixed metadata extraction for some images on `nijie` -- Fixed chapter extraction on `mangahere` -- Removed `whatisthisimnotgoodwithcomputers` -- Removed support for Python 3.3 - -## 1.4.2 - 2018-07-06 -- Added image-pool extractors for `safebooru` and `rule34` -- Added option for extended tag information on `booru` sites ([#92](https://github.com/mikf/gallery-dl/issues/92)) -- Added support for DeviantArt's new URL format -- Added support for `mangapark` mirrors -- Changed `imagefap` extractors to use HTTPS -- Fixed crash when skipping downloads for files without known extension - -## 1.4.1 - 2018-06-22 -- Added an `ugoira` post-processor to convert `pixiv` animations to WebM -- Added `--zip` and `--ugoira-conv` command-line options -- Changed how ugoira frame information is handled - - instead of being written to a separate file, it is now made available as metadata field of the ZIP archive -- Fixed manga and chapter titles for `mangadex` -- Fixed file deletion by post-processors - -## 1.4.0 - 2018-06-08 -- Added support for: - - `simplyhentai` - https://www.simply-hentai.com/ ([#89](https://github.com/mikf/gallery-dl/issues/89)) -- Added extractors for - - `pixiv` search results and followed users - - `deviantart` search results and popular listings -- Added post-processors to perform actions on downloaded files -- Added options to configure logging behavior -- Added OAuth support for `smugmug` -- Changed `pixiv` extractors to use the AppAPI - - this breaks `favorite` archive IDs and changes some metadata fields -- Changed the default filename format for `tumblr` and renamed `offset` to `num` -- Fixed a possible UnicodeDecodeError during installation ([#86](https://github.com/mikf/gallery-dl/issues/86)) -- Fixed extraction of `mangadex` manga with more than 100 chapters ([#84](https://github.com/mikf/gallery-dl/issues/84)) -- Fixed miscellaneous issues for `imgur`, `reddit`, `komikcast`, `mangafox` and `imagebam` - -## 1.3.5 - 2018-05-04 -- Added support for: - - `smugmug` - https://www.smugmug.com/ -- Added title information for `mangadex` chapters -- Improved the `pinterest` API implementation ([#83](https://github.com/mikf/gallery-dl/issues/83)) -- Improved error handling for `deviantart` and `tumblr` -- Removed `gomanga` and `puremashiro` - -## 1.3.4 - 2018-04-20 -- Added support for custom OAuth2 credentials for `pinterest` -- Improved rate limit handling for `tumblr` extractors -- Improved `hentaifoundry` extractors -- Improved `imgur` URL patterns -- Fixed miscellaneous extraction issues for `luscious` and `komikcast` -- Removed `loveisover` and `spectrumnexus` - -## 1.3.3 
- 2018-04-06 -- Added extractors for - - `nhentai` search results - - `exhentai` search results and favorites - - `nijie` doujins and favorites -- Improved metadata extraction for `exhentai` and `nijie` -- Improved `tumblr` extractors by avoiding unnecessary API calls -- Fixed Cloudflare DDoS protection bypass -- Fixed errors when trying to print unencodable characters - -## 1.3.2 - 2018-03-23 -- Added extractors for `artstation` albums, challenges and search results -- Improved URL and metadata extraction for `hitomi`and `nhentai` -- Fixed page transitions for `danbooru` API results ([#82](https://github.com/mikf/gallery-dl/issues/82)) - -## 1.3.1 - 2018-03-16 -- Added support for: - - `mangadex` - https://mangadex.org/ - - `artstation` - https://www.artstation.com/ -- Added Cloudflare DDoS protection bypass to `komikcast` extractors -- Changed archive ID formats for `deviantart` folders and collections -- Improved error handling for `deviantart` API calls -- Removed `imgchili` and various smaller image hosts - -## 1.3.0 - 2018-03-02 -- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76)) -- Added options to customize [archive ID formats](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorarchive-format) and [undefined replacement fields](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorkeywords-default) -- Changed various archive ID formats to improve their behavior for favorites / bookmarks / etc. - - Affected modules are `deviantart`, `flickr`, `tumblr`, `pixiv` and all …boorus -- Improved `sankaku` and `idolcomplex` support by - - respecting `page` and `next` URL parameters ([#79](https://github.com/mikf/gallery-dl/issues/79)) - - bypassing the page-limit for unauthenticated users -- Improved `directlink` metadata by properly unquoting it -- Fixed `pixiv` ugoira extraction ([#78](https://github.com/mikf/gallery-dl/issues/78)) -- Fixed miscellaneous extraction issues for `mangastream` and `tumblr` -- Removed `yeet`, `chronos`, `coreimg`, `hosturimage`, `imageontime`, `img4ever`, `imgmaid`, `imgupload` - -## 1.2.0 - 2018-02-16 -- Added support for: - - `paheal` - https://rule34.paheal.net/ ([#69](https://github.com/mikf/gallery-dl/issues/69)) - - `komikcast` - https://komikcast.com/ ([#70](https://github.com/mikf/gallery-dl/issues/70)) - - `subapics` - http://subapics.com/ ([#70](https://github.com/mikf/gallery-dl/issues/70)) -- Added `--download-archive` to record downloaded files in an archive file -- Added `--write-log` to write logging output to a file -- Added a filetype check on download completion to fix incorrectly assigned filename extensions ([#63](https://github.com/mikf/gallery-dl/issues/63)) -- Added the `tumblr:...` pseudo URI scheme to support custom domains for Tumblr blogs ([#71](https://github.com/mikf/gallery-dl/issues/71)) -- Added fallback URLs for `tumblr` images ([#64](https://github.com/mikf/gallery-dl/issues/64)) -- Added support for `reddit`-hosted images ([#68](https://github.com/mikf/gallery-dl/issues/68)) -- Improved the input file format by allowing comments and per-URL options -- Fixed OAuth 1.0 signature generation for Python 3.3 and 3.4 ([#75](https://github.com/mikf/gallery-dl/issues/75)) -- Fixed smaller issues for `luscious`, `hentai2read`, `hentaihere` and `imgur` -- Removed the `batoto` module - -## 1.1.2 - 2018-01-12 -- Added support for: - - `puremashiro` - http://reader.puremashiro.moe/ 
([#66](https://github.com/mikf/gallery-dl/issues/66)) - - `idolcomplex` - https://idol.sankakucomplex.com/ -- Added an option to filter reblogs on `tumblr` ([#61](https://github.com/mikf/gallery-dl/issues/61)) -- Added OAuth user authentication for `tumblr` ([#65](https://github.com/mikf/gallery-dl/issues/65)) -- Added support for `slideshare` mobile URLs ([#67](https://github.com/mikf/gallery-dl/issues/67)) -- Improved pagination for various …booru sites to work around page limits -- Fixed chapter information parsing for certain manga on `kissmanga` ([#58](https://github.com/mikf/gallery-dl/issues/58)) and `batoto` ([#60](https://github.com/mikf/gallery-dl/issues/60)) - -## 1.1.1 - 2017-12-22 -- Added support for: - - `slideshare` - https://www.slideshare.net/ ([#54](https://github.com/mikf/gallery-dl/issues/54)) -- Added pool- and post-extractors for `sankaku` -- Added OAuth user authentication for `deviantart` -- Updated `luscious` to support `members.luscious.net` URLs ([#55](https://github.com/mikf/gallery-dl/issues/55)) -- Updated `mangahere` to use their new domain name (mangahere.cc) and support mobile URLs -- Updated `gelbooru` to not be restricted to the first 20,000 images ([#56](https://github.com/mikf/gallery-dl/issues/56)) -- Fixed extraction issues for `nhentai` and `khinsider` - -## 1.1.0 - 2017-12-08 -- Added the ``-r/--limit-rate`` command-line option to set a maximum download rate -- Added the ``--sleep`` command-line option to specify the number of seconds to sleep before each download -- Updated `gelbooru` to no longer use their now disabled API -- Fixed SWF extraction for `sankaku` ([#52](https://github.com/mikf/gallery-dl/issues/52)) -- Fixed extraction issues for `hentai2read` and `khinsider` -- Removed the deprecated `--images` and `--chapters` options -- Removed the ``mangazuki`` module - -## 1.0.2 - 2017-11-24 -- Added an option to set a [custom user-agent string](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractoruser-agent) -- Improved retry behavior for failed HTTP requests -- Improved `seiga` by providing better metadata and getting more than the latest 200 images -- Improved `tumblr` by adding support for [all post types](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractortumblrposts), scanning for [inline images](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractortumblrinline) and following [external links](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractortumblrexternal) ([#48](https://github.com/mikf/gallery-dl/issues/48)) -- Fixed extraction issues for `hbrowse`, `khinsider` and `senmanga` - -## 1.0.1 - 2017-11-10 -- Added support for: - - `xvideos` - https://www.xvideos.com/ ([#45](https://github.com/mikf/gallery-dl/issues/45)) -- Fixed exception handling during file downloads which could lead to a premature exit -- Fixed an issue with `tumblr` where not all images would be downloaded when using tags ([#48](https://github.com/mikf/gallery-dl/issues/48)) -- Fixed extraction issues for `imgbox` ([#47](https://github.com/mikf/gallery-dl/issues/47)), `mangastream` ([#49](https://github.com/mikf/gallery-dl/issues/49)) and `mangahere` - -## 1.0.0 - 2017-10-27 -- Added support for: - - `warosu` - https://warosu.org/ - - `b4k` - https://arch.b4k.co/ -- Added support for `pixiv` ranking lists -- Added support for `booru` popular lists (`danbooru`, `e621`, `konachan`, `yandere`, `3dbooru`) -- Added the `--cookies` command-line and 
[`cookies`](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies) config option to load additional cookies -- Added the `--filter` and `--chapter-filter` command-line options to select individual images or manga-chapters by their metadata using simple Python expressions ([#43](https://github.com/mikf/gallery-dl/issues/43)) -- Added the [`verify`](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#downloaderhttpverify) config option to control certificate verification during file downloads -- Added config options to overwrite internally used API credentials ([API Tokens & IDs](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#api-tokens-ids)) -- Added `-K` as a shortcut for `--list-keywords` -- Changed the `--images` and `--chapters` command-line options to `--range` and `--chapter-range` -- Changed keyword names for various modules to make them accessible by `--filter`. In general minus signs have been replaced with underscores (e.g. `gallery-id` -> `gallery_id`). -- Changed default filename formats for manga extractors to optionally use volume and title information -- Improved the downloader modules to use [`.part` files](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#downloaderpart) and support resuming incomplete downloads ([#29](https://github.com/mikf/gallery-dl/issues/29)) -- Improved `deviantart` by distinguishing between users and groups ([#26](https://github.com/mikf/gallery-dl/issues/26)), always using HTTPS, and always downloading full-sized original images -- Improved `sankaku` by adding authentication support and fixing various other issues ([#44](https://github.com/mikf/gallery-dl/issues/44)) -- Improved URL pattern for direct image links ([#30](https://github.com/mikf/gallery-dl/issues/30)) -- Fixed an issue with `luscious` not getting original image URLs ([#33](https://github.com/mikf/gallery-dl/issues/33)) -- Fixed various smaller issues for `batoto`, `hentai2read` ([#38](https://github.com/mikf/gallery-dl/issues/38)), `jaiminisbox`, `khinsider`, `kissmanga` ([#28](https://github.com/mikf/gallery-dl/issues/28), [#46](https://github.com/mikf/gallery-dl/issues/46)), `mangahere`, `pawoo`, `twitter` -- Removed `kisscomic` and `yonkouprod` modules - -## 0.9.1 - 2017-07-24 -- Added support for: - - `2chan` - https://www.2chan.net/ - - `4plebs` - https://archive.4plebs.org/ - - `archivedmoe` - https://archived.moe/ - - `archiveofsins` - https://archiveofsins.com/ - - `desuarchive` - https://desuarchive.org/ - - `fireden` - https://boards.fireden.net/ - - `loveisover` - https://archive.loveisover.me/ - - `nyafuu` - https://archive.nyafuu.org/ - - `rbt` - https://rbt.asia/ - - `thebarchive` - https://thebarchive.com/ - - `mangazuki` - https://mangazuki.co/ -- Improved `reddit` to allow submission filtering by ID and human-readable dates -- Improved `deviantart` to support group galleries and gallery folders ([#26](https://github.com/mikf/gallery-dl/issues/26)) -- Changed `deviantart` to use better default path formats -- Fixed extraction of larger `imgur` albums -- Fixed some smaller issues for `pixiv`, `batoto` and `fallenangels` - -## 0.9.0 - 2017-06-28 -- Added support for: - - `reddit` - https://www.reddit.com/ ([#15](https://github.com/mikf/gallery-dl/issues/15)) - - `flickr` - https://www.flickr.com/ ([#16](https://github.com/mikf/gallery-dl/issues/16)) - - `gfycat` - https://gfycat.com/ -- Added support for direct image links -- Added user authentication via 
[OAuth](https://github.com/mikf/gallery-dl#52oauth) for `reddit` and `flickr` -- Added support for user authentication data from [`.netrc`](https://stackoverflow.com/tags/.netrc/info) files ([#22](https://github.com/mikf/gallery-dl/issues/22)) -- Added a simple progress indicator for multiple URLs ([#19](https://github.com/mikf/gallery-dl/issues/19)) -- Added the `--write-unsupported` command-line option to write unsupported URLs to a file -- Added documentation for all available config options ([configuration.rst](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst)) -- Improved `pixiv` to support tags for user downloads ([#17](https://github.com/mikf/gallery-dl/issues/17)) -- Improved `pixiv` to support shortened and http://pixiv.me/... URLs ([#23](https://github.com/mikf/gallery-dl/issues/23)) -- Improved `imgur` to properly handle `.gifv` images and provide better metadata -- Fixed an issue with `kissmanga` where metadata parsing for some series failed ([#20](https://github.com/mikf/gallery-dl/issues/20)) -- Fixed an issue with getting filename extensions from `Content-Type` response headers - -## 0.8.4 - 2017-05-21 -- Added the `--abort-on-skip` option to stop extraction if a download would be skipped -- Improved the output format of the `--list-keywords` option -- Updated `deviantart` to support all media types and journals -- Updated `fallenangels` to support their [Vietnamese version](https://truyen.fascans.com/) -- Fixed an issue with multiple tags on ...booru sites -- Removed the `yomanga` module - -## 0.8.3 - 2017-05-01 -- Added support for https://pawoo.net/ -- Added manga extractors for all [FoOlSlide](https://foolcode.github.io/FoOlSlide/)-based modules -- Added the `-q/--quiet` and `-v/--verbose` options to control output verbosity -- Added the `-j/--dump-json` option to dump extractor results in JSON format -- Added the `--ignore-config` option -- Updated the `exhentai` extractor to fall back to using the e-hentai version if no username is given -- Updated `deviantart` to support sta.sh URLs -- Fixed an issue with `kissmanga` which prevented image URLs from being decrypted properly (again) -- Fixed an issue with `pixhost` where for an image inside an album it would always download the first image of that album ([#13](https://github.com/mikf/gallery-dl/issues/13)) -- Removed the `mangashare` and `readcomics` modules - -## 0.8.2 - 2017-04-10 -- Fixed an issue in `kissmanga` which prevented image URLs from being decrypted properly - -## 0.8.1 - 2017-04-09 -- Added new extractors: - - `kireicake` - https://reader.kireicake.com/ - - `seaotterscans` - https://reader.seaotterscans.com/ -- Added a favourites extractor for `deviantart` -- Re-enabled the `kissmanga` module -- Updated `nijie` to support multi-page image listings -- Updated `mangastream` to support readms.net URLs -- Updated `exhentai` to support e-hentai.org URLs -- Updated `fallenangels` to support their new domain and site layout - -## 0.8.0 - 2017-03-28 -- Added logging support -- Added the `-R/--retries` option to specify how often a download should be retried before giving up -- Added the `--http-timeout` option to set a timeout for HTTP connections -- Improved error handling/tolerance during HTTP file downloads ([#10](https://github.com/mikf/gallery-dl/issues/10)) -- Improved option parsing and the help message from `-h/--help` -- Changed the way configuration values are used by prioritizing top-level values - - This allows for cmdline options like `-u/--username` to overwrite values set in 
configuration files -- Fixed an issue with `imagefap.com` where incorrectly reported gallery sizes would cause the extractor to fail ([#9](https://github.com/mikf/gallery-dl/issues/9)) -- Fixed an issue with `seiga.nicovideo.jp` where invalid characters in an API response caused the XML parser to fail -- Fixed an issue with `seiga.nicovideo.jp` where the filename extension for the first image would be used for all others -- Removed support for old configuration paths on Windows -- Removed several modules: - - `mangamint`: site is down - - `whentai`: now requires account with VIP status for original images - - `kissmanga`: encrypted image URLs (will be re-added later) - -## 0.7.0 - 2017-03-06 -- Added `--images` and `--chapters` options - - Specifies which images (or chapters) to download through a comma-separated list of indices or index-ranges - - Example: `--images -2,4,6-8,10-` will select images with index 1, 2, 4, 6, 7, 8 and 10 up to the last one -- Changed the `-g`/`--get-urls` option - - The amount of how often the -g option is given now determines up until which level URLs are resolved. - - See 3bca86618505c21628cd9c7179ce933a78d00ca2 -- Changed several option keys: - - `directory_fmt` -> `directory` - - `filename_fmt` -> `filename` - - `download-original` -> `original` -- Improved [FoOlSlide](https://foolcode.github.io/FoOlSlide/)-based extractors -- Fixed URL extraction for hentai2read -- Fixed an issue with deviantart, where the API access token wouldn't get refreshed - -## 0.6.4 - 2017-02-13 -- Added new extractors: - - fallenangels (famatg.com) -- Fixed url- and data-extraction for: - - nhentai - - mangamint - - twitter - - imagetwist -- Disabled InsecureConnectionWarning when no certificates are available - -## 0.6.3 - 2017-01-25 -- Added new extractors: - - gomanga - - yomanga - - mangafox -- Fixed deviantart extractor failing - switched to using their API -- Fixed an issue with SQLite on Python 3.6 -- Automated test builds via Travis CI -- Standalone executables for Windows - -## 0.6.2 - 2017-01-05 -- Added new extractors: - - kisscomic - - readcomics - - yonkouprod - - jaiminisbox -- Added manga extractor to batoto-module -- Added user extractor to seiga-module -- Added `-i`/`--input-file` argument to allow local files and stdin as input (like wget) -- Added basic support for `file://` URLs - - this allows for the recursive extractor to be applied to local files: - - `$ gallery-dl r:file://[path to file]` -- Added a utility extractor to run unit test URLs -- Updated luscious to deal with API changes -- Fixed twitter to provide the original image URL -- Minor fixes to hentaifoundry -- Removed imgclick extractor - -## 0.6.1 - 2016-11-30 -- Added new extractors: - - whentai - - readcomiconline - - sensescans, worldthree - - imgmaid, imagevenue, img4ever, imgspot, imgtrial, pixhost -- Added base class for extractors of [FoOlSlide](https://foolcode.github.io/FoOlSlide/)-based sites -- Changed default paths for configuration files on Windows - - old paths are still supported, but that will change in future versions -- Fixed aborting downloads if a single one failed ([#5](https://github.com/mikf/gallery-dl/issues/5)) -- Fixed cloudflare-bypass cache containing outdated cookies -- Fixed image URLs for hitomi and 8chan -- Updated deviantart to always provide the highest quality image -- Updated README.rst -- Removed doujinmode extractor - -## 0.6.0 - 2016-10-08 -- Added new extractors: - - hentaihere - - dokireader - - twitter - - rapidimg, picmaniac -- Added support to find 
filename extensions by Content-Type response header -- Fixed filename/path issues on Windows ([#4](https://github.com/mikf/gallery-dl/issues/4)): - - Enable path names with more than 260 characters - - Remove trailing spaces in path segments -- Updated Job class to automatically set category/subcategory keywords - -## 0.5.2 - 2016-09-23 -- Added new extractors: - - pinterest - - rule34 - - dynastyscans - - imagebam, coreimg, imgcandy, imgtrex -- Added login capabilities for batoto -- Added `--version` cmdline argument to print the current program version and exit -- Added `--list-extractors` cmdline argument to print names of all extractor classes together with descriptions and example URLs -- Added proper error messages if an image/user does not exist -- Added unittests for every extractor - -## 0.5.1 - 2016-08-22 -- Added new extractors: - - luscious - - doujinmode - - hentaibox - - seiga - - imagefap -- Changed error output to use stderr instead of stdout -- Fixed broken pipes causing an exception-dump by catching BrokenPipeErrors - -## 0.5.0 - 2016-07-25 - -## 0.4.1 - 2015-12-03 -- New modules (imagetwist, turboimagehost) -- Manga-extractors: Download entire manga and not just single chapters -- Generic extractor (provisional) -- Better and configurable console output -- Windows support - -## 0.4.0 - 2015-11-26 - -## 0.3.3 - 2015-11-10 - -## 0.3.2 - 2015-11-04 - -## 0.3.1 - 2015-10-30 - -## 0.3.0 - 2015-10-05 - -## 0.2.0 - 2015-06-28 - -## 0.1.0 - 2015-05-27 diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d159169..0000000 --- a/LICENSE +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. 
- - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. 
- - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. 
diff --git a/Makefile b/Makefile deleted file mode 100644 index 5a98fcd..0000000 --- a/Makefile +++ /dev/null @@ -1,45 +0,0 @@ - -PREFIX ?= /usr/local -BINDIR ?= $(PREFIX)/bin -MANDIR ?= $(PREFIX)/man -SHAREDIR ?= $(PREFIX)/share -PYTHON ?= /usr/bin/env python3 - -# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local -SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) - -all: man completion docs/supportedsites.rst - -clean: - $(RM) gallery-dl.1 gallery-dl.conf.5 gallery-dl.bash_completion - $(RM) -r build/ - -install: man completion - $(PYTHON) setup.py install - -release: man completion docs/supportedsites.rst - scripts/release.sh - -test: - scripts/run_tests.sh - -executable: - scripts/pyinstaller.py - -completion: gallery-dl.bash_completion - -man: gallery-dl.1 gallery-dl.conf.5 - -.PHONY: all clean install release test executable completion man - -docs/supportedsites.rst: gallery_dl/*/*.py scripts/supportedsites.py - $(PYTHON) scripts/supportedsites.py - -gallery-dl.1: gallery_dl/option.py scripts/man.py - $(PYTHON) scripts/man.py - -gallery-dl.conf.5: docs/configuration.rst scripts/man.py - $(PYTHON) scripts/man.py - -gallery-dl.bash_completion: gallery_dl/option.py scripts/bash_completion.py - $(PYTHON) scripts/bash_completion.py diff --git a/PKG-INFO b/PKG-INFO new file mode 100644 index 0000000..a2145f9 --- /dev/null +++ b/PKG-INFO @@ -0,0 +1,286 @@ +Metadata-Version: 2.1 +Name: gallery_dl +Version: 1.11.1 +Summary: Command-line program to download image-galleries and -collections from several image hosting sites +Home-page: https://github.com/mikf/gallery-dl +Author: Mike Fährmann +Author-email: mike_faehrmann@web.de +Maintainer: Mike Fährmann +Maintainer-email: mike_faehrmann@web.de +License: GPLv2 +Download-URL: https://github.com/mikf/gallery-dl/releases/latest +Description: ========== + gallery-dl + ========== + + *gallery-dl* is a command-line program to download image-galleries and + -collections from several image hosting sites (see `Supported Sites`_). + It is a cross-platform tool with many configuration options + and powerful filenaming capabilities. + + + |pypi| |build| |gitter| + + + Dependencies + ============ + + - Python_ 3.4+ + - Requests_ + + Optional + -------- + + - FFmpeg_: Pixiv Ugoira to WebM conversion + - youtube-dl_: Video downloads + - pyOpenSSL_: Access Cloudflare protected sites + + + Installation + ============ + + Pip + --- + + The stable releases of *gallery-dl* are distributed on PyPI_ and can be + easily installed or upgraded using pip_: + + .. code:: bash + + $ python3 -m pip install --upgrade gallery-dl + + Installing the latest dev-version directly from GitHub can be done with + pip_ as well: + + .. code:: bash + + $ python3 -m pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.tar.gz + + Note: Windows users should use :code:`py -3` instead of :code:`python3`. + + | It is advised to use the latest version of pip_, + including the essential packages :code:`setuptools` and :code:`wheel`. + | To ensure that these packages are up-to-date, run + + .. code:: bash + + $ python3 -m pip install --upgrade pip setuptools wheel + + + From Source + ----------- + + Get the code by either + + * Downloading a stable_ or dev_ archive and unpacking it + * Or via :code:`git clone https://github.com/mikf/gallery-dl.git` + + Navigate into the respective directory and run the :code:`setup.py` file. + + .. 
code:: bash + + $ wget https://github.com/mikf/gallery-dl/archive/master.tar.gz + $ tar -xf master.tar.gz + # or + $ git clone https://github.com/mikf/gallery-dl.git + + $ cd gallery-dl* + $ python3 setup.py install + + + Standalone Executable + --------------------- + + Download a standalone executable file, + put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, + and run it inside a command prompt (like ``cmd.exe``). + + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.bin>`__ + + These executables include a Python 3.7 interpreter + and all required Python packages. + + + Snap + ---- + + Linux users that are using a distro that is supported by Snapd_ can install *gallery-dl* from the Snap Store: + + .. code:: bash + + $ snap install gallery-dl + + + Usage + ===== + + To use *gallery-dl* simply call it with the URLs you wish to download images + from: + + .. code:: bash + + $ gallery-dl [OPTION]... URL... + + See also :code:`gallery-dl --help`. + + + Examples + -------- + + Download images; in this case from danbooru via tag search for 'bonocho': + + .. code:: bash + + $ gallery-dl http://danbooru.donmai.us/posts?tags=bonocho + + + Get the direct URL of an image from a site that requires authentication: + + .. code:: bash + + $ gallery-dl -g -u <username> -p <password> http://seiga.nicovideo.jp/seiga/im3211703 + + + | Search a remote resource for URLs and download images from them: + | (URLs for which no extractor can be found will be silently ignored) + + .. code:: bash + + $ gallery-dl r:https://pastebin.com/raw/FLwrCYsT + + + Configuration + ============= + + Configuration files for *gallery-dl* use a JSON-based file format. + + | For a (more or less) complete example with options set to their default values, + see gallery-dl.conf_. + | For a configuration file example with more involved settings and options, + see gallery-dl-example.conf_. + | A list of all available configuration options and their + descriptions can be found in configuration.rst_. + + *gallery-dl* searches for configuration files in the following places: + + +--------------------------------------------+------------------------------------------+ + | Linux | Windows | + +--------------------------------------------+------------------------------------------+ + |* ``/etc/gallery-dl.conf`` |* | + |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| + |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | + +--------------------------------------------+------------------------------------------+ + + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\<username>\``) + + Values in later configuration files will override previous ones. + + + Authentication + ============== + + Username & Password + ------------------- + + Some extractors require you to provide valid login-credentials in the form of + a username & password pair. This is necessary for + ``pixiv``, ``nijie``, and ``seiga`` + and optional (but strongly recommended) for + ``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``, + ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``. + + You can set the necessary information in your configuration file + (cf. gallery-dl.conf_) + + .. code:: + + { + "extractor": { + ... + "pixiv": { + "username": "<username>", + "password": "<password>" + } + ... 
+ } + } + + or you can provide them directly via the + :code:`-u/--username` and :code:`-p/--password` or via the + :code:`-o/--option` command-line options + + .. code:: bash + + $ gallery-dl -u <username> -p <password> URL + $ gallery-dl -o username=<username> -o password=<password> URL + + OAuth + ----- + + *gallery-dl* supports user authentication via OAuth_ for + ``deviantart``, ``flickr``, ``reddit``, ``smugmug`` and ``tumblr``. + This is entirely optional, but grants *gallery-dl* the ability + to issue requests on your account's behalf and enables it to access resources + which would otherwise be unavailable to a public user. + + To link your account to *gallery-dl*, start by invoking it with + ``oauth:<site-name>`` as an argument. For example: + + .. code:: bash + + $ gallery-dl oauth:flickr + + You will be sent to the site's authorization page and asked to grant read + access to *gallery-dl*. Authorize it and you will be shown one or more + "tokens", which should be added to your configuration file. + + + .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf + .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf + .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst + .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.6.zip + .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip + + .. _Python: https://www.python.org/downloads/ + .. _PyPI: https://pypi.org/ + .. _pip: https://pip.pypa.io/en/stable/ + .. _Requests: https://requests.readthedocs.io/en/master/ + .. _FFmpeg: https://www.ffmpeg.org/ + .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ + .. _pyOpenSSL: https://pyopenssl.org/ + .. _Snapd: https://docs.snapcraft.io/installing-snapd + .. _OAuth: https://en.wikipedia.org/wiki/OAuth + + .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg + :target: https://pypi.org/project/gallery-dl/ + + .. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master + :target: https://travis-ci.org/mikf/gallery-dl + + .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg + :target: https://gitter.im/gallery-dl/main + +Keywords: image gallery downloader crawler scraper +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: End Users/Desktop +Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2) +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: MacOS +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Multimedia :: Graphics +Classifier: Topic :: Utilities +Requires-Python: >=3.4 +Provides-Extra: cloudflare +Provides-Extra: video @@ -36,18 +36,24 @@ easily installed or upgraded using pip_: .. code:: bash - $ pip install --upgrade gallery-dl + $ python3 -m pip install --upgrade gallery-dl Installing the latest dev-version directly from GitHub can be done with pip_ as well: .. 
code:: bash - $ pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.zip + $ python3 -m pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.tar.gz -Be sure the Python interpreter used for pip_ is version 3.4 or higher. -You might have to use :code:`pip3` or :code:`python3 -m pip` -depending on your system's defaults. +Note: Windows users should use :code:`py -3` instead of :code:`python3`. + +| It is advised to use the latest version of pip_, + including the essential packages :code:`setuptools` and :code:`wheel`. +| To ensure that these packages are up-to-date, run + +.. code:: bash + + $ python3 -m pip install --upgrade pip setuptools wheel From Source @@ -62,13 +68,13 @@ Navigate into the respective directory and run the :code:`setup.py` file. .. code:: bash - $ wget https://github.com/mikf/gallery-dl/archive/master.zip - $ unzip master.zip + $ wget https://github.com/mikf/gallery-dl/archive/master.tar.gz + $ tar -xf master.tar.gz # or $ git clone https://github.com/mikf/gallery-dl.git - $ cd gallery-dl - $ python setup.py install + $ cd gallery-dl* + $ python3 setup.py install Standalone Executable diff --git a/bin/gallery-dl b/bin/gallery-dl deleted file mode 100755 index 12da2fd..0000000 --- a/bin/gallery-dl +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: UTF-8 -*- - -import gallery_dl - -if __name__ == '__main__': - gallery_dl.main() diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl new file mode 100644 index 0000000..415bf5c --- /dev/null +++ b/data/completion/gallery-dl @@ -0,0 +1,17 @@ +_gallery_dl() +{ + local cur prev + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + + if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--download-archive)$ ]]; then + COMPREPLY=( $(compgen -f -- "${cur}") ) + elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then + COMPREPLY=( $(compgen -d -- "${cur}") ) + else + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --abort-on-skip --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --exec --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date" -- "${cur}") ) + fi +} + +complete -F _gallery_dl gallery-dl diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 new file mode 100644 index 0000000..a775e76 --- /dev/null +++ b/data/man/gallery-dl.1 @@ -0,0 +1,195 @@ +.TH "GALLERY-DL" "1" "2019-11-09" "1.11.1" "gallery-dl Manual" +.\" disable hyphenation +.nh + +.SH NAME +gallery-dl \- download image-galleries and -collections + +.SH SYNOPSIS +.B gallery-dl +[OPTION]... URL... + +.SH DESCRIPTION +.B gallery-dl +is a command-line program to download image-galleries and -collections +from several image hosting sites. It is a cross-platform tool +with many configuration options and powerful filenaming capabilities. 
+
+.SH OPTIONS
+.TP
+.B "\-h, \-\-help"
+Print this help message and exit
+.TP
+.B "\-\-version"
+Print program version and exit
+.TP
+.B "\-d, \-\-dest" \f[I]DEST\f[]
+Destination directory
+.TP
+.B "\-i, \-\-input\-file" \f[I]FILE\f[]
+Download URLs found in FILE ('-' for stdin)
+.TP
+.B "\-\-cookies" \f[I]FILE\f[]
+File to load additional cookies from
+.TP
+.B "\-\-proxy" \f[I]URL\f[]
+Use the specified proxy
+.TP
+.B "\-\-clear\-cache"
+Delete all cached login sessions, cookies, etc.
+.TP
+.B "\-q, \-\-quiet"
+Activate quiet mode
+.TP
+.B "\-v, \-\-verbose"
+Print various debugging information
+.TP
+.B "\-g, \-\-get\-urls"
+Print URLs instead of downloading
+.TP
+.B "\-j, \-\-dump\-json"
+Print JSON information
+.TP
+.B "\-s, \-\-simulate"
+Simulate data extraction; do not download anything
+.TP
+.B "\-K, \-\-list\-keywords"
+Print a list of available keywords and example values for the given URLs
+.TP
+.B "\-\-list\-modules"
+Print a list of available extractor modules
+.TP
+.B "\-\-list\-extractors"
+Print a list of extractor classes with description, (sub)category and example URL
+.TP
+.B "\-\-write\-log" \f[I]FILE\f[]
+Write logging output to FILE
+.TP
+.B "\-\-write\-unsupported" \f[I]FILE\f[]
+Write URLs, which get emitted by other extractors but cannot be handled, to FILE
+.TP
+.B "\-r, \-\-limit\-rate" \f[I]RATE\f[]
+Maximum download rate (e.g. 500k or 2.5M)
+.TP
+.B "\-R, \-\-retries" \f[I]N\f[]
+Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)
+.TP
+.B "\-A, \-\-abort" \f[I]N\f[]
+Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist
+.TP
+.B "\-\-http\-timeout" \f[I]SECONDS\f[]
+Timeout for HTTP connections (default: 30.0)
+.TP
+.B "\-\-sleep" \f[I]SECONDS\f[]
+Number of seconds to sleep before each download
+.TP
+.B "\-\-no\-part"
+Do not use .part files
+.TP
+.B "\-\-no\-mtime"
+Do not set file modification times according to Last-Modified HTTP response headers
+.TP
+.B "\-\-no\-download"
+Do not download any files
+.TP
+.B "\-\-no\-check\-certificate"
+Disable HTTPS certificate validation
+.TP
+.B "\-c, \-\-config" \f[I]FILE\f[]
+Additional configuration files
+.TP
+.B "\-o, \-\-option" \f[I]OPT\f[]
+Additional '<key>=<value>' option values
+.TP
+.B "\-\-ignore\-config"
+Do not read the default configuration files
+.TP
+.B "\-u, \-\-username" \f[I]USER\f[]
+Username to login with
+.TP
+.B "\-p, \-\-password" \f[I]PASS\f[]
+Password belonging to the given username
+.TP
+.B "\-\-netrc"
+Enable .netrc authentication data
+.TP
+.B "\-\-download\-archive" \f[I]FILE\f[]
+Record all downloaded files in the archive file and skip downloading any file already in it.
+.TP
+.B "\-\-range" \f[I]RANGE\f[]
+Index-range(s) specifying which images to download. For example '5-10' or '1,3-5,10-'
+.TP
+.B "\-\-chapter\-range" \f[I]RANGE\f[]
+Like '--range', but applies to manga-chapters and other delegated URLs
+.TP
+.B "\-\-filter" \f[I]EXPR\f[]
+Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by '-K'. Example: --filter "image_width >= 1000 and rating in ('s', 'q')"
+.TP
+.B "\-\-chapter\-filter" \f[I]EXPR\f[]
+Like '--filter', but applies to manga-chapters and other delegated URLs
+.TP
+.B "\-\-zip"
+Store downloaded files in a ZIP archive
+.TP
+.B "\-\-exec" \f[I]CMD\f[]
+Execute CMD for each downloaded file.
Example: --exec 'magick convert {} {}.png && rm {}' +.TP +.B "\-\-ugoira\-conv" +Convert Pixiv Ugoira to WebM (requires FFmpeg) +.TP +.B "\-\-ugoira\-conv\-lossless" +Convert Pixiv Ugoira to WebM in VP9 lossless mode +.TP +.B "\-\-write\-metadata" +Write metadata to separate JSON files +.TP +.B "\-\-write\-tags" +Write image tags to separate text files +.TP +.B "\-\-mtime\-from\-date" +Set file modification times according to 'date' metadata + +.SH EXAMPLES +.TP +gallery-dl \f[I]URL\f[] +Download images from \f[I]URL\f[]. +.TP +gallery-dl -g -u <username> -p <password> \f[I]URL\f[] +Print direct URLs from a site that requires authentication. +.TP +gallery-dl --filter 'type == "ugoira"' --range '2-4' \f[I]URL\f[] +Apply filter and range expressions. This will only download +the second, third, and fourth file where its type value is equal to "ugoira". +.TP +gallery-dl r:\f[I]URL\f[] +Scan \f[I]URL\f[] for other URLs and invoke \f[B]gallery-dl\f[] on them. +.TP +gallery-dl oauth:\f[I]SITE\-NAME\f[] +Gain OAuth authentication tokens for +.IR deviantart , +.IR flickr , +.IR reddit , +.IR smugmug ", and" +.IR tumblr . + +.SH FILES +.TP +.I /etc/gallery-dl.conf +The system wide configuration file. +.TP +.I ~/.config/gallery-dl/config.json +Per user configuration file. +.TP +.I ~/.gallery-dl.conf +Alternate per user configuration file. + +.SH BUGS +https://github.com/mikf/gallery-dl/issues + +.SH AUTHORS +Mike Fährmann <mike_faehrmann@web.de> +.br +and https://github.com/mikf/gallery-dl/graphs/contributors + +.SH "SEE ALSO" +.BR gallery-dl.conf (5) diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 new file mode 100644 index 0000000..8902f51 --- /dev/null +++ b/data/man/gallery-dl.conf.5 @@ -0,0 +1,2129 @@ +.TH "GALLERY-DL.CONF" "5" "2019-11-09" "1.11.1" "gallery-dl Manual" +.\" disable hyphenation +.nh +.\" disable justification (adjust text to left margin only) +.ad l + +.SH NAME +gallery-dl.conf \- gallery-dl configuration file + +.SH DESCRIPTION +gallery-dl will search for configuration files in the following places +every time it is started, unless +.B --ignore-config +is specified: +.PP +.RS 4 +.nf +.I /etc/gallery-dl.conf +.I $HOME/.config/gallery-dl/config.json +.I $HOME/.gallery-dl.conf +.fi +.RE +.PP +It is also possible to specify additional configuration files with the +.B -c/--config +command-line option or to add further option values with +.B -o/--option +as <key>=<value> pairs, + +Configuration files are JSON-based and therefore don't allow any ordinary +comments, but, since unused keys are simply ignored, it is possible to utilize +those as makeshift comments by settings their values to arbitrary strings. + +.SH EXAMPLE +{ +.RS 4 +"base-directory": "/tmp/", +.br +"extractor": { +.RS 4 +"pixiv": { +.RS 4 +"directory": ["Pixiv", "Works", "{user[id]}"], +.br +"filename": "{id}{num}.{extension}", +.br +"username": "foo", +.br +"password": "bar" +.RE +}, +.br +"flickr": { +.RS 4 +"_comment": "OAuth keys for account 'foobar'", +.br +"access-token": "0123456789-0123456789abcdef", +.br +"access-token-secret": "fedcba9876543210" +.RE +} +.RE +}, +.br +"downloader": { +.RS 4 +"retries": 3, +.br +"timeout": 2.5 +.RE +} +.RE +} + +.SH EXTRACTOR OPTIONS +.SS extractor.*.filename +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"{manga}_c{chapter}_{page:>03}.{extension}" + +.IP "Description:" 4 +A \f[I]format string\f[] to build the resulting filename +for a downloaded file. + +The available replacement keys depend on the extractor used. 
A list +of keys for a specific one can be acquired by calling *gallery-dl* +with the \f[I]-K\f[]/\f[I]--list-keywords\f[] command-line option. +For example: + +.. code:: + +$ gallery-dl -K http://seiga.nicovideo.jp/seiga/im5977527 +Keywords for directory names: + +category +seiga +subcategory +image + +Keywords for filenames: + +category +seiga +extension +None +image-id +5977527 +subcategory +image + +Note: Even if the value of the \f[I]extension\f[] key is missing or +\f[I]None\f[], it will filled in later when the file download is +starting. This key is therefore always available to provide +a valid filename extension. + +.SS extractor.*.directory +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] + +.IP "Example:" 4 +["{category}", "{manga}", "c{chapter} - {title}"] + +.IP "Description:" 4 +A list of \f[I]format strings\f[] for the resulting target directory. + +Each individual string in such a list represents a single path +segment, which will be joined together and appended to the +\f[I]base-directory\f[] to form the complete target directory path. + +.SS extractor.*.base-directory +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Default:" 9 +\f[I]"./gallery-dl/"\f[] + +.IP "Description:" 4 +Directory path used as the base for all download destinations. + +.SS extractor.*.path-restrict +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Example:" 4 +"/!? (){}" + +.IP "Description:" 4 +Set of characters to replace with underscores (\f[I]_\f[]) +in generated path segment names. + +Special values: + + +* \f[I]"auto"\f[]: Use characters from \f[I]"unix"\f[] or \f[I]"windows"\f[] +depending on the local operating system + +* \f[I]"unix"\f[]: \f[I]"/"\f[] + +* \f[I]"windows"\f[]: \f[I]"\\\\\\\\|/<>:\\"?*"\f[] + +Note: In a set with 2 or more characters, \f[I][]^-\\\f[] need to be +escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] + +.SS extractor.*.path-remove +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"\\\\u0000-\\\\u001f\\\\u007f"\f[] (ASCII control characters) + +.IP "Description:" 4 +Set of characters to remove from generated path names. + +Note: In a set with 2 or more characters, \f[I][]^-\\\f[] need to be +escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] + +.SS extractor.*.skip +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls the behavior when downloading files that have been +downloaded before, i.e. a file with the same filename already +exists or its ID is in a \f[I]download archive\f[]. + +__ \f[I]extractor.*.archive\f[] + + +* \f[I]true\f[]: Skip downloads + +* \f[I]false\f[]: Overwrite already existing files + + +* \f[I]"abort"\f[]: Abort the current extractor run + +* \f[I]"abort:N"\f[]: Skip downloads and abort extractor run +after \f[I]N\f[] consecutive skips + + +* \f[I]"exit"\f[]: Exit the program altogether + +* \f[I]"exit:N"\f[]: Skip downloads and exit the program +after \f[I]N\f[] consecutive skips + + +* \f[I]"enumerate"\f[]: Append a numeric suffix to the end of the +original filename (\f[I]file.ext.1\f[], \f[I]file.ext.2\f[], etc) + +.SS extractor.*.sleep +.IP "Type:" 6 +\f[I]float\f[] + +.IP "Default:" 9 +\f[I]0\f[] + +.IP "Description:" 4 +Number of seconds to sleep before each download. + +.SS extractor.*.username & .password +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +The username and password to use when attempting to log in to +another site. 
+ +Specifying username and password is required for the +\f[I]pixiv\f[], \f[I]nijie\f[], and \f[I]seiga\f[] +modules and optional (but strongly recommended) for +\f[I]danbooru\f[], \f[I]exhentai\f[], \f[I]idolcomplex\f[], \f[I]instagram\f[], +\f[I]luscious\f[], \f[I]sankaku\f[], \f[I]tsumino\f[], and \f[I]twitter\f[]. + +These values can also be set via the \f[I]-u/--username\f[] and +\f[I]-p/--password\f[] command-line options or by using a \f[I].netrc\f[] file. +(see Authentication_) + +Note: The password for \f[I]danbooru\f[] is the API key found in your +user profile, not the password for your account. + +.SS extractor.*.netrc +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Enable the use of \f[I].netrc\f[] authentication data. + +.SS extractor.*.cookies +.IP "Type:" 6 +\f[I]Path\f[] or \f[I]object\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Source to read additional cookies from. + + +* If this is a \f[I]Path\f[], it specifies a +Mozilla/Netscape format cookies.txt file. + +* If this is an \f[I]object\f[], its key-value pairs, which should both +be \f[I]strings\f[], will be used as cookie-names and -values. + +.SS extractor.*.cookies-update +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +If \f[I]extractor.*.cookies\f[] specifies a cookies.txt file, update its +contents with cookies received during data extraction. + +.SS extractor.*.proxy +.IP "Type:" 6 +\f[I]string\f[] or \f[I]object\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Proxy (or proxies) to be used for remote connections. + + +* If this is a \f[I]string\f[], it is the proxy URL for all +outgoing requests. + +* If this is an \f[I]object\f[], it is a scheme-to-proxy mapping to +specify different proxy URLs for each scheme. +It is also possible to set a proxy for a specific host by using +\f[I]scheme://host\f[] as key. +See \f[I]Requests' proxy documentation\f[] for more details. + +Example: + +.. code:: + +{ +"http": "http://10.10.1.10:3128", +"https": "http://10.10.1.10:1080", +"http://10.20.1.128": "http://10.10.1.10:5323" +} + +Note: All proxy URLs should include a scheme, +otherwise \f[I]http://\f[] is assumed. + +.SS extractor.*.user-agent +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0"\f[] + +.IP "Description:" 4 +User-Agent header value to be used for HTTP requests. + +Note: This option has no effect on pixiv and +readcomiconline extractors, as these need specific values to +function correctly. + +.SS extractor.*.keywords +.IP "Type:" 6 +\f[I]object\f[] + +.IP "Example:" 4 +{"type": "Pixel Art", "type_id": 123} + +.IP "Description:" 4 +Additional key-value pairs to be added to each metadata dictionary. + +.SS extractor.*.keywords-default +.IP "Type:" 6 +any + +.IP "Default:" 9 +\f[I]"None"\f[] + +.IP "Description:" 4 +Default value used for missing or undefined keyword names in +format strings. + +.SS extractor.*.category-transfer +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +Extractor-specific + +.IP "Description:" 4 +Transfer an extractor's (sub)category values to all child +extractors spawned by it, to let them inherit their parent's +config options. + +.SS extractor.*.archive +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +File to store IDs of downloaded files in. Downloads of files +already recorded in this archive file will be skipped_. 
+ +The resulting archive file is not a plain text file but an SQLite3 +database, as either lookup operations are significantly faster or +memory requirements are significantly lower when the +amount of stored IDs gets reasonably large. + +.SS extractor.*.archive-format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"{id}_{offset}" + +.IP "Description:" 4 +An alternative \f[I]format string\f[] to build archive IDs with. + +.SS extractor.*.postprocessors +.IP "Type:" 6 +\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects + +.IP "Example:" 4 +.. code:: + +[ +{"name": "zip", "compression": "zip"}, +{"name": "exec", "command": ["/home/foobar/script", "{category}", "{image_id}"]} +] + + +.IP "Description:" 4 +A list of post-processors to be applied to each downloaded file +in the same order as they are specified. + +.SS extractor.*.retries +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +\f[I]4\f[] + +.IP "Description:" 4 +Maximum number of times a failed HTTP request is retried before +giving up or \f[I]-1\f[] for infinite retries. + +.SS extractor.*.timeout +.IP "Type:" 6 +\f[I]float\f[] or \f[I]null\f[] + +.IP "Default:" 9 +\f[I]30\f[] + +.IP "Description:" 4 +Amount of time (in seconds) to wait for a successful connection +and response from a remote server. + +This value gets internally used as the \f[I]timeout\f[] parameter for the +\f[I]requests.request()\f[] method. + +.SS extractor.*.verify +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls whether to verify SSL/TLS certificates for HTTPS requests. + +If this is a \f[I]string\f[], it must be the path to a CA bundle to use +instead of the default certificates. + +This value gets internally used as the \f[I]verify\f[] parameter for the +\f[I]requests.request()\f[] method. + +.SS extractor.*.download +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls whether to download media files. + +Setting this to \f[I]false\f[] won't download any files, but all other +functions (postprocessors_, \f[I]download archive\f[], etc.) +will be executed as normal. + +.SS extractor.*.image-range +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"10-20", +.br +"-5, 10, 30-50, 100-" +.br + +.IP "Description:" 4 +Index-range(s) specifying which images to download. + +Note: The index of the first image is \f[I]1\f[]. + +.SS extractor.*.chapter-range +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Description:" 4 +Like \f[I]image-range\f[], but applies to delegated URLs +like manga-chapters, etc. + +.SS extractor.*.image-filter +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"width >= 1200 and width/height > 1.2", +.br +"re.search(r'foo(bar)+', description)" +.br + +.IP "Description:" 4 +Python expression controlling which images to download. +.br +Files for which the expression evaluates to \f[I]False\f[] +.br +are ignored. +Available keys are the filename-specific ones listed +.br +by \f[I]-K\f[] or \f[I]-j\f[]. + +.SS extractor.*.chapter-filter +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Description:" 4 +Like \f[I]image-filter\f[], but applies to delegated URLs +like manga-chapters, etc. + +.SS extractor.*.image-unique +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Ignore image URLs that have been encountered before during the +current extractor run. 
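As a rough illustration of how several of the generic extractor options above might sit together in one configuration file, here is a minimal sketch; the archive path, range, and filter expression are hypothetical example values, not defaults:

.. code::

{
"extractor": {
"_comment": "example values only",
"archive": "/home/user/gallery-dl/archive.sqlite3",
"retries": 2,
"timeout": 10.0,
"image-range": "1-20",
"image-filter": "width >= 1200",
"image-unique": true
}
}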
+ +.SS extractor.*.chapter-unique +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Like \f[I]image-unique\f[], but applies to delegated URLs +like manga-chapters, etc. + +.SS extractor.*.date-format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"%Y-%m-%dT%H:%M:%S"\f[] + +.IP "Description:" 4 +Format string used to parse \f[I]string\f[] values of +date-min and date-max. + +See \f[I]strptime\f[] for a list of formatting directives. + +.SH EXTRACTOR-SPECIFIC OPTIONS +.SS extractor.artstation.external +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Try to follow external URLs of embedded players. + +.SS extractor.danbooru.ugoira +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls the download target for Ugoira posts. + + +* \f[I]true\f[]: Original ZIP archives + +* \f[I]false\f[]: Converted video files + +.SS extractor.deviantart.extra +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Download extra Sta.sh resources from description texts. + +Note: Enabling this option also enables deviantart.metadata_. + +.SS extractor.deviantart.flat +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Select the directory structure created by the Gallery- and +Favorite-Extractors. + + +* \f[I]true\f[]: Use a flat directory structure. + +* \f[I]false\f[]: Collect a list of all gallery-folders or +favorites-collections and transfer any further work to other +extractors (\f[I]folder\f[] or \f[I]collection\f[]), which will then +create individual subdirectories for each of them. + +.SS extractor.deviantart.folders +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Provide a \f[I]folders\f[] metadata field that contains the names of all +folders a deviation is present in. + +Note: Gathering this information requires a lot of API calls. +Use with caution. + +.SS extractor.deviantart.include +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]["gallery"]\f[] + +.IP "Description:" 4 +Selects the subcategories to include when processing a user profile. + +Possible values are \f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], +\f[I]"favorite"\f[]. + +It is also possible to use a string with the initial character of +each subcategory, i.e. \f[I]"gsj"\f[] for +\f[I]["gallery", "scraps", "journal"]\f[] + +.SS extractor.deviantart.journals +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"html"\f[] + +.IP "Description:" 4 +Selects the output format of journal entries. + + +* \f[I]"html"\f[]: HTML with (roughly) the same layout as on DeviantArt. + +* \f[I]"text"\f[]: Plain text with image references and HTML tags removed. + +* \f[I]"none"\f[]: Don't download journals. + +.SS extractor.deviantart.mature +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Enable mature content. + +This option simply sets the \f[I]mature_content\f[] parameter for API +calls to either \f[I]"true"\f[] or \f[I]"false"\f[] and does not do any other +form of content filtering. + +.SS extractor.deviantart.metadata +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Request extended metadata for deviation objects to additionally +provide \f[I]description\f[], \f[I]tags\f[], \f[I]license\f[] and \f[I]is_watching\f[] +fields. 
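For illustration, a hypothetical configuration snippet combining some of the deviantart options described above might look as follows (values chosen for the example, not defaults):

.. code::

{
"extractor": {
"deviantart": {
"flat": false,
"include": ["gallery", "scraps"],
"journals": "text",
"metadata": true
}
}
}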
+ +.SS extractor.deviantart.original +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download original files if available. + +Setting this option to \f[I]"images"\f[] only downloads original +files if they are images and falls back to preview versions for +everything else (archives, etc.). + +.SS extractor.deviantart.quality +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +\f[I]100\f[] + +.IP "Description:" 4 +JPEG quality level of newer images for which +an original file download is not available. + +.SS extractor.deviantart.refresh-token +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +The \f[I]refresh-token\f[] value you get from +\f[I]linking your DeviantArt account to gallery-dl <OAuth_>\f[]. + +Using a \f[I]refresh-token\f[] allows you to access private or otherwise +not publicly available deviations. + +Note: Authenticating with a \f[I]refresh-token\f[] requires persistent +storage in a \f[I]cache file <cache.file_>\f[]. +Otherwise the token will become invalid after its first use. + +.SS extractor.deviantart.wait-min +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +\f[I]0\f[] + +.IP "Description:" 4 +Minimum wait time in seconds before API requests. + +Note: This value will internally be rounded up +to the next power of 2. + +.SS extractor.exhentai.limits +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Check image download limits +and stop extraction when they are exceeded. + +.SS extractor.exhentai.original +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download full-sized original images if available. + +.SS extractor.exhentai.wait-min & .wait-max +.IP "Type:" 6 +\f[I]float\f[] + +.IP "Default:" 9 +\f[I]3.0\f[] and \f[I]6.0\f[] + +.IP "Description:" 4 +Minimum and maximum wait time in seconds between each image + +ExHentai detects and blocks automated downloaders. +*gallery-dl* waits a randomly selected number of +seconds between \f[I]wait-min\f[] and \f[I]wait-max\f[] after +each image to prevent getting blocked. + +.SS extractor.flickr.access-token & .access-token-secret +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +The \f[I]access_token\f[] and \f[I]access_token_secret\f[] values you get +from \f[I]linking your Flickr account to gallery-dl <OAuth_>\f[]. + +.SS extractor.flickr.videos +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Extract and download videos. + +.SS extractor.flickr.size-max +.IP "Type:" 6 +\f[I]integer\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Sets the maximum allowed size for downloaded images. + + +* If this is an \f[I]integer\f[], it specifies the maximum image dimension +(width and height) in pixels. + +* If this is a \f[I]string\f[], it should be one of Flickr's format specifiers +(\f[I]"Original"\f[], \f[I]"Large"\f[], ... or \f[I]"o"\f[], \f[I]"k"\f[], \f[I]"h"\f[], +\f[I]"l"\f[], ...) to use as an upper limit. + +.SS extractor.gelbooru.api +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Enable use of Gelbooru's API. + +Set this value to false if the API has been disabled to switch +to manual information extraction. 
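A sketch combining a few of the exhentai and flickr options described above could look like this; the numbers are illustrative only, not recommended values:

.. code::

{
"extractor": {
"exhentai": {
"original": false,
"wait-min": 5.0,
"wait-max": 10.0
},
"flickr": {
"videos": false,
"size-max": 2048
}
}
}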
+
+.SS extractor.gfycat.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"mp4"\f[]
+
+.IP "Description:" 4
+The name of the preferred animation format, which can be one of
+\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[] or \f[I]"mjpg"\f[].
+
+If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[]
+and \f[I]"gif"\f[] (in that order) will be tried instead, until an
+available format is found.
+
+.SS extractor.imgur.mp4
+.IP "Type:" 6
+\f[I]bool\f[] or \f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Controls whether to choose the GIF or MP4 version of an animation.
+
+
+* \f[I]true\f[]: Follow Imgur's advice and choose MP4 if the
+\f[I]prefer_video\f[] flag in an image's metadata is set.
+
+* \f[I]false\f[]: Always choose GIF.
+
+* \f[I]"always"\f[]: Always choose MP4.
+
+.SS extractor.instagram.highlights
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Include *Story Highlights* when downloading a user profile.
+(requires authentication)
+
+.SS extractor.kissmanga.captcha
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"stop"\f[]
+
+.IP "Description:" 4
+Controls how to handle redirects to CAPTCHA pages.
+
+
+* \f[I]"stop"\f[]: Stop the current extractor run.
+
+* \f[I]"wait"\f[]: Ask the user to solve the CAPTCHA and wait.
+
+.SS extractor.oauth.browser
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Controls how a user is directed to an OAuth authorization site.
+
+
+* \f[I]true\f[]: Use Python's \f[I]webbrowser.open()\f[] method to automatically
+open the URL in the user's browser.
+
+* \f[I]false\f[]: Ask the user to copy & paste a URL from the terminal.
+
+.SS extractor.photobucket.subalbums
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download subalbums.
+
+.SS extractor.pixiv.ugoira
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download Pixiv's Ugoira animations or ignore them.
+
+These animations come as a \f[I].zip\f[] file containing all the single
+animation frames in JPEG format.
+
+.SS extractor.plurk.comments
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Also search Plurk comments for URLs.
+
+.SS extractor.reactor.wait-min & .wait-max
+.IP "Type:" 6
+\f[I]float\f[]
+
+.IP "Default:" 9
+\f[I]3.0\f[] and \f[I]6.0\f[]
+
+.IP "Description:" 4
+Minimum and maximum wait time in seconds between HTTP requests
+during the extraction process.
+
+.SS extractor.readcomiconline.captcha
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"stop"\f[]
+
+.IP "Description:" 4
+Controls how to handle redirects to CAPTCHA pages.
+
+
+* \f[I]"stop"\f[]: Stop the current extractor run.
+
+* \f[I]"wait"\f[]: Ask the user to solve the CAPTCHA and wait.
+
+.SS extractor.recursive.blacklist
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["directlink", "oauth", "recursive", "test"]\f[]
+
+.IP "Description:" 4
+A list of extractor categories which should be ignored when using
+the \f[I]recursive\f[] extractor.
+
+.SS extractor.reddit.comments
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]500\f[]
+
+.IP "Description:" 4
+The value of the \f[I]limit\f[] parameter when loading
+a submission and its comments.
+This number (roughly) specifies the total amount of comments
+being retrieved with the first API call.
+ +Reddit's internal default and maximum values for this parameter +appear to be 200 and 500 respectively. + +The value \f[I]0\f[] ignores all comments and significantly reduces the +time required when scanning a subreddit. + +.SS extractor.reddit.morecomments +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Retrieve additional comments by resolving the \f[I]more\f[] comment +stubs in the base comment tree. + +This requires 1 additional API call for every 100 extra comments. + +.SS extractor.reddit.date-min & .date-max +.IP "Type:" 6 +\f[I]Date\f[] + +.IP "Default:" 9 +\f[I]0\f[] and \f[I]253402210800\f[] (timestamp of \f[I]datetime.max\f[]) + +.IP "Description:" 4 +Ignore all submissions posted before/after this date. + +.SS extractor.reddit.id-min & .id-max +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"6kmzv2" + +.IP "Description:" 4 +Ignore all submissions posted before/after the submission with +this ID. + +.SS extractor.reddit.recursion +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +\f[I]0\f[] + +.IP "Description:" 4 +Reddit extractors can recursively visit other submissions +linked to in the initial set of submissions. +This value sets the maximum recursion depth. + +Special values: + + +* \f[I]0\f[]: Recursion is disabled + +* \f[I]-1\f[]: Infinite recursion (don't do this) + +.SS extractor.reddit.refresh-token +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +The \f[I]refresh-token\f[] value you get from +\f[I]linking your Reddit account to gallery-dl <OAuth_>\f[]. + +Using a \f[I]refresh-token\f[] allows you to access private or otherwise +not publicly available subreddits, given that your account is +authorized to do so, +but requests to the reddit API are going to be rate limited +at 600 requests every 10 minutes/600 seconds. + +.SS extractor.sankaku.wait-min & .wait-max +.IP "Type:" 6 +\f[I]float\f[] + +.IP "Default:" 9 +\f[I]3.0\f[] and \f[I]6.0\f[] + +.IP "Description:" 4 +Minimum and maximum wait time in seconds between each image + +Sankaku Channel responds with \f[I]429 Too Many Requests\f[] if it +receives too many HTTP requests in a certain amount of time. +Waiting a few seconds between each request tries to prevent that. + +.SS extractor.smugmug.videos +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download video files. + +.SS extractor.tumblr.avatar +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Download blog avatars. + +.SS extractor.tumblr.date-min & .date-max +.IP "Type:" 6 +\f[I]Date\f[] + +.IP "Default:" 9 +\f[I]0\f[] and \f[I]null\f[] + +.IP "Description:" 4 +Ignore all posts published before/after this date. + +.SS extractor.tumblr.external +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Follow external URLs (e.g. from "Link" posts) and try to extract +images from them. + +.SS extractor.tumblr.inline +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Search posts for inline images and videos. 
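For illustration, a hypothetical snippet setting a few of the reddit and tumblr options described above might read (the values are examples, not defaults):

.. code::

{
"extractor": {
"reddit": {
"comments": 0,
"recursion": 1
},
"tumblr": {
"avatar": true,
"external": false,
"inline": true
}
}
}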
+ +.SS extractor.tumblr.reblogs +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 + +* \f[I]true\f[]: Extract media from reblogged posts + +* \f[I]false\f[]: Skip reblogged posts + +* \f[I]"same-blog"\f[]: Skip reblogged posts unless the original post +is from the same blog + +.SS extractor.tumblr.posts +.IP "Type:" 6 +\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]"all"\f[] + +.IP "Example:" 4 +"video,audio,link" or ["video", "audio", "link"] + +.IP "Description:" 4 +A (comma-separated) list of post types to extract images, etc. from. + +Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[], +\f[I]video\f[], \f[I]audio\f[], \f[I]photo\f[], \f[I]chat\f[]. + +You can use \f[I]"all"\f[] instead of listing all types separately. + +.SS extractor.twitter.content +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract tweet text as \f[I]content\f[] metadata. + +.SS extractor.twitter.retweets +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Extract images from retweets. + +.SS extractor.twitter.videos +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Control video download behavior. + + +* \f[I]true\f[]: Download videos and use \f[I]youtube-dl\f[] to handle +HLS \f[I].m3u8\f[] manifests + +* \f[I]"ytdl"\f[]: Download videos and let \f[I]youtube-dl\f[] handle all of +video extraction and download + +* \f[I]false\f[]: Skip video Tweets + +.SS extractor.wallhaven.api-key +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Your \f[I]API Key <https://wallhaven.cc/settings/account>\f[] to use +your account's browsing settings and default filters when searching. + +See https://wallhaven.cc/help/api for more information. + +.SS extractor.[booru].tags +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Categorize tags by their respective types +and provide them as \f[I]tags_<type>\f[] metadata fields. + +Note: This requires 1 additional HTTP request for each post. + +.SS extractor.[manga-extractor].chapter-reverse +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Reverse the order of chapter URLs extracted from manga pages. + + +* \f[I]true\f[]: Start with the latest chapter + +* \f[I]false\f[]: Start with the first chapter + +.SH DOWNLOADER OPTIONS +.SS downloader.*.enabled +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Enable/Disable this downloader module. + +.SS downloader.*.mtime +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Use \f[I]Last-Modified\f[] HTTP response headers +to set file modification times. + +.SS downloader.*.part +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls the use of \f[I].part\f[] files during file downloads. + + +* \f[I]true\f[]: Write downloaded data into \f[I].part\f[] files and rename +them upon download completion. This mode additionally supports +resuming incomplete downloads. + +* \f[I]false\f[]: Do not use \f[I].part\f[] files and write data directly +into the actual output files. + +.SS downloader.*.part-directory +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Alternate location for \f[I].part\f[] files. 
+ +Missing directories will be created as needed. +If this value is \f[I]null\f[], \f[I].part\f[] files are going to be stored +alongside the actual output files. + +.SS downloader.*.rate +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Examples:" 4 +\f[I]"32000"\f[], \f[I]"500k"\f[], \f[I]"2.5M"\f[] + +.IP "Description:" 4 +Maximum download rate in bytes per second. + +Possible values are valid integer or floating-point numbers +optionally followed by one of \f[I]k\f[], \f[I]m\f[]. \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[]. +These suffixes are case-insensitive. + +.SS downloader.*.retries +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +\f[I]extractor.*.retries\f[] + +.IP "Description:" 4 +Maximum number of retries during file downloads +or \f[I]-1\f[] for infinite retries. + +.SS downloader.*.timeout +.IP "Type:" 6 +\f[I]float\f[] or \f[I]null\f[] + +.IP "Default:" 9 +\f[I]extractor.*.timeout\f[] + +.IP "Description:" 4 +Connection timeout during file downloads. + +.SS downloader.*.verify +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]extractor.*.verify\f[] + +.IP "Description:" 4 +Certificate validation during file downloads. + +.SS downloader.http.adjust-extensions +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Check the file headers of \f[I]jpg\f[], \f[I]png\f[], and \f[I]gif\f[] files +and adjust their filename extensions if they do not match. + +.SS downloader.ytdl.format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +youtube-dl's default, currently \f[I]"bestvideo+bestaudio/best"\f[] + +.IP "Description:" 4 +Video \f[I]format selection +<https://github.com/ytdl-org/youtube-dl#format-selection>\f[] +directly passed to youtube-dl. + +.SS downloader.ytdl.forward-cookies +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Forward cookies to youtube-dl. + +.SS downloader.ytdl.logging +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Route youtube-dl's output through gallery-dl's logging system. +.br +Otherwise youtube-dl will write its output directly to stdout/stderr. +.br + +Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in +\f[I]downloader.ytdl.raw-options\f[] to \f[I]true\f[] to suppress all output. + +.SS downloader.ytdl.outtmpl +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +The \f[I]Output Template <https://github.com/ytdl-org/youtube-dl#output-template>\f[] +used to generate filenames for files downloaded with youtube-dl. + +Special values: + + +* \f[I]null\f[]: generate filenames with \f[I]extractor.*.filename\f[] + +* \f[I]"default"\f[]: use youtube-dl's default, currently \f[I]"%(title)s-%(id)s.%(ext)s"\f[] + +Note: An output template other than \f[I]null\f[] might +cause unexpected results in combination with other options +(e.g. \f[I]"skip": "enumerate"\f[]) + +.SS downloader.ytdl.raw-options +.IP "Type:" 6 +\f[I]object\f[] + +.IP "Example:" 4 +.. code:: + +{ +"quiet": true, +"writesubtitles": true, +"merge_output_format": "mkv" +} + + +.IP "Description:" 4 +Additional options passed directly to the \f[I]YoutubeDL\f[] constructor. +.br +All available options can be found in \f[I]youtube-dl's docstrings +.br +<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>\f[]. 
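+
+As a rough sketch of how the downloader options above fit together in a
+configuration file (all values are illustrative):
+
+.. code::
+
+{
+"downloader": {
+"rate": "1M",
+"retries": 3,
+"mtime": true,
+"ytdl": {
+"format": "bestvideo+bestaudio/best",
+"raw-options": {
+"quiet": true
+}
+}
+}
+}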
+ +.SH OUTPUT OPTIONS +.SS output.mode +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Description:" 4 +Controls the output string format and status indicators. + + +* \f[I]"null"\f[]: No output + +* \f[I]"pipe"\f[]: Suitable for piping to other processes or files + +* \f[I]"terminal"\f[]: Suitable for the standard Windows console + +* \f[I]"color"\f[]: Suitable for terminals that understand ANSI escape codes and colors + +* \f[I]"auto"\f[]: Automatically choose the best suitable output mode + +.SS output.shorten +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls whether the output strings should be shortened to fit +on one console line. + +.SS output.progress +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Controls the progress indicator when *gallery-dl* is run with +multiple URLs as arguments. + + +* \f[I]true\f[]: Show the default progress indicator +(\f[I]"[{current}/{total}] {url}"\f[]) + +* \f[I]false\f[]: Do not show any progress indicator + +* Any \f[I]string\f[]: Show the progress indicator using this +as a custom \f[I]format string\f[]. Possible replacement keys are +\f[I]current\f[], \f[I]total\f[] and \f[I]url\f[]. + +.SS output.log +.IP "Type:" 6 +\f[I]string\f[] or \f[I]Logging Configuration\f[] + +.IP "Default:" 9 +\f[I]"[{name}][{levelname}] {message}"\f[] + +.IP "Description:" 4 +Configuration for standard logging output to stderr. + +If this is a simple \f[I]string\f[], it specifies +the format string for logging messages. + +.SS output.logfile +.IP "Type:" 6 +\f[I]Path\f[] or \f[I]Logging Configuration\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +File to write logging output to. + +.SS output.unsupportedfile +.IP "Type:" 6 +\f[I]Path\f[] or \f[I]Logging Configuration\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +File to write external URLs unsupported by *gallery-dl* to. + +The default format string here is \f[I]"{message}"\f[]. + +.SS output.num-to-str +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Convert numeric values (\f[I]integer\f[] or \f[I]float\f[]) to \f[I]string\f[] +before outputting them as JSON. + +.SH POSTPROCESSOR OPTIONS +.SS classify.mapping +.IP "Type:" 6 +\f[I]object\f[] + +.IP "Default:" 9 +.. code:: + +{ +"Pictures" : ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"], +"Video" : ["flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", "webm", "vob", "wmv"], +"Music" : ["mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"], +"Archives" : ["zip", "rar", "7z", "tar", "gz", "bz2"] +} + + +.IP "Description:" 4 +A mapping from directory names to filename extensions that should +be stored in them. + +Files with an extension not listed will be ignored and stored +in their default location. + +.SS exec.async +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Controls whether to wait for a subprocess to finish +or to let it run asynchronously. + +.SS exec.command +.IP "Type:" 6 +\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] + +.IP "Example:" 4 + +* "convert {} {}.png && rm {}" + +* ["echo", "{user[account]}", "{id}"] + +.IP "Description:" 4 +The command to run. + + +* If this is a \f[I]string\f[], it will be executed using the system's +shell, e.g. \f[I]/bin/sh\f[]. 
Any \f[I]{}\f[] will be replaced +with the full path of a file or target directory, depending on +\f[I]exec.final\f[] + + +* If this is a \f[I]list\f[], the first element specifies the program +name and any further elements its arguments. +Each element of this list is treated as a \f[I]format string\f[] using +the files' metadata as well as \f[I]{_path}\f[], \f[I]{_directory}\f[], +and \f[I]{_filename}\f[]. + +.SS exec.final +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Controls whether to execute \f[I]exec.command\f[] for each +downloaded file or only once after all files +have been downloaded successfully. + +.SS metadata.mode +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"json"\f[] + +.IP "Description:" 4 +Select how to write metadata. + + +* \f[I]"json"\f[]: all metadata using \f[I]json.dump() +<https://docs.python.org/3/library/json.html#json.dump>\f[] + +* \f[I]"tags"\f[]: \f[I]tags\f[] separated by newlines + +* \f[I]"custom"\f[]: result of applying \f[I]metadata.format\f[] to a file's +metadata dictionary + +.SS metadata.extension +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"json"\f[] or \f[I]"txt"\f[] + +.IP "Description:" 4 +Filename extension for metadata files. + +.SS metadata.format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"tags:\\n\\n{tags:J\\n}\\n" + +.IP "Description:" 4 +Custom format string to build content of metadata files. + +Note: Only applies for \f[I]"mode": "custom"\f[]. + +.SS mtime.key +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"date"\f[] + +.IP "Description:" 4 +Name of the metadata field whose value should be used. + +This value must either be a UNIX timestamp or a +\f[I]datetime\f[] object. + +.SS ugoira.extension +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"webm"\f[] + +.IP "Description:" 4 +Filename extension for the resulting video files. + +.SS ugoira.ffmpeg-args +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Example:" 4 +["-c:v", "libvpx-vp9", "-an", "-b:v", "2M"] + +.IP "Description:" 4 +Additional FFmpeg command-line arguments. + +.SS ugoira.ffmpeg-location +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Default:" 9 +\f[I]"ffmpeg"\f[] + +.IP "Description:" 4 +Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use. + +.SS ugoira.ffmpeg-output +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Show FFmpeg output. + +.SS ugoira.ffmpeg-twopass +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Enable Two-Pass encoding. + +.SS ugoira.framerate +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Description:" 4 +Controls the frame rate argument (\f[I]-r\f[]) for FFmpeg + + +* \f[I]"auto"\f[]: Automatically assign a fitting frame rate +based on delays between frames. + +* any other \f[I]string\f[]: Use this value as argument for \f[I]-r\f[]. + +* \f[I]null\f[] or an empty \f[I]string\f[]: Don't set an explicit frame rate. + +.SS ugoira.keep-files +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Keep ZIP archives after conversion. + +.SS ugoira.libx264-prevent-odd +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Prevent \f[I]"width/height not divisible by 2"\f[] errors +when using \f[I]libx264\f[] or \f[I]libx265\f[] encoders +by applying a simple cropping filter. 
See this \f[I]Stack Overflow +thread <https://stackoverflow.com/questions/20847674>\f[] +for more information. + +This option, when \f[I]libx264/5\f[] is used, automatically +adds \f[I]["-vf", "crop=iw-mod(iw\\\\,2):ih-mod(ih\\\\,2)"]\f[] +to the list of FFmpeg command-line arguments +to reduce an odd width/height by 1 pixel and make them even. + +.SS zip.compression +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"store"\f[] + +.IP "Description:" 4 +Compression method to use when writing the archive. + +Possible values are \f[I]"store"\f[], \f[I]"zip"\f[], \f[I]"bzip2"\f[], \f[I]"lzma"\f[]. + +.SS zip.extension +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"zip"\f[] + +.IP "Description:" 4 +Filename extension for the created ZIP archive. + +.SS zip.keep-files +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Keep the actual files after writing them to a ZIP archive. + +.SS zip.mode +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"default"\f[] + +.IP "Description:" 4 + +* \f[I]"default"\f[]: Write the central directory file header +once after everything is done or an exception is raised. + + +* \f[I]"safe"\f[]: Update the central directory file header +each time a file is stored in a ZIP archive. + +This greatly reduces the chance a ZIP archive gets corrupted in +case the Python interpreter gets shut down unexpectedly +(power outage, SIGKILL) but is also a lot slower. + +.SH MISCELLANEOUS OPTIONS +.SS cache.file +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Default:" 9 + +* \f[I]tempfile.gettempdir()\f[] + \f[I]".gallery-dl.cache"\f[] on Windows + +* (\f[I]$XDG_CACHE_HOME\f[] or \f[I]"~/.cache"\f[]) + \f[I]"/gallery-dl/cache.sqlite3"\f[] on all other platforms + +.IP "Description:" 4 +Path of the SQLite3 database used to cache login sessions, +cookies and API tokens across gallery-dl invocations. + +Set this option to \f[I]null\f[] or an invalid path to disable +this cache. + +.SS ciphers +.IP "Type:" 6 +\f[I]bool\f[] or \f[I]string\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 + +* \f[I]true\f[]: Update urllib3's default cipher list + +* \f[I]false\f[]: Leave the default cipher list as is + +* Any \f[I]string\f[]: Replace urllib3's default ciphers with these +(See \f[I]SSLContext.set_ciphers() <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.set_ciphers>\f[] +for details) + +.SH API TOKENS & IDS +.SS extractor.deviantart.client-id & .client-secret +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To:" 4 + +* login and visit DeviantArt's +\f[I]Applications & Keys <https://www.deviantart.com/developers/apps>\f[] +section + +* click "Register Application" + +* scroll to "OAuth2 Redirect URI Whitelist (Required)" +and enter "https://mikf.github.io/gallery-dl/oauth-redirect.html" + +* scroll to the bottom and agree to the API License Agreement. +Submission Policy, and Terms of Service. 
+ +* click "Save" + +* copy \f[I]client_id\f[] and \f[I]client_secret\f[] of your new +application and put them in your configuration file +as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[] + +* get a new \f[I]refresh-token <extractor.deviantart.refresh-token_>\f[] +if necessary + +.SS extractor.flickr.api-key & .api-secret +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To:" 4 + +* login and \f[I]Create an App <https://www.flickr.com/services/apps/create/apply/>\f[] +in Flickr's \f[I]App Garden <https://www.flickr.com/services/>\f[] + +* click "APPLY FOR A NON-COMMERCIAL KEY" + +* fill out the form with a random name and description +and click "SUBMIT" + +* copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration +file + +.SS extractor.pawoo.access-token +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To +:" 4 + + +.SS extractor.reddit.client-id & .user-agent +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To:" 4 + +* login and visit the \f[I]apps <https://www.reddit.com/prefs/apps/>\f[] +section of your account's preferences + +* click the "are you a developer? create an app..." button + +* fill out the form, choose "installed app", preferably set +"http://localhost:6414/" as "redirect uri" and finally click +"create app" + +* copy the client id (third line, under your application's name and +"installed app") and put it in your configuration file + +* use "\f[I]Python:<application name>:v1.0 (by /u/<username>)\f[]" as +user-agent and replace \f[I]<application name>\f[] and \f[I]<username>\f[] +accordingly (see Reddit's +\f[I]API access rules <https://github.com/reddit/reddit/wiki/API>\f[]) + +.SS extractor.smugmug.api-key & .api-secret +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To:" 4 + +* login and \f[I]Apply for an API Key <https://api.smugmug.com/api/developer/apply>\f[] + +* use a random name and description, +set "Type" to "Application", "Platform" to "All", +and "Use" to "Non-Commercial" + +* fill out the two checkboxes at the bottom and click "Apply" + +* copy \f[I]API Key\f[] and \f[I]API Secret\f[] +and put them in your configuration file + +.SS extractor.tumblr.api-key & .api-secret +.IP "Type:" 6 +\f[I]string\f[] + +.IP "How To:" 4 + +* login and visit Tumblr's +\f[I]Applications <https://www.tumblr.com/oauth/apps>\f[] section + +* click "Register application" + +* fill out the form: use a random name and description, set +https://example.org/ as "Application Website" and "Default +callback URL" + +* solve Google's "I'm not a robot" challenge and click "Register" + +* click "Show secret key" (below "OAuth Consumer Key") + +* copy your \f[I]OAuth Consumer Key\f[] and \f[I]Secret Key\f[] +and put them in your configuration file + +.SH CUSTOM TYPES +.SS Date +.IP "Type:" 6 +\f[I]string\f[] or \f[I]integer\f[] + +.IP "Examples:" 4 + +* \f[I]"2019-01-01T00:00:00"\f[] + +* \f[I]"2019"\f[] with \f[I]"%Y"\f[] as \f[I]date-format\f[] + +* \f[I]1546297200\f[] + +.IP "Description:" 4 +A \f[I]Date\f[] value represents a specific point in time. + + +* If given as \f[I]string\f[], it is parsed according to date-format_. + +* If given as \f[I]integer\f[], it is interpreted as UTC timestamp. + +.SS Path +.IP "Type:" 6 +\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] + +.IP "Examples:" 4 + +* \f[I]"file.ext"\f[] + +* \f[I]"~/path/to/file.ext"\f[] + +* \f[I]"$HOME/path/to/file.ext"\f[] + +* \f[I]["$HOME", "path", "to", "file.ext"]\f[] + +.IP "Description:" 4 +A \f[I]Path\f[] is a \f[I]string\f[] representing the location of a file +or directory. 
+
+Simple \f[I]tilde expansion <https://docs.python.org/3/library/os.path.html#os.path.expanduser>\f[]
+and \f[I]environment variable expansion <https://docs.python.org/3/library/os.path.html#os.path.expandvars>\f[]
+are supported.
+
+In Windows environments, backslashes (\f[I]"\\"\f[]) can, in addition to
+forward slashes (\f[I]"/"\f[]), be used as path separators.
+Because backslashes are JSON's escape character,
+they themselves have to be escaped.
+The path \f[I]C:\\path\\to\\file.ext\f[] therefore has to be written as
+\f[I]"C:\\\\path\\\\to\\\\file.ext"\f[] if you want to use backslashes.
+
+.SS Logging Configuration
+.IP "Type:" 6
+\f[I]object\f[]
+
+
+.IP "Examples:" 4
+.. code::
+
+{
+"format": "{asctime} {name}: {message}",
+"format-date": "%H:%M:%S",
+"path": "~/log.txt",
+"encoding": "ascii"
+}
+
+{
+"level": "debug",
+"format": {
+"debug" : "debug: {message}",
+"info" : "[{name}] {message}",
+"warning": "Warning: {message}",
+"error" : "ERROR: {message}"
+}
+}
+
+
+.IP "Description:" 4
+Extended logging output configuration.
+
+
+* format
+
+* General format string for logging messages
+or a dictionary with format strings for each loglevel.
+
+In addition to the default
+\f[I]LogRecord attributes <https://docs.python.org/3/library/logging.html#logrecord-attributes>\f[],
+it is also possible to access the current
+\f[I]extractor <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/extractor/common.py#L24>\f[]
+and \f[I]job <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/job.py#L19>\f[]
+objects as well as their attributes
+(e.g. \f[I]"{extractor.url}"\f[])
+
+* Default: \f[I]"[{name}][{levelname}] {message}"\f[]
+
+* format-date
+
+* Format string for \f[I]{asctime}\f[] fields in logging messages
+(see \f[I]strftime() directives <https://docs.python.org/3/library/time.html#time.strftime>\f[])
+
+* Default: \f[I]"%Y-%m-%d %H:%M:%S"\f[]
+
+* level
+
+* Minimum logging message level
+(one of \f[I]"debug"\f[], \f[I]"info"\f[], \f[I]"warning"\f[], \f[I]"error"\f[], \f[I]"exception"\f[])
+
+* Default: \f[I]"info"\f[]
+
+* path
+
+* \f[I]Path\f[] to the output file
+
+* mode
+
+* Mode in which the file is opened;
+use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append
+(see \f[I]open() <https://docs.python.org/3/library/functions.html#open>\f[])
+
+* Default: \f[I]"w"\f[]
+
+* encoding
+
+* File encoding
+
+* Default: \f[I]"utf-8"\f[]
+
+Note: path, mode and encoding are only applied when configuring
+logging output to a file.
+
+.SS Postprocessor Configuration
+.IP "Type:" 6
+\f[I]object\f[]
+
+
+.IP "Example:" 4
+.. code::
+
+{
+"name": "zip",
+"compression": "store",
+"extension": "cbz",
+"whitelist": ["mangadex", "exhentai", "nhentai"]
+}
+
+
+.IP "Description:" 4
+An object with the \f[I]name\f[] of a post-processor and its options.
+
+See \f[I]Postprocessor Options\f[] for a list of all available
+post-processors and their respective options.
+
+You can also set a \f[I]whitelist\f[] or \f[I]blacklist\f[] to
+only enable or disable a post-processor for the specified
+extractor categories.
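+
+Postprocessor Configuration objects like the example above are normally
+supplied as a list via the \f[I]extractor.*.postprocessors\f[] option.
+A minimal sketch (the values are illustrative):
+
+.. code::
+
+{
+"extractor": {
+"postprocessors": [
+{"name": "zip", "compression": "store", "extension": "cbz"},
+{"name": "metadata", "mode": "tags", "blacklist": ["twitter"]}
+]
+}
+}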
+ + +.SH BUGS +https://github.com/mikf/gallery-dl/issues + +.SH AUTHORS +Mike Fährmann <mike_faehrmann@web.de> +.br +and https://github.com/mikf/gallery-dl/graphs/contributors + +.SH "SEE ALSO" +.BR gallery-dl (1) diff --git a/docs/configuration.rst b/docs/configuration.rst deleted file mode 100644 index 0824390..0000000 --- a/docs/configuration.rst +++ /dev/null @@ -1,1869 +0,0 @@ -Configuration -############# - -Contents -======== - -1) `Extractor Options`_ -2) `Extractor-specific Options`_ -3) `Downloader Options`_ -4) `Output Options`_ -5) `Postprocessor Options`_ -6) `Miscellaneous Options`_ -7) `API Tokens & IDs`_ - - - -Extractor Options -================= - - -Each extractor is identified by its ``category`` and ``subcategory``. -The ``category`` is the lowercase site name without any spaces or special -characters, which is usually just the module name -(``pixiv``, ``danbooru``, ...). -The ``subcategory`` is a lowercase word describing the general functionality -of that extractor (``user``, ``favorite``, ``manga``, ...). - -Each one of the following options can be specified on multiple levels of the -configuration tree: - -================== ===== -Base level: ``extractor.<option-name>`` -Category level: ``extractor.<category>.<option-name>`` -Subcategory level: ``extractor.<category>.<subcategory>.<option-name>`` -================== ===== - -A value in a "deeper" level hereby overrides a value of the same name on a -lower level. Setting the ``extractor.pixiv.filename`` value, for example, lets -you specify a general filename pattern for all the different pixiv extractors. -Using the ``extractor.pixiv.user.filename`` value lets you override this -general pattern specifically for ``PixivUserExtractor`` instances. - -The ``category`` and ``subcategory`` of all extractors are included in the -output of ``gallery-dl --list-extractors``. For a specific URL these values -can also be determined by using the ``-K``/``--list-keywords`` command-line -option (see the example below). - -extractor.*.filename --------------------- -=========== ===== -Type ``string`` -Example ``"{manga}_c{chapter}_{page:>03}.{extension}"`` -Description A `format string`_ to build the resulting filename - for a downloaded file. - - The available replacement keys depend on the extractor used. A list - of keys for a specific one can be acquired by calling *gallery-dl* - with the ``-K``/``--list-keywords`` command-line option. - For example: - - .. code:: - - $ gallery-dl -K http://seiga.nicovideo.jp/seiga/im5977527 - Keywords for directory names: - ----------------------------- - category - seiga - subcategory - image - - Keywords for filenames: - ----------------------- - category - seiga - extension - None - image-id - 5977527 - subcategory - image - - Note: Even if the value of the ``extension`` key is missing or - ``None``, it will filled in later when the file download is - starting. This key is therefore always available to provide - a valid filename extension. -=========== ===== - - -extractor.*.directory ---------------------- -=========== ===== -Type ``list`` of ``strings`` -Example ``["{category}", "{manga}", "c{chapter} - {title}"]`` -Description A list of `format strings`_ for the resulting target directory. - - Each individual string in such a list represents a single path - segment, which will be joined together and appended to the - base-directory_ to form the complete target directory path. 
-=========== ===== - - -extractor.*.base-directory --------------------------- -=========== ===== -Type |Path|_ -Default ``"./gallery-dl/"`` -Description Directory path used as the base for all download destinations. -=========== ===== - - -extractor.*.path-restrict -------------------------- -=========== ===== -Type ``string`` -Default ``"auto"`` -Example ``"/!? (){}"`` -Description Set of characters to replace with underscores (``_``) - in generated path segment names. - - Special values: - - * ``"auto"``: Use characters from ``"unix"`` or ``"windows"`` - depending on the local operating system - * ``"unix"``: ``"/"`` - * ``"windows"``: ``"\\\\|/<>:\"?*"`` - - Note: In a set with 2 or more characters, ``[]^-\`` need to be - escaped with backslashes, e.g. ``"\\[\\]"`` -=========== ===== - - -extractor.*.path-remove ------------------------ -=========== ===== -Type ``string`` -Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters) -Description Set of characters to remove from generated path names. - - Note: In a set with 2 or more characters, ``[]^-\`` need to be - escaped with backslashes, e.g. ``"\\[\\]"`` -=========== ===== - - -extractor.*.skip ----------------- -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description Controls the behavior when downloading files that have been - downloaded before, i.e. a file with the same filename already - exists or its ID is in a `download archive`__. - - __ `extractor.*.archive`_ - - * ``true``: Skip downloads - * ``false``: Overwrite already existing files - - * ``"abort"``: Abort the current extractor run - * ``"abort:N"``: Skip downloads and abort extractor run - after ``N`` consecutive skips - - * ``"exit"``: Exit the program altogether - * ``"exit:N"``: Skip downloads and exit the program - after ``N`` consecutive skips - - * ``"enumerate"``: Append a numeric suffix to the end of the - original filename (``file.ext.1``, ``file.ext.2``, etc) -=========== ===== - - -extractor.*.sleep ------------------ -=========== ===== -Type ``float`` -Default ``0`` -Description Number of seconds to sleep before each download. -=========== ===== - - -extractor.*.username & .password --------------------------------- -=========== ===== -Type ``string`` -Default ``null`` -Description The username and password to use when attempting to log in to - another site. - - Specifying username and password is required for the - ``pixiv``, ``nijie``, and ``seiga`` - modules and optional (but strongly recommended) for - ``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``, - ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``. - - These values can also be set via the ``-u/--username`` and - ``-p/--password`` command-line options or by using a |.netrc|_ file. - (see Authentication_) - - Note: The password for ``danbooru`` is the API key found in your - user profile, not the password for your account. -=========== ===== - - -extractor.*.netrc ------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Enable the use of |.netrc|_ authentication data. -=========== ===== - - -extractor.*.cookies -------------------- -=========== ===== -Type |Path|_ or ``object`` -Default ``null`` -Description Source to read additional cookies from. - - * If this is a |Path|_, it specifies a - Mozilla/Netscape format cookies.txt file. - * If this is an ``object``, its key-value pairs, which should both - be ``strings``, will be used as cookie-names and -values. 
-=========== ===== - - -extractor.*.proxy ------------------ -=========== ===== -Type ``string`` or ``object`` -Default ``null`` -Description Proxy (or proxies) to be used for remote connections. - - * If this is a ``string``, it is the proxy URL for all - outgoing requests. - * If this is an ``object``, it is a scheme-to-proxy mapping to - specify different proxy URLs for each scheme. - It is also possible to set a proxy for a specific host by using - ``scheme://host`` as key. - See `Requests' proxy documentation`_ for more details. - - Example: - - .. code:: - - { - "http": "http://10.10.1.10:3128", - "https": "http://10.10.1.10:1080", - "http://10.20.1.128": "http://10.10.1.10:5323" - } - - Note: All proxy URLs should include a scheme, - otherwise ``http://`` is assumed. -=========== ===== - - -extractor.*.user-agent ----------------------- -=========== ===== -Type ``string`` -Default ``"Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0"`` -Description User-Agent header value to be used for HTTP requests. - - Note: This option has no effect on `pixiv` and - `readcomiconline` extractors, as these need specific values to - function correctly. -=========== ===== - - -extractor.*.keywords --------------------- -=========== ===== -Type ``object`` -Example ``{"type": "Pixel Art", "type_id": 123}`` -Description Additional key-value pairs to be added to each metadata dictionary. -=========== ===== - - -extractor.*.keywords-default ----------------------------- -=========== ===== -Type any -Default ``"None"`` -Description Default value used for missing or undefined keyword names in - format strings. -=========== ===== - - -extractor.*.category-transfer ------------------------------ -=========== ===== -Type ``bool`` -Default Extractor-specific -Description Transfer an extractor's (sub)category values to all child - extractors spawned by it, to let them inherit their parent's - config options. -=========== ===== - - -extractor.*.archive -------------------- -=========== ===== -Type |Path|_ -Default ``null`` -Description File to store IDs of downloaded files in. Downloads of files - already recorded in this archive file will be skipped_. - - The resulting archive file is not a plain text file but an SQLite3 - database, as either lookup operations are significantly faster or - memory requirements are significantly lower when the - amount of stored IDs gets reasonably large. -=========== ===== - - -extractor.*.archive-format --------------------------- -=========== ===== -Type ``string`` -Example ``"{id}_{offset}"`` -Description An alternative `format string`_ to build archive IDs with. -=========== ===== - - -extractor.*.postprocessors --------------------------- -=========== ===== -Type ``list`` of |Postprocessor Configuration|_ objects -Example .. code:: - - [ - {"name": "zip", "compression": "zip"}, - {"name": "exec", "command": ["/home/foobar/script", "{category}", "{image_id}"]} - ] - -Description A list of post-processors to be applied to each downloaded file - in the same order as they are specified. -=========== ===== - - -extractor.*.retries -------------------- -=========== ===== -Type ``integer`` -Default ``4`` -Description Maximum number of times a failed HTTP request is retried before - giving up or ``-1`` for infinite retries. -=========== ===== - - -extractor.*.timeout -------------------- -=========== ===== -Type ``float`` or ``null`` -Default ``30`` -Description Amount of time (in seconds) to wait for a successful connection - and response from a remote server. 
- - This value gets internally used as the |timeout|_ parameter for the - |requests.request()|_ method. -=========== ===== - - -extractor.*.verify ------------------- -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description Controls whether to verify SSL/TLS certificates for HTTPS requests. - - If this is a ``string``, it must be the path to a CA bundle to use - instead of the default certificates. - - This value gets internally used as the |verify|_ parameter for the - |requests.request()|_ method. -=========== ===== - - -extractor.*.download --------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Controls whether to download media files. - - Setting this to ``false`` won't download any files, but all other - functions (postprocessors_, `download archive`_, etc.) - will be executed as normal. -=========== ===== - -.. _postprocessors: `extractor.*.postprocessors`_ -.. _download archive: `extractor.*.archive`_ - - -extractor.*.image-range ------------------------ -=========== ===== -Type ``string`` -Example | ``"10-20"``, - | ``"-5, 10, 30-50, 100-"`` -Description Index-range(s) specifying which images to download. - - Note: The index of the first image is ``1``. -=========== ===== - - -extractor.*.chapter-range -------------------------- -=========== ===== -Type ``string`` -Description Like `image-range`__, but applies to delegated URLs - like manga-chapters, etc. -=========== ===== - -__ `extractor.*.image-range`_ - - -extractor.*.image-filter ------------------------- -=========== ===== -Type ``string`` -Example | ``"width >= 1200 and width/height > 1.2"``, - | ``"re.search(r'foo(bar)+', description)"`` -Description | Python expression controlling which images to download. - | Files for which the expression evaluates to ``False`` - are ignored. - | Available keys are the filename-specific ones listed - by ``-K`` or ``-j``. -=========== ===== - - -extractor.*.chapter-filter --------------------------- -=========== ===== -Type ``string`` -Description Like `image-filter`__, but applies to delegated URLs - like manga-chapters, etc. -=========== ===== - -__ `extractor.*.image-filter`_ - - -extractor.*.image-unique ------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Ignore image URLs that have been encountered before during the - current extractor run. -=========== ===== - - -extractor.*.chapter-unique --------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Like `image-unique`__, but applies to delegated URLs - like manga-chapters, etc. -=========== ===== - -__ `extractor.*.image-unique`_ - - -extractor.*.date-format ----------------------------- -=========== ===== -Type ``string`` -Default ``"%Y-%m-%dT%H:%M:%S"`` -Description Format string used to parse ``string`` values of - `date-min` and `date-max`. - - See |strptime|_ for a list of formatting directives. -=========== ===== - - - -Extractor-specific Options -========================== - - -extractor.artstation.external ------------------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Try to follow external URLs of embedded players. -=========== ===== - - -extractor.danbooru.ugoira -------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Controls the download target for Ugoira posts. 
- - * ``true``: Original ZIP archives - * ``false``: Converted video files -=========== ===== - - -extractor.deviantart.extra --------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Download extra Sta.sh resources from description texts. - - Note: Enabling this option also enables deviantart.metadata_. -=========== ===== - - -extractor.deviantart.flat -------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Select the directory structure created by the Gallery- and - Favorite-Extractors. - - * ``true``: Use a flat directory structure. - * ``false``: Collect a list of all gallery-folders or - favorites-collections and transfer any further work to other - extractors (``folder`` or ``collection``), which will then - create individual subdirectories for each of them. -=========== ===== - - -extractor.deviantart.folders ----------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Provide a ``folders`` metadata field that contains the names of all - folders a deviation is present in. - - Note: Gathering this information requires a lot of API calls. - Use with caution. -=========== ===== - - -extractor.deviantart.journals ------------------------------ -=========== ===== -Type ``string`` -Default ``"html"`` -Description Selects the output format of journal entries. - - * ``"html"``: HTML with (roughly) the same layout as on DeviantArt. - * ``"text"``: Plain text with image references and HTML tags removed. - * ``"none"``: Don't download journals. -=========== ===== - - -extractor.deviantart.mature ---------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Enable mature content. - - This option simply sets the |mature_content|_ parameter for API - calls to either ``"true"`` or ``"false"`` and does not do any other - form of content filtering. -=========== ===== - - -extractor.deviantart.metadata ------------------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Request extended metadata for deviation objects to additionally - provide ``description``, ``tags``, ``license`` and ``is_watching`` - fields. -=========== ===== - - -extractor.deviantart.original ------------------------------ -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description Download original files if available. - - Setting this option to ``"images"`` only downloads original - files if they are images and falls back to preview versions for - everything else (archives, etc.). -=========== ===== - - -extractor.deviantart.quality ----------------------------- -=========== ===== -Type ``integer`` -Default ``100`` -Description JPEG quality level of newer images for which - an original file download is not available. -=========== ===== - - -extractor.deviantart.refresh-token ----------------------------------- -=========== ===== -Type ``string`` -Default ``null`` -Description The ``refresh_token`` value you get from - `linking your DeviantArt account to gallery-dl <OAuth_>`__. - - Using a ``refresh_token`` allows you to access private or otherwise - not publicly available deviations. -=========== ===== - - -extractor.deviantart.wait-min ------------------------------ -=========== ===== -Type ``integer`` -Default ``0`` -Description Minimum wait time in seconds before API requests. - - Note: This value will internally be rounded up - to the next power of 2. 
-=========== ===== - - -extractor.exhentai.limits -------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Check image download limits - and stop extraction when they are exceeded. -=========== ===== - - -extractor.exhentai.original ---------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Download full-sized original images if available. -=========== ===== - - -extractor.exhentai.wait-min & .wait-max ---------------------------------------- -=========== ===== -Type ``float`` -Default ``3.0`` and ``6.0`` -Description Minimum and maximum wait time in seconds between each image - - ExHentai detects and blocks automated downloaders. - *gallery-dl* waits a randomly selected number of - seconds between ``wait-min`` and ``wait-max`` after - each image to prevent getting blocked. -=========== ===== - - -extractor.flickr.access-token & .access-token-secret ----------------------------------------------------- -=========== ===== -Type ``string`` -Default ``null`` -Description The ``access_token`` and ``access_token_secret`` values you get - from `linking your Flickr account to gallery-dl <OAuth_>`__. -=========== ===== - - -extractor.flickr.videos ------------------------ -=========== ===== -Type ``bool`` -Default ``true`` -Description Extract and download videos. -=========== ===== - - -extractor.flickr.size-max --------------------------- -=========== ===== -Type ``integer`` or ``string`` -Default ``null`` -Description Sets the maximum allowed size for downloaded images. - - * If this is an ``integer``, it specifies the maximum image dimension - (width and height) in pixels. - * If this is a ``string``, it should be one of Flickr's format specifiers - (``"Original"``, ``"Large"``, ... or ``"o"``, ``"k"``, ``"h"``, - ``"l"``, ...) to use as an upper limit. -=========== ===== - - -extractor.gelbooru.api ----------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Enable use of Gelbooru's API. - - Set this value to `false` if the API has been disabled to switch - to manual information extraction. -=========== ===== - - -extractor.gfycat.format ------------------------ -=========== ===== -Type ``string`` -Default ``"mp4"`` -Description The name of the preferred animation format, which can be one of - ``"mp4"``, ``"webm"``, ``"gif"``, ``"webp"`` or ``"mjpg"``. - - If the selected format is not available, ``"mp4"``, ``"webm"`` - and ``"gif"`` (in that order) will be tried instead, until an - available format is found. -=========== ===== - - -extractor.imgur.mp4 -------------------- -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description Controls whether to choose the GIF or MP4 version of an animation. - - * ``true``: Follow Imgur's advice and choose MP4 if the - ``prefer_video`` flag in an image's metadata is set. - * ``false``: Always choose GIF. - * ``"always"``: Always choose MP4. -=========== ===== - - -extractor.instagram.highlights ------------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Include *Story Highlights* when downloading a user profile. - (requires authentication) -=========== ===== - - -extractor.kissmanga.captcha ---------------------------- -=========== ===== -Type ``string`` -Default ``"stop"`` -Description Controls how to handle redirects to CAPTCHA pages. - - * ``"stop``: Stop the current extractor run. - * ``"wait``: Ask the user to solve the CAPTCHA and wait. 
-=========== ===== - - -extractor.oauth.browser ------------------------ -=========== ===== -Type ``bool`` -Default ``true`` -Description Controls how a user is directed to an OAuth authorization site. - - * ``true``: Use Python's |webbrowser.open()|_ method to automatically - open the URL in the user's browser. - * ``false``: Ask the user to copy & paste an URL from the terminal. -=========== ===== - - -extractor.photobucket.subalbums -------------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Download subalbums. -=========== ===== - - -extractor.pixiv.ugoira ----------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Download Pixiv's Ugoira animations or ignore them. - - These animations come as a ``.zip`` file containing all the single - animation frames in JPEG format. -=========== ===== - - -extractor.plurk.comments ------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Also search Plurk comments for URLs. -=========== ===== - - -extractor.reactor.wait-min & .wait-max --------------------------------------- -=========== ===== -Type ``float`` -Default ``3.0`` and ``6.0`` -Description Minimum and maximum wait time in seconds between HTTP requests - during the extraction process. -=========== ===== - - -extractor.readcomiconline.captcha ---------------------------------- -=========== ===== -Type ``string`` -Default ``"stop"`` -Description Controls how to handle redirects to CAPTCHA pages. - - * ``"stop``: Stop the current extractor run. - * ``"wait``: Ask the user to solve the CAPTCHA and wait. -=========== ===== - - -extractor.recursive.blacklist ------------------------------ -=========== ===== -Type ``list`` of ``strings`` -Default ``["directlink", "oauth", "recursive", "test"]`` -Description A list of extractor categories which should be ignored when using - the ``recursive`` extractor. -=========== ===== - - -extractor.reddit.comments -------------------------- -=========== ===== -Type ``integer`` -Default ``500`` -Description The value of the ``limit`` parameter when loading - a submission and its comments. - This number (roughly) specifies the total amount of comments - being retrieved with the first API call. - - Reddit's internal default and maximum values for this parameter - appear to be 200 and 500 respectively. - - The value ``0`` ignores all comments and significantly reduces the - time required when scanning a subreddit. -=========== ===== - - -extractor.reddit.morecomments ------------------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Retrieve additional comments by resolving the ``more`` comment - stubs in the base comment tree. - - This requires 1 additional API call for every 100 extra comments. -=========== ===== - - -extractor.reddit.date-min & .date-max -------------------------------------- -=========== ===== -Type |Date|_ -Default ``0`` and ``253402210800`` (timestamp of |datetime.max|_) -Description Ignore all submissions posted before/after this date. -=========== ===== - - -extractor.reddit.id-min & .id-max ---------------------------------- -=========== ===== -Type ``string`` -Example ``"6kmzv2"`` -Description Ignore all submissions posted before/after the submission with - this ID. 
-=========== ===== - - -extractor.reddit.recursion --------------------------- -=========== ===== -Type ``integer`` -Default ``0`` -Description Reddit extractors can recursively visit other submissions - linked to in the initial set of submissions. - This value sets the maximum recursion depth. - - Special values: - - * ``0``: Recursion is disabled - * ``-1``: Infinite recursion (don't do this) -=========== ===== - - -extractor.reddit.refresh-token ------------------------------- -=========== ===== -Type ``string`` -Default ``null`` -Description The ``refresh_token`` value you get from - `linking your Reddit account to gallery-dl <OAuth_>`__. - - Using a ``refresh_token`` allows you to access private or otherwise - not publicly available subreddits, given that your account is - authorized to do so, - but requests to the reddit API are going to be rate limited - at 600 requests every 10 minutes/600 seconds. -=========== ===== - - -extractor.sankaku.wait-min & .wait-max --------------------------------------- -=========== ===== -Type ``float`` -Default ``3.0`` and ``6.0`` -Description Minimum and maximum wait time in seconds between each image - - Sankaku Channel responds with ``429 Too Many Requests`` if it - receives too many HTTP requests in a certain amount of time. - Waiting a few seconds between each request tries to prevent that. -=========== ===== - - -extractor.smugmug.videos ------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Download video files. -=========== ===== - - -extractor.tumblr.avatar ------------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Download blog avatars. -=========== ===== - - -extractor.tumblr.date-min & .date-max -------------------------------------- -=========== ===== -Type |Date|_ -Default ``0`` and ``null`` -Description Ignore all posts published before/after this date. -=========== ===== - - -extractor.tumblr.external -------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Follow external URLs (e.g. from "Link" posts) and try to extract - images from them. -=========== ===== - - -extractor.tumblr.inline ------------------------ -=========== ===== -Type ``bool`` -Default ``true`` -Description Search posts for inline images and videos. -=========== ===== - - -extractor.tumblr.reblogs ------------------------- -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description * ``true``: Extract media from reblogged posts - * ``false``: Skip reblogged posts - * ``"same-blog"``: Skip reblogged posts unless the original post - is from the same blog -=========== ===== - - -extractor.tumblr.posts ----------------------- -=========== ===== -Type ``string`` or ``list`` of ``strings`` -Default ``"all"`` -Example ``"video,audio,link"`` or ``["video", "audio", "link"]`` -Description A (comma-separated) list of post types to extract images, etc. from. - - Possible types are ``text``, ``quote``, ``link``, ``answer``, - ``video``, ``audio``, ``photo``, ``chat``. - - You can use ``"all"`` instead of listing all types separately. -=========== ===== - - -extractor.twitter.content -------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Extract tweet text as ``content`` metadata. -=========== ===== - - -extractor.twitter.retweets --------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Extract images from retweets. 
-=========== ===== - - -extractor.twitter.videos ------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Use `youtube-dl`_ to download from video tweets. -=========== ===== - - -extractor.wallhaven.api-key ---------------------------- -=========== ===== -Type ``string`` -Default ``null`` -Description Your `API Key <https://wallhaven.cc/settings/account>`__ to use - your account's browsing settings and default filters when searching. - - See https://wallhaven.cc/help/api for more information. -=========== ===== - - -extractor.[booru].tags ----------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Categorize tags by their respective types - and provide them as ``tags_<type>`` metadata fields. - - Note: This requires 1 additional HTTP request for each post. -=========== ===== - - -extractor.[manga-extractor].chapter-reverse -------------------------------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Reverse the order of chapter URLs extracted from manga pages. - - * ``true``: Start with the latest chapter - * ``false``: Start with the first chapter -=========== ===== - - - -Downloader Options -================== - - -downloader.*.enabled --------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Enable/Disable this downloader module. -=========== ===== - - -downloader.*.mtime ------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Use |Last-Modified|_ HTTP response headers - to set file modification times. -=========== ===== - - -downloader.*.part ------------------ -=========== ===== -Type ``bool`` -Default ``true`` -Description Controls the use of ``.part`` files during file downloads. - - * ``true``: Write downloaded data into ``.part`` files and rename - them upon download completion. This mode additionally supports - resuming incomplete downloads. - * ``false``: Do not use ``.part`` files and write data directly - into the actual output files. -=========== ===== - - -downloader.*.part-directory ---------------------------- -=========== ===== -Type |Path|_ -Default ``null`` -Description Alternate location for ``.part`` files. - - Missing directories will be created as needed. - If this value is ``null``, ``.part`` files are going to be stored - alongside the actual output files. -=========== ===== - - -downloader.*.rate ------------------ -=========== ===== -Type ``string`` -Default ``null`` -Examples ``"32000"``, ``"500k"``, ``"2.5M"`` -Description Maximum download rate in bytes per second. - - Possible values are valid integer or floating-point numbers - optionally followed by one of ``k``, ``m``. ``g``, ``t`` or ``p``. - These suffixes are case-insensitive. -=========== ===== - - -downloader.*.retries --------------------- -=========== ===== -Type ``integer`` -Default `extractor.*.retries`_ -Description Maximum number of retries during file downloads - or ``-1`` for infinite retries. -=========== ===== - - -downloader.*.timeout --------------------- -=========== ===== -Type ``float`` or ``null`` -Default `extractor.*.timeout`_ -Description Connection timeout during file downloads. -=========== ===== - - -downloader.*.verify -------------------- -=========== ===== -Type ``bool`` or ``string`` -Default `extractor.*.verify`_ -Description Certificate validation during file downloads. 
-=========== ===== - - -downloader.http.adjust-extensions ---------------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Check the file headers of ``jpg``, ``png``, and ``gif`` files - and adjust their filename extensions if they do not match. -=========== ===== - - -downloader.ytdl.format ----------------------- -=========== ===== -Type ``string`` -Default youtube-dl's default, currently ``"bestvideo+bestaudio/best"`` -Description Video `format selection - <https://github.com/ytdl-org/youtube-dl#format-selection>`__ - directly passed to youtube-dl. -=========== ===== - - -downloader.ytdl.forward-cookies -------------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Forward cookies to youtube-dl. -=========== ===== - - -downloader.ytdl.logging ------------------------ -=========== ===== -Type ``bool`` -Default ``true`` -Description | Route youtube-dl's output through gallery-dl's logging system. - | Otherwise youtube-dl will write its output directly to stdout/stderr. - - Note: Set ``quiet`` and ``no_warnings`` in - `downloader.ytdl.raw-options`_ to ``true`` to suppress all output. -=========== ===== - - -downloader.ytdl.outtmpl ------------------------ -=========== ===== -Type ``string`` -Default ``null`` -Description The `Output Template <https://github.com/ytdl-org/youtube-dl#output-template>`__ - used to generate filenames for files downloaded with youtube-dl. - - Special values: - - * ``null``: generate filenames with `extractor.*.filename`_ - * ``"default"``: use youtube-dl's default, currently ``"%(title)s-%(id)s.%(ext)s"`` - - Note: An output template other than ``null`` might - cause unexpected results in combination with other options - (e.g. ``"skip": "enumerate"``) -=========== ===== - - -downloader.ytdl.raw-options ---------------------------- -=========== ===== -Type ``object`` -Example .. code:: - - { - "quiet": true, - "writesubtitles": true, - "merge_output_format": "mkv" - } - -Description | Additional options passed directly to the ``YoutubeDL`` constructor. - | All available options can be found in `youtube-dl's docstrings - <https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>`__. -=========== ===== - - - -Output Options -============== - - -output.mode ------------ -=========== ===== -Type ``string`` -Default ``"auto"`` -Description Controls the output string format and status indicators. - - * ``"null"``: No output - * ``"pipe"``: Suitable for piping to other processes or files - * ``"terminal"``: Suitable for the standard Windows console - * ``"color"``: Suitable for terminals that understand ANSI escape codes and colors - * ``"auto"``: Automatically choose the best suitable output mode -=========== ===== - - -output.shorten --------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Controls whether the output strings should be shortened to fit - on one console line. -=========== ===== - - -output.progress ---------------- -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description Controls the progress indicator when *gallery-dl* is run with - multiple URLs as arguments. - - * ``true``: Show the default progress indicator - (``"[{current}/{total}] {url}"``) - * ``false``: Do not show any progress indicator - * Any ``string``: Show the progress indicator using this - as a custom `format string`_. Possible replacement keys are - ``current``, ``total`` and ``url``. 
-=========== ===== - - -output.log ----------- -=========== ===== -Type ``string`` or |Logging Configuration|_ -Default ``"[{name}][{levelname}] {message}"`` -Description Configuration for standard logging output to stderr. - - If this is a simple ``string``, it specifies - the format string for logging messages. -=========== ===== - - -output.logfile --------------- -=========== ===== -Type |Path|_ or |Logging Configuration|_ -Default ``null`` -Description File to write logging output to. -=========== ===== - - -output.unsupportedfile ----------------------- -=========== ===== -Type |Path|_ or |Logging Configuration|_ -Default ``null`` -Description File to write external URLs unsupported by *gallery-dl* to. - - The default format string here is ``"{message}"``. -=========== ===== - - -output.num-to-str ------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Convert numeric values (``integer`` or ``float``) to ``string`` - before outputting them as JSON. -=========== ===== - - - -Postprocessor Options -===================== - - -classify --------- - -Categorize files by filename extension - -classify.mapping ----------------- -=========== ===== -Type ``object`` -Default .. code:: - - { - "Pictures" : ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"], - "Video" : ["flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", "webm", "vob", "wmv"], - "Music" : ["mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"], - "Archives" : ["zip", "rar", "7z", "tar", "gz", "bz2"] - } - -Description A mapping from directory names to filename extensions that should - be stored in them. - - Files with an extension not listed will be ignored and stored - in their default location. -=========== ===== - - -exec ----- - -Execute external commands. - -exec.async ----------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Controls whether to wait for a subprocess to finish - or to let it run asynchronously. -=========== ===== - -exec.command ------------- -=========== ===== -Type ``list`` of ``strings`` -Example ``["echo", "{user[account]}", "{id}"]`` -Description The command to run. - - Each element of this list is treated as a `format string`_ using - the files' metadata. -=========== ===== - - -metadata --------- - -Write image metadata to separate files - -metadata.mode -------------- -=========== ===== -Type ``string`` -Default ``"json"`` -Description Select how to write metadata. - - * ``"json"``: all metadata using `json.dump() - <https://docs.python.org/3/library/json.html#json.dump>`_ - * ``"tags"``: ``tags`` separated by newlines - * ``"custom"``: result of applying `metadata.format`_ to a file's - metadata dictionary -=========== ===== - -metadata.extension ------------------- -=========== ===== -Type ``string`` -Default ``"json"`` or ``"txt"`` -Description Filename extension for metadata files. -=========== ===== - -metadata.format ---------------- -=========== ===== -Type ``string`` -Example ``"tags:\n\n{tags:J\n}\n"`` -Description Custom format string to build content of metadata files. - - Note: Only applies for ``"mode": "custom"``. -=========== ===== - - -mtime ------ - -Set file modification time according to its metadata - -mtime.key ---------- -=========== ===== -Type ``string`` -Default ``"date"`` -Description Name of the metadata field whose value should be used. - - This value must either be a UNIX timestamp or a - |datetime|_ object. -=========== ===== - - -ugoira ------- - -Convert Pixiv Ugoira to WebM using `FFmpeg <https://www.ffmpeg.org/>`__. 
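-
-As a rough sketch (the values are illustrative only), this post-processor
-could be enabled for pixiv through ``extractor.pixiv.postprocessors``:
-
-.. code::
-
-    {
-        "extractor": {
-            "pixiv": {
-                "postprocessors": [
-                    {"name": "ugoira", "extension": "webm", "ffmpeg-twopass": true}
-                ]
-            }
-        }
-    }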
- -ugoira.extension ----------------- -=========== ===== -Type ``string`` -Default ``"webm"`` -Description Filename extension for the resulting video files. -=========== ===== - -ugoira.ffmpeg-args ------------------- -=========== ===== -Type ``list`` of ``strings`` -Default ``null`` -Example ``["-c:v", "libvpx-vp9", "-an", "-b:v", "2M"]`` -Description Additional FFmpeg command-line arguments. -=========== ===== - -ugoira.ffmpeg-location ----------------------- -=========== ===== -Type |Path|_ -Default ``"ffmpeg"`` -Description Location of the ``ffmpeg`` (or ``avconv``) executable to use. -=========== ===== - -ugoira.ffmpeg-output --------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Show FFmpeg output. -=========== ===== - -ugoira.ffmpeg-twopass ---------------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Enable Two-Pass encoding. -=========== ===== - -ugoira.framerate ----------------- -=========== ===== -Type ``string`` -Default ``"auto"`` -Description Controls the frame rate argument (``-r``) for FFmpeg - - * ``"auto"``: Automatically assign a fitting frame rate - based on delays between frames. - * any other ``string``: Use this value as argument for ``-r``. - * ``null`` or an empty ``string``: Don't set an explicit frame rate. -=========== ===== - -ugoira.keep-files ------------------ -=========== ===== -Type ``bool`` -Default ``false`` -Description Keep ZIP archives after conversion. -=========== ===== - -ugoira.libx264-prevent-odd --------------------------- -=========== ===== -Type ``bool`` -Default ``true`` -Description Prevent ``"width/height not divisible by 2"`` errors - when using ``libx264`` or ``libx265`` encoders - by applying a simple cropping filter. See this `Stack Overflow - thread <https://stackoverflow.com/questions/20847674>`__ - for more information. - - This option, when ``libx264/5`` is used, automatically - adds ``["-vf", "crop=iw-mod(iw\\,2):ih-mod(ih\\,2)"]`` - to the list of FFmpeg command-line arguments - to reduce an odd width/height by 1 pixel and make them even. -=========== ===== - - -zip ---- - -Store files in a ZIP archive. - -zip.compression ---------------- -=========== ===== -Type ``string`` -Default ``"store"`` -Description Compression method to use when writing the archive. - - Possible values are ``"store"``, ``"zip"``, ``"bzip2"``, ``"lzma"``. -=========== ===== - -zip.extension -------------- -=========== ===== -Type ``string`` -Default ``"zip"`` -Description Filename extension for the created ZIP archive. -=========== ===== - -zip.keep-files --------------- -=========== ===== -Type ``bool`` -Default ``false`` -Description Keep the actual files after writing them to a ZIP archive. -=========== ===== - -zip.mode --------- -=========== ===== -Type ``string`` -Default ``"default"`` -Description * ``"default"``: Write the central directory file header - once after everything is done or an exception is raised. - - * ``"safe"``: Update the central directory file header - each time a file is stored in a ZIP archive. - - This greatly reduces the chance a ZIP archive gets corrupted in - case the Python interpreter gets shut down unexpectedly - (power outage, SIGKILL) but is also a lot slower. 
-=========== ===== - - - -Miscellaneous Options -===================== - - -cache.file ----------- -=========== ===== -Type |Path|_ -Default * |tempfile.gettempdir()|__ + ``".gallery-dl.cache"`` on Windows - * (``$XDG_CACHE_HOME`` or ``"~/.cache"``) + ``"/gallery-dl/cache.sqlite3"`` on all other platforms -Description Path of the SQLite3 database used to cache login sessions, - cookies and API tokens across `gallery-dl` invocations. - - Set this option to ``null`` or an invalid path to disable - this cache. -=========== ===== - -__ gettempdir_ - - -ciphers -------- -=========== ===== -Type ``bool`` or ``string`` -Default ``true`` -Description * ``true``: Update urllib3's default cipher list - * ``false``: Leave the default cipher list as is - * Any ``string``: Replace urllib3's default ciphers with these - (See `SSLContext.set_ciphers() <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.set_ciphers>`__ - for details) -=========== ===== - - - -API Tokens & IDs -================ - - -All configuration keys listed in this section have fully functional default -values embedded into *gallery-dl* itself, but if things unexpectedly break -or you want to use your own personal client credentials, you can follow these -instructions to get an alternative set of API tokens and IDs. - - -extractor.deviantart.client-id & .client-secret ------------------------------------------------ -=========== ===== -Type ``string`` -How To * login and visit DeviantArt's - `Applications & Keys <https://www.deviantart.com/developers/apps>`__ - section - * click "Register your Application" - * scroll to "OAuth2 Redirect URI Whitelist (Required)" - and enter "https://mikf.github.io/gallery-dl/oauth-redirect.html" - * click "Save" (top right) - * copy ``client_id`` and ``client_secret`` of your new - application and put them in your configuration file -=========== ===== - - -extractor.flickr.api-key & .api-secret --------------------------------------- -=========== ===== -Type ``string`` -How To * login and `Create an App <https://www.flickr.com/services/apps/create/apply/>`__ - in Flickr's `App Garden <https://www.flickr.com/services/>`__ - * click "APPLY FOR A NON-COMMERCIAL KEY" - * fill out the form with a random name and description - and click "SUBMIT" - * copy ``Key`` and ``Secret`` and put them in your configuration - file -=========== ===== - - -extractor.pawoo.access-token ----------------------------- -=========== ===== -Type ``string`` -How To -=========== ===== - - -extractor.reddit.client-id & .user-agent ----------------------------------------- -=========== ===== -Type ``string`` -How To * login and visit the `apps <https://www.reddit.com/prefs/apps/>`__ - section of your account's preferences - * click the "are you a developer? create an app..." 
button - * fill out the form, choose "installed app", preferably set - "http://localhost:6414/" as "redirect uri" and finally click - "create app" - * copy the client id (third line, under your application's name and - "installed app") and put it in your configuration file - * use "``Python:<application name>:v1.0 (by /u/<username>)``" as - user-agent and replace ``<application name>`` and ``<username>`` - accordingly (see Reddit's - `API access rules <https://github.com/reddit/reddit/wiki/API>`__) -=========== ===== - - -extractor.smugmug.api-key & .api-secret ---------------------------------------- -=========== ===== -Type ``string`` -How To * login and `Apply for an API Key <https://api.smugmug.com/api/developer/apply>`__ - * use a random name and description, - set "Type" to "Application", "Platform" to "All", - and "Use" to "Non-Commercial" - * fill out the two checkboxes at the bottom and click "Apply" - * copy ``API Key`` and ``API Secret`` - and put them in your configuration file -=========== ===== - - -extractor.tumblr.api-key & .api-secret --------------------------------------- -=========== ===== -Type ``string`` -How To * login and visit Tumblr's - `Applications <https://www.tumblr.com/oauth/apps>`__ section - * click "Register application" - * fill out the form: use a random name and description, set - https://example.org/ as "Application Website" and "Default - callback URL" - * solve Google's "I'm not a robot" challenge and click "Register" - * click "Show secret key" (below "OAuth Consumer Key") - * copy your ``OAuth Consumer Key`` and ``Secret Key`` - and put them in your configuration file -=========== ===== - - - -Custom Types -============ - - -Date ----- -=========== ===== -Type ``string`` or ``integer`` -Examples * ``"2019-01-01T00:00:00"`` - * ``"2019"`` with ``"%Y"`` as date-format_ - * ``1546297200`` -Description A |Date|_ value represents a specific point in time. - - * If given as ``string``, it is parsed according to date-format_. - * If given as ``integer``, it is interpreted as UTC timestamp. -=========== ===== - - -Path ----- -=========== ===== -Type ``string`` or ``list`` of ``strings`` -Examples * ``"file.ext"`` - * ``"~/path/to/file.ext"`` - * ``"$HOME/path/to/file.ext"`` - * ``["$HOME", "path", "to", "file.ext"]`` -Description A |Path|_ is a ``string`` representing the location of a file - or directory. - - Simple `tilde expansion <https://docs.python.org/3/library/os.path.html#os.path.expanduser>`__ - and `environment variable expansion <https://docs.python.org/3/library/os.path.html#os.path.expandvars>`__ - is supported. - - In Windows environments, backslashes (``"\"``) can, in addition to - forward slashes (``"/"``), be used as path separators. - Because backslashes are JSON's escape character, - they themselves have to be escaped. - The path ``C:\path\to\file.ext`` has therefore to be written as - ``"C:\\path\\to\\file.ext"`` if you want to use backslashes. -=========== ===== - - -Logging Configuration ---------------------- -=========== ===== -Type ``object`` - -Examples .. code:: - - { - "format": "{asctime} {name}: {message}", - "format-date": "%H:%M:%S", - "path": "~/log.txt", - "encoding": "ascii" - } - - { - "level": "debug", - "format": { - "debug" : "debug: {message}", - "info" : "[{name}] {message}", - "warning": "Warning: {message}", - "error" : "ERROR: {message}" - } - } - -Description Extended logging output configuration. 
- - * format - * General format string for logging messages - or a dictionary with format strings for each loglevel. - - In addition to the default - `LogRecord attributes <https://docs.python.org/3/library/logging.html#logrecord-attributes>`__, - it is also possible to access the current - `extractor <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/extractor/common.py#L24>`__ - and `job <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/job.py#L19>`__ - objects as well as their attributes - (e.g. ``"{extractor.url}"``) - * Default: ``"[{name}][{levelname}] {message}"`` - * format-date - * Format string for ``{asctime}`` fields in logging messages - (see `strftime() directives <https://docs.python.org/3/library/time.html#time.strftime>`__) - * Default: ``"%Y-%m-%d %H:%M:%S"`` - * level - * Minimum logging message level - (one of ``"debug"``, ``"info"``, ``"warning"``, ``"error"``, ``"exception"``) - * Default: ``"info"`` - * path - * |Path|_ to the output file - * mode - * Mode in which the file is opened; - use ``"w"`` to truncate or ``"a"`` to append - (see `open() <https://docs.python.org/3/library/functions.html#open>`__) - * Default: ``"w"`` - * encoding - * File encoding - * Default: ``"utf-8"`` - - Note: path, mode and encoding are only applied when configuring - logging output to a file. -=========== ===== - - -Postprocessor Configuration ---------------------------- -=========== ===== -Type ``object`` - -Example .. code:: - - { - "name": "zip", - "compression": "store", - "extension": "cbz", - "whitelist": ["mangadex", "exhentai", "nhentai"] - } - -Description An object with the ``name`` of a post-processor and its options. - - See `Postprocessor Options`_ for a list of all available - post-processors and their respective options. - - You can also set a ``whitelist`` or ``blacklist`` to - only enable or disable a post-processor for the specified - extractor categories. -=========== ===== - - - -.. |.netrc| replace:: ``.netrc`` -.. |tempfile.gettempdir()| replace:: ``tempfile.gettempdir()`` -.. |requests.request()| replace:: ``requests.request()`` -.. |timeout| replace:: ``timeout`` -.. |verify| replace:: ``verify`` -.. |mature_content| replace:: ``mature_content`` -.. |webbrowser.open()| replace:: ``webbrowser.open()`` -.. |datetime| replace:: ``datetime`` -.. |datetime.max| replace:: ``datetime.max`` -.. |Date| replace:: ``Date`` -.. |Path| replace:: ``Path`` -.. |Last-Modified| replace:: ``Last-Modified`` -.. |Logging Configuration| replace:: ``Logging Configuration`` -.. |Postprocessor Configuration| replace:: ``Postprocessor Configuration`` -.. |strptime| replace:: strftime() and strptime() Behavior - -.. _base-directory: `extractor.*.base-directory`_ -.. _skipped: `extractor.*.skip`_ -.. _date-format: `extractor.*.date-format`_ -.. _deviantart.metadata: extractor.deviantart.metadata_ - -.. _.netrc: https://stackoverflow.com/tags/.netrc/info -.. _Last-Modified: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.29 -.. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects -.. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max -.. _format string: https://docs.python.org/3/library/string.html#formatstrings -.. _format strings: https://docs.python.org/3/library/string.html#formatstrings -.. _gettempdir: https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir -.. 
_strptime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior -.. _webbrowser.open(): https://docs.python.org/3/library/webbrowser.html -.. _mature_content: https://www.deviantart.com/developers/http/v1/20160316/object/deviation -.. _Authentication: https://github.com/mikf/gallery-dl#authentication -.. _OAuth: https://github.com/mikf/gallery-dl#oauth -.. _youtube-dl: https://github.com/ytdl-org/youtube-dl -.. _requests.request(): https://requests.readthedocs.io/en/master/api/#requests.request -.. _timeout: https://requests.readthedocs.io/en/master/user/advanced/#timeouts -.. _verify: https://requests.readthedocs.io/en/master/user/advanced/#ssl-cert-verification -.. _`Requests' proxy documentation`: https://requests.readthedocs.io/en/master/user/advanced/#proxies diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf deleted file mode 100644 index 04be5e6..0000000 --- a/docs/gallery-dl-example.conf +++ /dev/null @@ -1,177 +0,0 @@ -{ - "extractor": - { - "base-directory": "~/gallery-dl/", - "archive": "~/gallery-dl/archive.sqlite3", - "proxy": "http://10.10.1.10:3128", - - "postprocessors": [ - { - "name": "ugoira", - "whitelist": ["pixiv", "danbooru"], - "ffmpeg-twopass": true, - "ffmpeg-args": ["-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"] - }, - { - "name": "metadata", - "whitelist": ["danbooru", "yandere", "sankaku"], - "mode": "tags" - } - ], - - "pixiv": - { - "archive": "~/gallery-dl/archive-pixiv.sqlite3", - - "filename": "{id}{num}.{extension}", - "directory": ["Pixiv", "Works", "{user[id]}"], - - "username": "foo", - "password": "bar", - - "favorite": - { - "directory": ["Pixiv", "Favorites", "{user[id]}"] - }, - - "bookmark": - { - "directory": ["Pixiv", "My Bookmarks"], - - "username": "foo123", - "password": "bar123" - } - }, - - "exhentai": - { - "cookies": - { - "ipb_member_id": "12345", - "ipb_pass_hash": "1234567890abcdef" - }, - - "proxy": - { - "http": "http://10.10.1.10:8080", - "https": "https://10.10.1.10:443" - }, - - "filename": "{num:>04}_{name}.{extension}", - "directory": ["{category!c}", "{title}"], - - "wait-min": 1.0, - "wait-max": 5.0 - }, - - "mangadex": - { - "postprocessors": [{ - "name": "zip", - "keep-files": false, - "compression": "zip" - }] - }, - - "flickr": - { - "access-token": "1234567890-abcdef", - "access-token-secret": "1234567890abcdef", - "size-max": 1920 - }, - - "reddit": - { - "morecomments": true, - "date-min": "2017-01", - "date-format": "%Y-%m", - "recursion": 1 - }, - - "sankaku": - { - "sleep": 2, - "wait-min": 5.0, - "wait-max": 5.0, - "cookies": "~/gallery-dl/cookies-sankaku.txt" - }, - - "tumblr": - { - "posts": "all", - "external": false, - "reblogs": false, - "inline": true, - - "likes": - { - "posts": "video,photo,link", - "external": true, - "reblogs": true - } - }, - - "mastodon": - { - "mastodon.xyz": - { - "access-token": "cab65529..." - }, - "tabletop.social": { - "access-token": "513a36c6..." 
- }, - - "directory": ["mastodon", "{instance}", "{account[username]!l}"], - "filename": "{id}_{media[id]}.{extension}" - }, - - "foolslide": { - "otscans": {"root": "https://otscans.com/foolslide"}, - "helvetica": {"root": "https://helveticascans.com/r" } - }, - - "foolfuuka": { - "fireden-onion": {"root": "http://ydt6jy2ng3s3xg2e.onion"}, - "scalearchive": {"root": "https://archive.scaled.team" } - } - }, - - "downloader": - { - "part-directory": "/tmp/.download/", - "rate": "1M", - "retries": 3, - "timeout": 8.5 - }, - - "output": - { - "mode": "terminal", - "log": { - "level": "info", - "format": { - "debug" : "\u001b[0;37m{name}: {message}\u001b[0m", - "info" : "\u001b[1;37m{name}: {message}\u001b[0m", - "warning": "\u001b[1;33m{name}: {message}\u001b[0m", - "error" : "\u001b[1;31m{name}: {message}\u001b[0m" - } - }, - "logfile": { - "path": "~/gallery-dl/log.txt", - "mode": "w", - "level": "debug" - }, - "unsupportedfile": { - "path": "~/gallery-dl/unsupported.txt", - "mode": "a", - "format": "{asctime} {message}", - "format-date": "%Y-%m-%d-%H-%M-%S" - } - }, - - "cache": { - "file": "~/gallery-dl/cache.sqlite3" - }, - - "netrc": true -} diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf deleted file mode 100644 index ebf47ff..0000000 --- a/docs/gallery-dl.conf +++ /dev/null @@ -1,196 +0,0 @@ -{ - "extractor": - { - "base-directory": "./gallery-dl/", - "postprocessors": null, - "archive": null, - "cookies": null, - "proxy": null, - "skip": true, - "sleep": 0, - "path-restrict": "auto", - "path-remove": "\\u0000-\\u001f\\u007f", - "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0", - - "artstation": - { - "external": false - }, - "danbooru": - { - "username": null, - "password": null, - "ugoira": true - }, - "deviantart": - { - "refresh-token": null, - "extra": false, - "flat": true, - "folders": false, - "journals": "html", - "mature": true, - "metadata": false, - "original": true, - "quality": 100, - "wait-min": 0 - }, - "exhentai": - { - "username": null, - "password": null, - "original": true, - "wait-min": 3.0, - "wait-max": 6.0 - }, - "flickr": - { - "access-token": null, - "access-token-secret": null, - "videos": true, - "size-max": null - }, - "gelbooru": - { - "api": true - }, - "gfycat": - { - "format": "mp4" - }, - "idolcomplex": - { - "username": null, - "password": null, - "wait-min": 3.0, - "wait-max": 6.0 - }, - "imgur": - { - "mp4": true - }, - "instagram": - { - "highlights": false - }, - "kissmanga": - { - "captcha": "stop" - }, - "nijie": - { - "username": null, - "password": null - }, - "oauth": - { - "browser": true - }, - "pixiv": - { - "username": null, - "password": null, - "ugoira": true - }, - "reactor": - { - "wait-min": 3.0, - "wait-max": 6.0 - }, - "readcomiconline": - { - "captcha": "stop" - }, - "recursive": - { - "blacklist": ["directlink", "oauth", "recursive", "test"] - }, - "reddit": - { - "refresh-token": null, - "comments": 500, - "morecomments": false, - "date-min": 0, - "date-max": 253402210800, - "date-format": "%Y-%m-%dT%H:%M:%S", - "id-min": "0", - "id-max": "zik0zj", - "recursion": 0, - "user-agent": "Python:gallery-dl:0.8.4 (by /u/mikf1)" - }, - "sankaku": - { - "username": null, - "password": null, - "wait-min": 3.0, - "wait-max": 6.0 - }, - "seiga": - { - "username": null, - "password": null - }, - "tumblr": - { - "avatar": false, - "external": false, - "inline": true, - "posts": "all", - "reblogs": true - }, - "twitter": - { - "content": false, - "retweets": true, - "videos": false - }, - 
"wallhaven": - { - "api-key": null - }, - "booru": - { - "tags": false - } - }, - - "downloader": - { - "part": true, - "part-directory": null, - - "http": - { - "adjust-extensions": true, - "mtime": true, - "rate": null, - "retries": 4, - "timeout": 30.0, - "verify": true - }, - - "ytdl": - { - "format": null, - "forward-cookies": true, - "mtime": true, - "outtmpl": null, - "rate": null, - "retries": 4, - "timeout": 30.0, - "verify": true - } - }, - - "output": - { - "mode": "auto", - "progress": true, - "shorten": true, - "log": "[{name}][{levelname}] {message}", - "logfile": null, - "unsupportedfile": null - }, - - "netrc": false -} diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst deleted file mode 100644 index b0d6eba..0000000 --- a/docs/supportedsites.rst +++ /dev/null @@ -1,147 +0,0 @@ -Supported Sites -=============== -Unless otherwise known, assume all sites to be NSFW - -==================== =================================== ================================================== ================ -Site URL Capabilities Authentication -==================== =================================== ================================================== ================ -35PHOTO https://35photo.pro/ Genres, individual Images, User Profiles -3dbooru http://behoimi.org/ Pools, Popular Images, Posts, Tag-Searches -4chan https://www.4chan.org/ Threads -4plebs https://archive.4plebs.org/ Threads -500px https://500px.com/ Galleries, individual Images, User Profiles -8chan https://8ch.net/ Threads -8muses https://www.8muses.com/ Albums -Adobe Portfolio https://www.myportfolio.com/ Galleries -Adult Empire https://www.adultempire.com/ Galleries -arch.b4k.co https://arch.b4k.co/ Threads -Archive of Sins https://archiveofsins.com/ Threads -Archived.Moe https://archived.moe/ Threads -ArtStation https://www.artstation.com/ |artstation-C| -Behance https://www.behance.net/ Collections, Galleries, User Profiles -BobX http://www.bobx.com/dark/ Galleries, Idols -Danbooru https://danbooru.donmai.us/ Pools, Popular Images, Posts, Tag-Searches Optional -Desuarchive https://desuarchive.org/ Threads -DeviantArt https://www.deviantart.com/ |deviantart-C| Optional (OAuth) -Doki Reader https://kobato.hologfx.com/reader/ Chapters, Manga -Dynasty Reader https://dynasty-scans.com/ Chapters, individual Images, Search Results -E-Hentai https://e-hentai.org/ Favorites, Galleries, Search Results Optional -e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches -EroLord.com http://erolord.com/ Galleries -ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Optional -Fallen Angels Scans https://www.fascans.com/ Chapters, Manga -Fashion Nova https://www.fashionnova.com/ Collections, Products -Fireden https://boards.fireden.net/ Threads -Flickr https://www.flickr.com/ |flickr-C| Optional (OAuth) -Fuskator https://fuskator.com/ Galleries, Search Results -Futaba Channel https://www.2chan.net/ Threads -Gelbooru https://gelbooru.com/ Pools, Posts, Tag-Searches -Gfycat https://gfycat.com/ individual Images -HBrowse https://www.hbrowse.com/ Chapters, Manga -Hentai Cafe https://hentai.cafe/ Chapters, Manga -Hentai Foundry https://www.hentai-foundry.com/ |hentaifoundry-C| -Hentai2Read https://hentai2read.com/ Chapters, Manga -HentaiFox https://hentaifox.com/ Galleries, Search Results -HentaiHere https://hentaihere.com/ Chapters, Manga -Hentainexus https://hentainexus.com/ Galleries, Search Results -Hitomi.la https://hitomi.la/ Galleries -Hypnohub https://hypnohub.net/ Pools, Popular Images, Posts, 
Tag-Searches -Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional -ImageBam http://www.imagebam.com/ Galleries, individual Images -ImageFap https://imagefap.com/ Galleries, individual Images, User Profiles -ImgBB https://imgbb.com/ Albums, individual Images, User Profiles Optional -imgbox https://imgbox.com/ Galleries, individual Images -imgth https://imgth.com/ Galleries -imgur https://imgur.com/ |imgur-C| -Instagram https://www.instagram.com/ |instagram-C| Optional -Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga -Joyreactor http://joyreactor.com/ Posts, Search Results, Tag-Searches, User Profiles -Keenspot http://www.keenspot.com/ Comics -Khinsider https://downloads.khinsider.com/ Soundtracks -Kirei Cake https://reader.kireicake.com/ Chapters, Manga -KissManga https://kissmanga.com/ Chapters, Manga -Komikcast https://komikcast.com/ Chapters, Manga -Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches -LINE BLOG https://www.lineblog.me/ Blogs, Posts -livedoor Blog http://blog.livedoor.jp/ Blogs, Posts -Luscious https://luscious.net/ Albums, Search Results Optional -Manga Fox https://fanfox.net/ Chapters -Manga Here https://www.mangahere.cc/ Chapters, Manga -Manga Stream https://readms.net/ Chapters -MangaDex https://mangadex.org/ Chapters, Manga -Mangapanda https://www.mangapanda.com/ Chapters, Manga -MangaPark https://mangapark.me/ Chapters, Manga -Mangareader https://www.mangareader.net/ Chapters, Manga -Mangoxo https://www.mangoxo.com/ Albums, Channels Optional -Newgrounds https://www.newgrounds.com/ individual Images, User Profiles, Videos -Ngomik http://ngomik.in/ Chapters -nhentai https://nhentai.net/ Galleries, Search Results -Niconico Seiga https://seiga.nicovideo.jp/ individual Images, User Profiles Required -nijie https://nijie.info/ |nijie-C| Required -NSFWalbum.com https://nsfwalbum.com/ Albums -Nyafuu Archive https://archive.nyafuu.org/ Threads -Patreon https://www.patreon.com/ Creators, Posts, User Profiles -Pawoo https://pawoo.net/ Images from Statuses, User Profiles -Photobucket https://photobucket.com/ Albums, individual Images -Piczel https://piczel.tv/ Folders, individual Images, User Profiles -Pinterest https://www.pinterest.com/ Boards, Pins, pin.it Links, related Pins -Pixiv https://www.pixiv.net/ |pixiv-C| Required -Pixnet https://www.pixnet.net/ Folders, individual Images, Sets, User Profiles -Plurk https://www.plurk.com/ Posts, Timelines -Pornhub https://www.pornhub.com/ Galleries, User Profiles -Pornreactor http://pornreactor.cc/ Posts, Search Results, Tag-Searches, User Profiles -PowerManga https://read.powermanga.org/ Chapters, Manga -Pururin https://pururin.io/ Galleries -Read Comic Online https://readcomiconline.to/ Comic-Issues, Comics -RebeccaBlackTech https://rbt.asia/ Threads -Reddit https://www.reddit.com/ |reddit-C| Optional (OAuth) -rule #34 https://rule34.paheal.net/ Posts, Tag-Searches -Rule 34 https://rule34.xxx/ Pools, Posts, Tag-Searches -Safebooru https://safebooru.org/ Pools, Posts, Tag-Searches -Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional -Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag-Searches -Sen Manga https://raw.senmanga.com/ Chapters -Sense-Scans http://sensescans.com/reader/ Chapters, Manga -Sex.com https://www.sex.com/ Boards, Pins, related Pins, Search Results -Simply Hentai https://www.simply-hentai.com/ Galleries, individual Images, Videos -SlickPic https://www.slickpic.com/ Albums, User Profiles -SlideShare 
https://www.slideshare.net/ Presentations -SmugMug https://www.smugmug.com/ |smugmug-C| Optional (OAuth) -The /b/ Archive https://thebarchive.com/ Threads -Tsumino https://www.tsumino.com/ Galleries, Search Results Optional -Tumblr https://www.tumblr.com/ Likes, Posts, Tag-Searches, User Profiles Optional (OAuth) -Twitter https://twitter.com/ Media Timelines, Timelines, Tweets Optional -VSCO https://vsco.co/ Collections, individual Images, User Profiles -Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A| -Warosu https://warosu.org/ Threads -Weibo https://www.weibo.com/ Images from Statuses, User Profiles -WikiArt.org https://www.wikiart.org/ Artists, Artworks -World Three http://www.slide.world-three.org/ Chapters, Manga -xHamster https://xhamster.com/ Galleries, User Profiles -XVideos https://www.xvideos.com/ Galleries, User Profiles -Yandere https://yande.re/ Pools, Popular Images, Posts, Tag-Searches -yaplog! https://yaplog.jp/ Blogs, Posts -|yuki-S| https://yuki.la/ Threads -Acidimg https://acidimg.cc/ individual Images -Imagetwist https://imagetwist.com/ individual Images -Imagevenue http://imagevenue.com/ individual Images -Imgspice https://imgspice.com/ individual Images -Imxto https://imx.to/ individual Images -Pixhost https://pixhost.to/ individual Images -Postimg https://postimages.org/ individual Images -Turboimagehost https://www.turboimagehost.com/ individual Images -もえぴりあ https://vanilla-rock.com/ Posts, Tag-Searches -==================== =================================== ================================================== ================ - -.. |artstation-C| replace:: Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles -.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh -.. |flickr-C| replace:: Albums, Favorites, Galleries, Groups, individual Images, Search Results, User Profiles -.. |hentaifoundry-C| replace:: Favorites, individual Images, Popular Images, Recent Images, Scraps, User Profiles -.. |imgur-C| replace:: Albums, Favorites, Galleries, individual Images, User Profiles -.. |instagram-C| replace:: Channels, individual Images, Stories, Tag-Searches, User Profiles -.. |nijie-C| replace:: Doujin, Favorites, individual Images, User Profiles -.. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, Individual Images -.. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles -.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders -.. |wallhaven-A| replace:: Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__) -.. 
|yuki-S| replace:: yuki.la 4chan archive diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO new file mode 100644 index 0000000..ab46b5c --- /dev/null +++ b/gallery_dl.egg-info/PKG-INFO @@ -0,0 +1,286 @@ +Metadata-Version: 2.1 +Name: gallery-dl +Version: 1.11.1 +Summary: Command-line program to download image-galleries and -collections from several image hosting sites +Home-page: https://github.com/mikf/gallery-dl +Author: Mike Fährmann +Author-email: mike_faehrmann@web.de +Maintainer: Mike Fährmann +Maintainer-email: mike_faehrmann@web.de +License: GPLv2 +Download-URL: https://github.com/mikf/gallery-dl/releases/latest +Description: ========== + gallery-dl + ========== + + *gallery-dl* is a command-line program to download image-galleries and + -collections from several image hosting sites (see `Supported Sites`_). + It is a cross-platform tool with many configuration options + and powerful filenaming capabilities. + + + |pypi| |build| |gitter| + + + Dependencies + ============ + + - Python_ 3.4+ + - Requests_ + + Optional + -------- + + - FFmpeg_: Pixiv Ugoira to WebM conversion + - youtube-dl_: Video downloads + - pyOpenSSL_: Access Cloudflare protected sites + + + Installation + ============ + + Pip + --- + + The stable releases of *gallery-dl* are distributed on PyPI_ and can be + easily installed or upgraded using pip_: + + .. code:: bash + + $ python3 -m pip install --upgrade gallery-dl + + Installing the latest dev-version directly from GitHub can be done with + pip_ as well: + + .. code:: bash + + $ python3 -m pip install --upgrade https://github.com/mikf/gallery-dl/archive/master.tar.gz + + Note: Windows users should use :code:`py -3` instead of :code:`python3`. + + | It is advised to use the latest version of pip_, + including the essential packages :code:`setuptools` and :code:`wheel`. + | To ensure that these packages are up-to-date, run + + .. code:: bash + + $ python3 -m pip install --upgrade pip setuptools wheel + + + From Source + ----------- + + Get the code by either + + * Downloading a stable_ or dev_ archive and unpacking it + * Or via :code:`git clone https://github.com/mikf/gallery-dl.git` + + Navigate into the respective directory and run the :code:`setup.py` file. + + .. code:: bash + + $ wget https://github.com/mikf/gallery-dl/archive/master.tar.gz + $ tar -xf master.tar.gz + # or + $ git clone https://github.com/mikf/gallery-dl.git + + $ cd gallery-dl* + $ python3 setup.py install + + + Standalone Executable + --------------------- + + Download a standalone executable file, + put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, + and run it inside a command prompt (like ``cmd.exe``). + + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.bin>`__ + + These executables include a Python 3.7 interpreter + and all required Python packages. + + + Snap + ---- + + Linux users that are using a distro that is supported by Snapd_ can install *gallery-dl* from the Snap Store: + + .. code:: bash + + $ snap install gallery-dl + + + Usage + ===== + + To use *gallery-dl* simply call it with the URLs you wish to download images + from: + + .. code:: bash + + $ gallery-dl [OPTION]... URL... + + See also :code:`gallery-dl --help`. + + + Examples + -------- + + Download images; in this case from danbooru via tag search for 'bonocho': + + .. 
code:: bash + + $ gallery-dl http://danbooru.donmai.us/posts?tags=bonocho + + + Get the direct URL of an image from a site that requires authentication: + + .. code:: bash + + $ gallery-dl -g -u <username> -p <password> http://seiga.nicovideo.jp/seiga/im3211703 + + + | Search a remote resource for URLs and download images from them: + | (URLs for which no extractor can be found will be silently ignored) + + .. code:: bash + + $ gallery-dl r:https://pastebin.com/raw/FLwrCYsT + + + Configuration + ============= + + Configuration files for *gallery-dl* use a JSON-based file format. + + | For a (more or less) complete example with options set to their default values, + see gallery-dl.conf_. + | For a configuration file example with more involved settings and options, + see gallery-dl-example.conf_. + | A list of all available configuration options and their + descriptions can be found in configuration.rst_. + + *gallery-dl* searches for configuration files in the following places: + + +--------------------------------------------+------------------------------------------+ + | Linux | Windows | + +--------------------------------------------+------------------------------------------+ + |* ``/etc/gallery-dl.conf`` |* | + |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| + |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | + +--------------------------------------------+------------------------------------------+ + + (``%USERPROFILE%`` usually refers to the user's home directory, + i.e. ``C:\Users\<username>\``) + + Values in later configuration files will override previous ones. + + + Authentication + ============== + + Username & Password + ------------------- + + Some extractors require you to provide valid login-credentials in the form of + a username & password pair. This is necessary for + ``pixiv``, ``nijie``, and ``seiga`` + and optional (but strongly recommended) for + ``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``, + ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``. + + You can set the necessary information in your configuration file + (cf. gallery-dl.conf_) + + .. code:: + + { + "extractor": { + ... + "pixiv": { + "username": "<username>", + "password": "<password>" + } + ... + } + } + + or you can provide them directly via the + :code:`-u/--username` and :code:`-p/--password` or via the + :code:`-o/--option` command-line options + + .. code:: bash + + $ gallery-dl -u <username> -p <password> URL + $ gallery-dl -o username=<username> -o password=<password> URL + + OAuth + ----- + + *gallery-dl* supports user authentication via OAuth_ for + ``deviantart``, ``flickr``, ``reddit``, ``smugmug`` and ``tumblr``. + This is entirely optional, but grants *gallery-dl* the ability + to issue requests on your account's behalf and enables it to access resources + which would otherwise be unavailable to a public user. + + To link your account to *gallery-dl*, start by invoking it with + ``oauth:<site-name>`` as an argument. For example: + + .. code:: bash + + $ gallery-dl oauth:flickr + + You will be sent to the site's authorization page and asked to grant read + access to *gallery-dl*. Authorize it and you will be shown one or more + "tokens", which should be added to your configuration file. + + + .. _gallery-dl.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf + .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf + .. 
_configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst + .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.6.zip + .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip + + .. _Python: https://www.python.org/downloads/ + .. _PyPI: https://pypi.org/ + .. _pip: https://pip.pypa.io/en/stable/ + .. _Requests: https://requests.readthedocs.io/en/master/ + .. _FFmpeg: https://www.ffmpeg.org/ + .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ + .. _pyOpenSSL: https://pyopenssl.org/ + .. _Snapd: https://docs.snapcraft.io/installing-snapd + .. _OAuth: https://en.wikipedia.org/wiki/OAuth + + .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg + :target: https://pypi.org/project/gallery-dl/ + + .. |build| image:: https://travis-ci.org/mikf/gallery-dl.svg?branch=master + :target: https://travis-ci.org/mikf/gallery-dl + + .. |gitter| image:: https://badges.gitter.im/gallery-dl/main.svg + :target: https://gitter.im/gallery-dl/main + +Keywords: image gallery downloader crawler scraper +Platform: UNKNOWN +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Intended Audience :: End Users/Desktop +Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2) +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: MacOS +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Multimedia :: Graphics +Classifier: Topic :: Utilities +Requires-Python: >=3.4 +Provides-Extra: cloudflare +Provides-Extra: video diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt new file mode 100644 index 0000000..16db33a --- /dev/null +++ b/gallery_dl.egg-info/SOURCES.txt @@ -0,0 +1,163 @@ +README.rst +setup.cfg +setup.py +data/completion/gallery-dl +data/man/gallery-dl.1 +data/man/gallery-dl.conf.5 +gallery_dl/__init__.py +gallery_dl/__main__.py +gallery_dl/aes.py +gallery_dl/cache.py +gallery_dl/cloudflare.py +gallery_dl/config.py +gallery_dl/exception.py +gallery_dl/job.py +gallery_dl/oauth.py +gallery_dl/option.py +gallery_dl/output.py +gallery_dl/text.py +gallery_dl/util.py +gallery_dl/version.py +gallery_dl.egg-info/PKG-INFO +gallery_dl.egg-info/SOURCES.txt +gallery_dl.egg-info/dependency_links.txt +gallery_dl.egg-info/entry_points.txt +gallery_dl.egg-info/requires.txt +gallery_dl.egg-info/top_level.txt +gallery_dl/downloader/__init__.py +gallery_dl/downloader/common.py +gallery_dl/downloader/http.py +gallery_dl/downloader/text.py +gallery_dl/downloader/ytdl.py +gallery_dl/extractor/2chan.py +gallery_dl/extractor/35photo.py +gallery_dl/extractor/3dbooru.py +gallery_dl/extractor/4chan.py +gallery_dl/extractor/500px.py +gallery_dl/extractor/8muses.py +gallery_dl/extractor/__init__.py +gallery_dl/extractor/adultempire.py +gallery_dl/extractor/artstation.py +gallery_dl/extractor/behance.py +gallery_dl/extractor/blogger.py +gallery_dl/extractor/bobx.py +gallery_dl/extractor/booru.py +gallery_dl/extractor/common.py +gallery_dl/extractor/danbooru.py +gallery_dl/extractor/deviantart.py 
+gallery_dl/extractor/directlink.py +gallery_dl/extractor/dynastyscans.py +gallery_dl/extractor/e621.py +gallery_dl/extractor/erolord.py +gallery_dl/extractor/exhentai.py +gallery_dl/extractor/fallenangels.py +gallery_dl/extractor/flickr.py +gallery_dl/extractor/foolfuuka.py +gallery_dl/extractor/foolslide.py +gallery_dl/extractor/fuskator.py +gallery_dl/extractor/gelbooru.py +gallery_dl/extractor/gfycat.py +gallery_dl/extractor/hbrowse.py +gallery_dl/extractor/hentai2read.py +gallery_dl/extractor/hentaicafe.py +gallery_dl/extractor/hentaifoundry.py +gallery_dl/extractor/hentaifox.py +gallery_dl/extractor/hentaihere.py +gallery_dl/extractor/hentainexus.py +gallery_dl/extractor/hitomi.py +gallery_dl/extractor/hypnohub.py +gallery_dl/extractor/idolcomplex.py +gallery_dl/extractor/imagebam.py +gallery_dl/extractor/imagefap.py +gallery_dl/extractor/imagehosts.py +gallery_dl/extractor/imgbb.py +gallery_dl/extractor/imgbox.py +gallery_dl/extractor/imgth.py +gallery_dl/extractor/imgur.py +gallery_dl/extractor/instagram.py +gallery_dl/extractor/issuu.py +gallery_dl/extractor/keenspot.py +gallery_dl/extractor/khinsider.py +gallery_dl/extractor/kissmanga.py +gallery_dl/extractor/komikcast.py +gallery_dl/extractor/konachan.py +gallery_dl/extractor/lineblog.py +gallery_dl/extractor/livedoor.py +gallery_dl/extractor/luscious.py +gallery_dl/extractor/mangadex.py +gallery_dl/extractor/mangafox.py +gallery_dl/extractor/mangahere.py +gallery_dl/extractor/mangapanda.py +gallery_dl/extractor/mangapark.py +gallery_dl/extractor/mangareader.py +gallery_dl/extractor/mangastream.py +gallery_dl/extractor/mangoxo.py +gallery_dl/extractor/mastodon.py +gallery_dl/extractor/message.py +gallery_dl/extractor/myportfolio.py +gallery_dl/extractor/naver.py +gallery_dl/extractor/newgrounds.py +gallery_dl/extractor/ngomik.py +gallery_dl/extractor/nhentai.py +gallery_dl/extractor/nijie.py +gallery_dl/extractor/nozomi.py +gallery_dl/extractor/nsfwalbum.py +gallery_dl/extractor/oauth.py +gallery_dl/extractor/paheal.py +gallery_dl/extractor/patreon.py +gallery_dl/extractor/photobucket.py +gallery_dl/extractor/piczel.py +gallery_dl/extractor/pinterest.py +gallery_dl/extractor/pixiv.py +gallery_dl/extractor/pixnet.py +gallery_dl/extractor/plurk.py +gallery_dl/extractor/pornhub.py +gallery_dl/extractor/pururin.py +gallery_dl/extractor/reactor.py +gallery_dl/extractor/readcomiconline.py +gallery_dl/extractor/recursive.py +gallery_dl/extractor/reddit.py +gallery_dl/extractor/rule34.py +gallery_dl/extractor/safebooru.py +gallery_dl/extractor/sankaku.py +gallery_dl/extractor/sankakucomplex.py +gallery_dl/extractor/seiga.py +gallery_dl/extractor/senmanga.py +gallery_dl/extractor/sexcom.py +gallery_dl/extractor/shopify.py +gallery_dl/extractor/simplyhentai.py +gallery_dl/extractor/slickpic.py +gallery_dl/extractor/slideshare.py +gallery_dl/extractor/smugmug.py +gallery_dl/extractor/test.py +gallery_dl/extractor/tsumino.py +gallery_dl/extractor/tumblr.py +gallery_dl/extractor/twitter.py +gallery_dl/extractor/vanillarock.py +gallery_dl/extractor/vsco.py +gallery_dl/extractor/wallhaven.py +gallery_dl/extractor/warosu.py +gallery_dl/extractor/weibo.py +gallery_dl/extractor/wikiart.py +gallery_dl/extractor/xhamster.py +gallery_dl/extractor/xvideos.py +gallery_dl/extractor/yandere.py +gallery_dl/extractor/yaplog.py +gallery_dl/extractor/yuki.py +gallery_dl/postprocessor/__init__.py +gallery_dl/postprocessor/classify.py +gallery_dl/postprocessor/common.py +gallery_dl/postprocessor/exec.py +gallery_dl/postprocessor/metadata.py 
+gallery_dl/postprocessor/mtime.py +gallery_dl/postprocessor/ugoira.py +gallery_dl/postprocessor/zip.py +test/test_config.py +test/test_cookies.py +test/test_downloader.py +test/test_extractor.py +test/test_oauth.py +test/test_postprocessor.py +test/test_results.py +test/test_text.py +test/test_util.py
\ No newline at end of file diff --git a/gallery_dl.egg-info/dependency_links.txt b/gallery_dl.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/gallery_dl.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/gallery_dl.egg-info/entry_points.txt b/gallery_dl.egg-info/entry_points.txt new file mode 100644 index 0000000..53cf510 --- /dev/null +++ b/gallery_dl.egg-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +gallery-dl = gallery_dl:main + diff --git a/gallery_dl.egg-info/requires.txt b/gallery_dl.egg-info/requires.txt new file mode 100644 index 0000000..821055e --- /dev/null +++ b/gallery_dl.egg-info/requires.txt @@ -0,0 +1,8 @@ +requests>=2.11.0 + +[cloudflare] +pyOpenSSL>=19.0.0 +cryptography>=2.8.0 + +[video] +youtube-dl diff --git a/gallery_dl.egg-info/top_level.txt b/gallery_dl.egg-info/top_level.txt new file mode 100644 index 0000000..9e5039c --- /dev/null +++ b/gallery_dl.egg-info/top_level.txt @@ -0,0 +1 @@ +gallery_dl diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 94a445a..9665823 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -234,6 +234,7 @@ def main(): if pformat and len(urls) > 1 and args.loglevel < logging.ERROR: urls = progress(urls, pformat) + retval = 0 for url in urls: try: log.debug("Starting %s for '%s'", jobtype.__name__, url) @@ -241,17 +242,20 @@ def main(): for key, value in url.gconfig: config.set(key, value) with config.apply(url.lconfig): - jobtype(url.value).run() + retval |= jobtype(url.value).run() else: - jobtype(url).run() + retval |= jobtype(url).run() except exception.NoExtractorError: log.error("No suitable extractor found for '%s'", url) + retval |= 64 + return retval except KeyboardInterrupt: sys.exit("\nKeyboardInterrupt") except BrokenPipeError: pass - except IOError as exc: + except OSError as exc: import errno if exc.errno != errno.EPIPE: raise + return 1 diff --git a/gallery_dl/__main__.py b/gallery_dl/__main__.py index 04ea9fe..637d463 100644 --- a/gallery_dl/__main__.py +++ b/gallery_dl/__main__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2017 Mike Fährmann +# Copyright 2017-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,4 +17,4 @@ if __package__ is None and not hasattr(sys, "frozen"): import gallery_dl if __name__ == "__main__": - gallery_dl.main() + sys.exit(gallery_dl.main()) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index bb45de2..1c78cfb 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -107,7 +107,7 @@ class HttpDownloader(DownloaderBase): elif code == 416 and filesize: # Requested Range Not Satisfiable break else: - msg = "{}: {} for url: {}".format(code, response.reason, url) + msg = "'{} {}' for '{}'".format(code, response.reason, url) if code == 429 or 500 <= code < 600: # Server Error continue self.log.warning("%s", msg) diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index ce921e3..fe6c4bc 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -29,6 +29,7 @@ class YoutubeDLDownloader(DownloaderBase): "nocheckcertificate": not self.config("verify", extractor._verify), "nopart": not self.part, "updatetime": self.config("mtime", True), + "proxy": extractor.session.proxies.get("http"), } options.update(self.config("raw-options") or {}) @@ -58,6 +59,11 @@ class 
YoutubeDLDownloader(DownloaderBase): return self._download_playlist(pathfmt, info_dict) else: info_dict = info_dict["entries"][index] + + extra = pathfmt.kwdict.get("_ytdl_extra") + if extra: + info_dict.update(extra) + return self._download_video(pathfmt, info_dict) def _download_video(self, pathfmt, info_dict): diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py index 3e86177..783e2b2 100644 --- a/gallery_dl/exception.py +++ b/gallery_dl/exception.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2018 Mike Fährmann +# Copyright 2015-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,63 +17,90 @@ Exception | +-- AuthorizationError | +-- NotFoundError | +-- HttpError - +-- DownloadError - | +-- DownloadComplete - | +-- DownloadRetry - +-- NoExtractorError +-- FormatError + | +-- FilenameFormatError + | +-- DirectoryFormatError +-- FilterError + +-- NoExtractorError +-- StopExtraction """ class GalleryDLException(Exception): """Base class for GalleryDL exceptions""" + default = None + msgfmt = None + code = 1 + + def __init__(self, message=None): + if not message: + message = self.default + elif isinstance(message, Exception): + message = "{}: {}".format(message.__class__.__name__, message) + if self.msgfmt: + message = self.msgfmt.format(message) + Exception.__init__(self, message) class ExtractionError(GalleryDLException): """Base class for exceptions during information extraction""" -class AuthenticationError(ExtractionError): - """Invalid or missing login information""" - - -class AuthorizationError(ExtractionError): - """Insufficient privileges to access a resource""" +class HttpError(ExtractionError): + """HTTP request during data extraction failed""" + default = "HTTP request failed" + code = 4 class NotFoundError(ExtractionError): - """Requested resource (gallery/image) does not exist""" - - -class HttpError(ExtractionError): - """HTTP request during extraction failed""" + """Requested resource (gallery/image) could not be found""" + msgfmt = "Requested {} could not be found" + default = "resource (gallery/image)" + code = 8 -class DownloadError(GalleryDLException): - """Base class for exceptions during file downloads""" +class AuthenticationError(ExtractionError): + """Invalid or missing login credentials""" + default = "Invalid or missing login credentials" + code = 16 -class DownloadRetry(DownloadError): - """Download attempt failed and should be retried""" +class AuthorizationError(ExtractionError): + """Insufficient privileges to access a resource""" + default = "Insufficient privileges to access the specified resource" + code = 16 -class DownloadComplete(DownloadError): - """Output file of attempted download is already complete""" +class FormatError(GalleryDLException): + """Error while building output paths""" + code = 32 -class NoExtractorError(GalleryDLException): - """No extractor can handle the given URL""" +class FilenameFormatError(FormatError): + """Error while building output filenames""" + msgfmt = "Applying filename format string failed ({})" -class FormatError(GalleryDLException): - """Error while building output path""" +class DirectoryFormatError(FormatError): + """Error while building output directory paths""" + msgfmt = "Applying directory format string failed ({})" class FilterError(GalleryDLException): """Error while evaluating a filter expression""" + msgfmt = "Evaluating filter expression failed ({})" + code = 32 + + 
+class NoExtractorError(GalleryDLException): + """No extractor can handle the given URL""" + code = 64 class StopExtraction(GalleryDLException): - """Extraction should stop""" + """Stop data extraction""" + + def __init__(self, message=None, *args): + GalleryDLException.__init__(self) + self.message = message % args if args else message + self.code = 1 if message else 0 diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index 8df8645..33e7929 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -6,13 +6,13 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.2chan.net/""" +"""Extractors for https://www.2chan.net/""" from .common import Extractor, Message from .. import text -class FutabaThreadExtractor(Extractor): +class _2chanThreadExtractor(Extractor): """Extractor for images from threads on www.2chan.net""" category = "2chan" subcategory = "thread" diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index 15f4207..febbb51 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -11,7 +11,7 @@ from . import booru -class ThreedeebooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor): +class _3dbooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor): """Base class for 3dbooru extractors""" category = "3dbooru" api_url = "http://behoimi.org/post/index.json" @@ -26,8 +26,7 @@ class ThreedeebooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor): }) -class ThreedeebooruTagExtractor(booru.TagMixin, - ThreedeebooruExtractor): +class _3dbooruTagExtractor(booru.TagMixin, _3dbooruExtractor): """Extractor for images from behoimi.org based on search-tags""" pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post" r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)") @@ -37,8 +36,7 @@ class ThreedeebooruTagExtractor(booru.TagMixin, }) -class ThreedeebooruPoolExtractor(booru.PoolMixin, - ThreedeebooruExtractor): +class _3dbooruPoolExtractor(booru.PoolMixin, _3dbooruExtractor): """Extractor for image-pools from behoimi.org""" pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)" test = ("http://behoimi.org/pool/show/27", { @@ -47,8 +45,7 @@ class ThreedeebooruPoolExtractor(booru.PoolMixin, }) -class ThreedeebooruPostExtractor(booru.PostMixin, - ThreedeebooruExtractor): +class _3dbooruPostExtractor(booru.PostMixin, _3dbooruExtractor): """Extractor for single images from behoimi.org""" pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)" test = ("http://behoimi.org/post/show/140852", { @@ -64,8 +61,7 @@ class ThreedeebooruPostExtractor(booru.PostMixin, }) -class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin, - ThreedeebooruExtractor): +class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor): """Extractor for popular images from behoimi.org""" pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org" r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)" diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index e387b33..36a0573 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -6,15 +6,19 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images and videos from https://www.4chan.org/""" +"""Extractors for https://www.4chan.org/""" -from . 
import chan +from .common import Extractor, Message from .. import text -class FourchanThreadExtractor(chan.ChanThreadExtractor): - """Extractor for images from threads from 4chan.org""" +class _4chanThreadExtractor(Extractor): + """Extractor for 4chan threads""" category = "4chan" + subcategory = "thread" + directory_fmt = ("{category}", "{board}", "{thread} {title}") + filename_fmt = "{tim} {filename}.{extension}" + archive_fmt = "{board}_{thread}_{tim}" pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org" r"/([^/]+)/thread/(\d+)") test = ( @@ -28,9 +32,30 @@ class FourchanThreadExtractor(chan.ChanThreadExtractor): "keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a", }), ) - api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" - file_url = "https://i.4cdn.org/{board}/{tim}{ext}" - def update(self, post, data=None): - chan.ChanThreadExtractor.update(self, post, data) - post["filename"] = text.unescape(post["filename"]) + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.thread = match.groups() + + def items(self): + url = "https://a.4cdn.org/{}/thread/{}.json".format( + self.board, self.thread) + posts = self.request(url).json()["posts"] + title = posts[0].get("sub") or text.remove_html(posts[0]["com"]) + + data = { + "board" : self.board, + "thread": self.thread, + "title" : text.unescape(title)[:50], + } + + yield Message.Version, 1 + yield Message.Directory, data + for post in posts: + if "filename" in post: + post.update(data) + post["extension"] = post["ext"][1:] + post["filename"] = text.unescape(post["filename"]) + url = "https://i.4cdn.org/{}/{}{}".format( + post["board"], post["tim"], post["ext"]) + yield Message.Url, url, post diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py deleted file mode 100644 index e526da3..0000000 --- a/gallery_dl/extractor/8chan.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2014-2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extract images and videos from https://8ch.net/""" - -from . 
import chan - - -class InfinitychanThreadExtractor(chan.ChanThreadExtractor): - """Extractor for images from threads from 8ch.net""" - category = "8chan" - filename_fmt = "{time}-{filename}{ext}" - pattern = r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)" - test = ("https://8ch.net/builders/res/3.html", { - "url": "5d85c0509f907f217aea379f862b41bf3d01f645", - "keyword": "0c497190c0c0f826925fde09815351d01869c783", - }) - api_url = "https://8ch.net/{board}/res/{thread}.json" - file_url = "https://media.8ch.net/{board}/src/{tim}{ext}" - file_url_v2 = "https://media.8ch.net/file_store/{tim}{ext}" - - def build_url(self, post): - fmt = self.file_url if len(post["tim"]) < 64 else self.file_url_v2 - return fmt.format_map(post) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 351c5df..b8f74d1 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -15,11 +15,11 @@ modules = [ "3dbooru", "4chan", "500px", - "8chan", "8muses", "adultempire", "artstation", "behance", + "blogger", "bobx", "danbooru", "deviantart", @@ -49,6 +49,7 @@ modules = [ "imgth", "imgur", "instagram", + "issuu", "keenspot", "khinsider", "kissmanga", @@ -66,10 +67,12 @@ modules = [ "mangastream", "mangoxo", "myportfolio", + "naver", "newgrounds", "ngomik", "nhentai", "nijie", + "nozomi", "nsfwalbum", "paheal", "patreon", diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py index 85d8266..8160e48 100644 --- a/gallery_dl/extractor/adultempire.py +++ b/gallery_dl/extractor/adultempire.py @@ -21,12 +21,12 @@ class AdultempireGalleryExtractor(GalleryExtractor): test = ( ("https://www.adultempire.com/5998/gallery.html", { "range": "1", - "keyword": "25c8171f5623678491a0d7bdf38a7a6ebfa4a361", + "keyword": "5b3266e69801db0d78c22181da23bc102886e027", "content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e", }), ("https://www.adultdvdempire.com/5683/gallery.html", { "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d", - "keyword": "9634eb16cc6dbf347eb9dcdd9b2a499dfd04d167", + "keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5", }), ) @@ -55,4 +55,4 @@ class AdultempireGalleryExtractor(GalleryExtractor): if len(urls) < 24: return params["page"] += 1 - page = self.request(self.chapter_url, params=params).text + page = self.request(self.gallery_url, params=params).text diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py new file mode 100644 index 0000000..31bbaf8 --- /dev/null +++ b/gallery_dl/extractor/blogger.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for Blogger blogs""" + +from .common import Extractor, Message +from .. 
import text +import re + +BASE_PATTERN = ( + r"(?:blogger:(?:https?://)?([^/]+)|" + r"(?:https?://)?([^.]+\.blogspot\.com))") + + +class BloggerExtractor(Extractor): + """Base class for blogger extractors""" + category = "blogger" + directory_fmt = ("{category}", "{blog[name]}", + "{post[date]:%Y-%m-%d} {post[title]}") + filename_fmt = "{num:>03}.{extension}" + archive_fmt = "{post[id]}_{num}" + root = "https://www.blogger.com" + + def __init__(self, match): + Extractor.__init__(self, match) + self.blog = match.group(1) or match.group(2) + self.api = BloggerAPI(self) + + def items(self): + yield Message.Version, 1 + + blog = self.api.blog_by_url("http://" + self.blog) + blog["pages"] = blog["pages"]["totalItems"] + blog["posts"] = blog["posts"]["totalItems"] + blog["date"] = text.parse_datetime(blog["published"]) + del blog["selfLink"] + + sub = re.compile(r"/s\d+/").sub + findall = re.compile( + r'src="(https?://\d+\.bp\.blogspot\.com/[^"]+)"').findall + + for post in self.posts(blog): + images = findall(post["content"]) + if not images: + continue + + post["author"] = post["author"]["displayName"] + post["replies"] = post["replies"]["totalItems"] + post["content"] = text.remove_html(post["content"]) + post["date"] = text.parse_datetime(post["published"]) + del post["selfLink"] + del post["blog"] + + yield Message.Directory, {"blog": blog, "post": post} + for num, url in enumerate(images, 1): + url = sub("/s0/", url).replace("http:", "https:", 1) + yield Message.Url, url, text.nameext_from_url(url, { + "blog": blog, + "post": post, + "url" : url, + "num" : num, + }) + + def posts(self, blog): + """Return an iterable with all relevant post objects""" + + +class BloggerPostExtractor(BloggerExtractor): + """Extractor for a single blog post""" + subcategory = "post" + pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?&#]+\.html)" + test = ( + ("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", { + "url": "9928429fb62f712eb4de80f53625eccecc614aae", + "pattern": r"https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg", + "keyword": { + "blog": { + "date" : "type:datetime", + "description": "", + "id" : "5623928067739466034", + "kind" : "blogger#blog", + "locale" : dict, + "name" : "Julian Bunker Photography", + "pages" : int, + "posts" : int, + "published" : "2010-11-21T10:19:42-08:00", + "updated" : str, + "url" : "http://www.julianbunker.com/", + }, + "post": { + "author" : "Julian Bunker", + "content" : str, + "date" : "type:datetime", + "etag" : str, + "id" : "6955139236418998998", + "kind" : "blogger#post", + "published" : "2010-12-25T17:08:00-08:00", + "replies" : "0", + "title" : "Moon Rise", + "updated" : "2011-12-06T05:21:24-08:00", + "url" : "re:.+/2010/12/moon-rise.html$", + }, + "num": int, + "url": str, + }, + }), + ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html", { + "url": "9928429fb62f712eb4de80f53625eccecc614aae", + }), + ) + + def __init__(self, match): + BloggerExtractor.__init__(self, match) + self.path = match.group(3) + + def posts(self, blog): + return (self.api.post_by_path(blog["id"], self.path),) + + +class BloggerBlogExtractor(BloggerExtractor): + """Extractor for an entire Blogger blog""" + subcategory = "blog" + pattern = BASE_PATTERN + "/?$" + test = ( + ("https://julianbphotography.blogspot.com/", { + "range": "1-25", + "count": 25, + "pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg", + }), + ("blogger:http://www.julianbunker.com/", { + "range": "1-25", + "count": 25, + }), + ) + + def posts(self, blog): + return 
self.api.blog_posts(blog["id"]) + + +class BloggerAPI(): + """Minimal interface for the Blogger v3 API + + Ref: https://developers.google.com/blogger + """ + API_KEY = "AIzaSyCN9ax34oMMyM07g_M-5pjeDp_312eITK8" + + def __init__(self, extractor): + self.extractor = extractor + self.api_key = extractor.config("api-key", self.API_KEY) + + def blog_by_url(self, url): + return self._call("blogs/byurl", {"url": url}) + + def blog_posts(self, blog_id): + return self._pagination("blogs/{}/posts".format(blog_id), {}) + + def post_by_path(self, blog_id, path): + endpoint = "blogs/{}/posts/bypath".format(blog_id) + return self._call(endpoint, {"path": path}) + + def _call(self, endpoint, params): + url = "https://www.googleapis.com/blogger/v3/" + endpoint + params["key"] = self.api_key + return self.extractor.request(url, params=params).json() + + def _pagination(self, endpoint, params): + while True: + data = self._call(endpoint, params) + yield from data["items"] + + if "nextPageToken" not in data: + return + params["pageToken"] = data["nextPageToken"] diff --git a/gallery_dl/extractor/bobx.py b/gallery_dl/extractor/bobx.py index 67427a7..dba5fe7 100644 --- a/gallery_dl/extractor/bobx.py +++ b/gallery_dl/extractor/bobx.py @@ -94,7 +94,8 @@ class BobxIdolExtractor(BobxExtractor): subcategory = "idol" pattern = r"(?:https?://)?(?:www\.)?bobx\.com/([^/]+/[^/?&#]+)/?$" test = ("http://www.bobx.com/idol/rin-okabe/", { - "url": "74d80bfcd53b738b31909bb42e5cc97c41b475b8", + "pattern": BobxGalleryExtractor.pattern, + "count": ">= 6", }) def items(self): @@ -107,6 +108,5 @@ class BobxIdolExtractor(BobxExtractor): for part in text.extract_iter(page, '="photoset/', '"'): # skip every other entry skip = not skip - if skip: - continue - yield Message.Queue, "{}photoset/{}".format(url, part), data + if not skip: + yield Message.Queue, "{}photoset/{}".format(url, part), data diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py deleted file mode 100644 index 5e44fd9..0000000 --- a/gallery_dl/extractor/chan.py +++ /dev/null @@ -1,61 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2015-2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Base classes for extractors for different Futaba Channel-like boards""" - -from .common import Extractor, Message -from .. 
import text - - -class ChanThreadExtractor(Extractor): - """Base class for extractors for Futaba Channel-like boards""" - category = "chan" - subcategory = "thread" - directory_fmt = ("{category}", "{board}", "{thread} - {title}") - filename_fmt = "{tim}-{filename}.{extension}" - archive_fmt = "{board}_{thread}_{tim}" - api_url = "" - file_url = "" - - def __init__(self, match): - Extractor.__init__(self, match) - self.metadata = { - "board": match.group(1), - "thread": match.group(2), - } - - def items(self): - yield Message.Version, 1 - url = self.api_url.format_map(self.metadata) - posts = self.request(url).json()["posts"] - self.metadata["title"] = self.get_thread_title(posts[0]) - yield Message.Directory, self.metadata - for post in posts: - if "filename" not in post: - continue - self.update(post) - yield Message.Url, self.build_url(post), post - if "extra_files" in post: - for file in post["extra_files"]: - self.update(post, file) - yield Message.Url, self.build_url(post), post - - def update(self, post, data=None): - """Update keyword dictionary""" - post.update(data or self.metadata) - post["extension"] = post["ext"][1:] - - def build_url(self, post): - """Construct an image url out of a post object""" - return self.file_url.format_map(post) - - @staticmethod - def get_thread_title(post): - """Return thread title from first post""" - title = post["sub"] if "sub" in post else text.remove_html(post["com"]) - return text.unescape(title)[:50] diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index a90af1c..0d258eb 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -18,7 +18,7 @@ import requests import threading import http.cookiejar from .message import Message -from .. import config, text, exception, cloudflare +from .. 
import config, text, util, exception, cloudflare class Extractor(): @@ -37,9 +37,9 @@ class Extractor(): self.session = requests.Session() self.log = logging.getLogger(self.category) self.url = match.string - self._init_headers() - self._init_cookies() - self._init_proxies() + + self._cookiefile = None + self._cookiejar = self.session.cookies self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) @@ -47,6 +47,10 @@ class Extractor(): if self._retries < 0: self._retries = float("inf") + self._init_headers() + self._init_cookies() + self._init_proxies() + @classmethod def from_url(cls, url): if isinstance(cls.pattern, str): @@ -67,7 +71,7 @@ class Extractor(): return config.interpolate( ("extractor", self.category, self.subcategory, key), default) - def request(self, url, method="GET", *, session=None, retries=None, + def request(self, url, *, method="GET", session=None, retries=None, encoding=None, fatal=True, notfound=None, **kwargs): tries = 1 retries = self._retries if retries is None else retries @@ -110,7 +114,7 @@ class Extractor(): msg = "" self.log.warning("Cloudflare CAPTCHA" + msg) - msg = "{}: {} for url: {}".format(code, response.reason, url) + msg = "'{} {}' for '{}'".format(code, response.reason, url) if code < 500 and code != 429 and code != 430: break @@ -141,7 +145,7 @@ class Extractor(): return username, password def _init_headers(self): - """Set additional headers for the 'session' object""" + """Initialize HTTP headers for the 'session' object""" headers = self.session.headers headers.clear() @@ -174,26 +178,43 @@ class Extractor(): if cookies: if isinstance(cookies, dict): self._update_cookies_dict(cookies, self.cookiedomain) - else: + elif isinstance(cookies, str): + cookiefile = util.expand_path(cookies) cookiejar = http.cookiejar.MozillaCookieJar() try: - cookiejar.load(cookies) + cookiejar.load(cookiefile) except OSError as exc: self.log.warning("cookies: %s", exc) else: - self.session.cookies.update(cookiejar) + self._cookiejar.update(cookiejar) + self._cookiefile = cookiefile + else: + self.log.warning( + "expected 'dict' or 'str' value for 'cookies' option, " + "got '%s' (%s)", cookies.__class__.__name__, cookies) cookies = cloudflare.cookies(self.category) if cookies: domain, cookies = cookies self._update_cookies_dict(cookies, domain) + def _store_cookies(self): + """Store the session's cookiejar in a cookies.txt file""" + if self._cookiefile and self.config("cookies-update", False): + cookiejar = http.cookiejar.MozillaCookieJar() + for cookie in self._cookiejar: + cookiejar.set_cookie(cookie) + try: + cookiejar.save(self._cookiefile) + except OSError as exc: + self.log.warning("cookies: %s", exc) + def _update_cookies(self, cookies, *, domain=""): """Update the session's cookiejar with 'cookies'""" if isinstance(cookies, dict): self._update_cookies_dict(cookies, domain or self.cookiedomain) else: - setcookie = self.session.cookies.set_cookie + setcookie = self._cookiejar.set_cookie try: cookies = iter(cookies) except TypeError: @@ -204,17 +225,17 @@ class Extractor(): def _update_cookies_dict(self, cookiedict, domain): """Update cookiejar with name-value pairs from a dict""" - setcookie = self.session.cookies.set + setcookie = self._cookiejar.set for name, value in cookiedict.items(): setcookie(name, value, domain=domain) - def _check_cookies(self, cookienames, *, domain=""): + def _check_cookies(self, cookienames, *, domain=None): """Check if all 'cookienames' are in the session's 
cookiejar""" - if not domain: + if domain is None: domain = self.cookiedomain try: for name in cookienames: - self.session.cookies._find(name, domain) + self._cookiejar._find(name, domain) except KeyError: return False return True @@ -249,24 +270,21 @@ class Extractor(): yield test -class ChapterExtractor(Extractor): +class GalleryExtractor(Extractor): - subcategory = "chapter" - directory_fmt = ( - "{category}", "{manga}", - "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}") - filename_fmt = ( - "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}") - archive_fmt = ( - "{manga}_{chapter}{chapter_minor}_{page}") + subcategory = "gallery" + filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}" + directory_fmt = ("{category}", "{gallery_id} {title}") + archive_fmt = "{gallery_id}_{num}" + enum = "num" def __init__(self, match, url=None): Extractor.__init__(self, match) - self.chapter_url = url or self.root + match.group(1) + self.gallery_url = self.root + match.group(1) if url is None else url def items(self): self.login() - page = self.request(self.chapter_url).text + page = self.request(self.gallery_url).text data = self.metadata(page) imgs = self.images(page) @@ -284,7 +302,7 @@ class ChapterExtractor(Extractor): yield Message.Version, 1 yield Message.Directory, data - for data["page"], (url, imgdata) in images: + for data[self.enum], (url, imgdata) in images: if imgdata: data.update(imgdata) yield Message.Url, url, text.nameext_from_url(url, data) @@ -299,6 +317,19 @@ class ChapterExtractor(Extractor): """Return a list of all (image-url, metadata)-tuples""" +class ChapterExtractor(GalleryExtractor): + + subcategory = "chapter" + directory_fmt = ( + "{category}", "{manga}", + "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}") + filename_fmt = ( + "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}") + archive_fmt = ( + "{manga}_{chapter}{chapter_minor}_{page}") + enum = "page" + + class MangaExtractor(Extractor): subcategory = "manga" @@ -333,14 +364,6 @@ class MangaExtractor(Extractor): """Return a list of all (chapter-url, metadata)-tuples""" -class GalleryExtractor(ChapterExtractor): - - subcategory = "gallery" - filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}" - directory_fmt = ("{category}", "{gallery_id} {title}") - archive_fmt = "{gallery_id}_{page}" - - class AsynchronousMixin(): """Run info extraction in a separate thread""" diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index ab32a00..eeee74a 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -58,9 +58,12 @@ class DeviantartExtractor(Extractor): def items(self): if self.user: - self.group = not self.api.user_profile(self.user) + profile = self.api.user_profile(self.user) + self.group = not profile if self.group: self.subcategory = "group-" + self.subcategory + else: + self.user = profile["user"]["username"] yield Message.Version, 1 for deviation in self.deviations(): @@ -260,11 +263,53 @@ class DeviantartExtractor(Extractor): content.update(download) +class DeviantartUserExtractor(Extractor): + """Extractor for an artist's user profile""" + category = "deviantart" + subcategory = "user" + pattern = BASE_PATTERN + r"/?$" + test = ( + ("https://www.deviantart.com/shimoda7", { + "options": (("include", "gsjf"),), + "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)", + "count": 4, + }), + ("https://shimoda7.deviantart.com/"), + ) + + def __init__(self, match): + 
Extractor.__init__(self, match) + self.user = match.group(1) or match.group(2) + + incl = self.config("include") or "g" + if isinstance(incl, list): + incl = "".join(item[0] for item in incl if item) + self.include = incl.lower() + + def items(self): + base = "https://www.deviantart.com/{}/".format(self.user) + incl = self.include + data = {} + + if "g" in incl: + data["_extractor"] = DeviantartGalleryExtractor + yield Message.Queue, base + "gallery", data + if "s" in incl: + data["_extractor"] = DeviantartScrapsExtractor + yield Message.Queue, base + "gallery/scraps", data + if "j" in incl: + data["_extractor"] = DeviantartJournalExtractor + yield Message.Queue, base + "posts", data + if "f" in incl: + data["_extractor"] = DeviantartFavoriteExtractor + yield Message.Queue, base + "favourites", data + + class DeviantartGalleryExtractor(DeviantartExtractor): """Extractor for all deviations from an artist's gallery""" subcategory = "gallery" archive_fmt = "g_{username}_{index}.{extension}" - pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$" + pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { "pattern": r"https://(www.deviantart.com/download/\d+/" @@ -315,12 +360,12 @@ class DeviantartGalleryExtractor(DeviantartExtractor): }, }), # group - ("https://www.deviantart.com/yakuzafc", { + ("https://www.deviantart.com/yakuzafc/gallery", { "pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/", "count": ">= 15", }), # 'folders' option (#276) - ("https://www.deviantart.com/justatest235723", { + ("https://www.deviantart.com/justatest235723/gallery", { "count": 3, "options": (("metadata", 1), ("folders", 1), ("original", 0)), "keyword": { @@ -334,10 +379,12 @@ class DeviantartGalleryExtractor(DeviantartExtractor): ("https://www.deviantart.com/shimoda8/gallery/", { "exception": exception.NotFoundError, }), - # old-style URLs + + ("https://www.deviantart.com/shimoda7/gallery"), + ("https://www.deviantart.com/shimoda7/gallery/all"), ("https://www.deviantart.com/shimoda7/gallery/?catpath=/"), ("https://shimoda7.deviantart.com/gallery/"), - ("https://yakuzafc.deviantart.com/"), + ("https://shimoda7.deviantart.com/gallery/all/"), ("https://shimoda7.deviantart.com/gallery/?catpath=/"), ) @@ -794,6 +841,14 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): ) def deviations(self): + # copy self.session + session = self.session.__class__() + for attr in session.__attrs__: + setattr(session, attr, getattr(self.session, attr, None)) + + # reset cookies in the original session object + self.session.cookies = session.cookies.__class__() + url = self.root + "/_napi/da-user-profile/api/gallery/contents" params = { "username" : self.user, @@ -806,7 +861,8 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2): } while True: - data = self.request(url, params=params, headers=headers).json() + data = self.request( + url, session=session, params=params, headers=headers).json() for obj in data["results"]: yield obj["deviation"] @@ -974,11 +1030,12 @@ class DeviantartAPI(): auth = (self.client_id, self.client_secret) response = self.extractor.request( - url, method="POST", data=data, auth=auth) + url, method="POST", data=data, auth=auth, fatal=False) data = response.json() if response.status_code != 200: - raise exception.AuthenticationError('"{} ({})"'.format( + self.log.debug("Server response: %s", data) + raise exception.AuthenticationError('"{}" ({})'.format( data.get("error_description"), data.get("error"))) if 
refresh_token: _refresh_token_cache.update(refresh_token, data["refresh_token"]) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 75e19d6..cba9627 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -69,8 +69,7 @@ class ExhentaiExtractor(Extractor): def login(self): """Login and set necessary cookies""" if self.LIMIT: - self.log.error("Image limit reached!") - raise exception.StopExtraction() + raise exception.StopExtraction("Image limit reached!") if self._check_cookies(self.cookienames): return username, password = self._get_auth_info() @@ -235,9 +234,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): url = iurl data = self._parse_image_info(url) except IndexError: - self.log.error("Unable to parse image info for '%s'", url) self.log.debug("Page content:\n%s", page) - raise exception.StopExtraction() + raise exception.StopExtraction( + "Unable to parse image info for '%s'", url) data["num"] = self.image_num data["image_token"] = self.key["start"] = extr('var startkey="', '";') @@ -272,9 +271,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): url = imgurl data = self._parse_image_info(url) except IndexError: - self.log.error("Unable to parse image info for '%s'", url) self.log.debug("Page content:\n%s", page) - raise exception.StopExtraction() + raise exception.StopExtraction( + "Unable to parse image info for '%s'", url) data["num"] = request["page"] data["image_token"] = imgkey @@ -311,12 +310,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): self._remaining -= data["cost"] if self._remaining <= 0: + ExhentaiExtractor.LIMIT = True url = "{}/s/{}/{}-{}".format( self.root, data["image_token"], self.gallery_id, data["num"]) - self.log.error("Image limit reached! Continue with " - "'%s' as URL after resetting it.", url) - ExhentaiExtractor.LIMIT = True - raise exception.StopExtraction() + raise exception.StopExtraction( + "Image limit reached! 
Continue with '%s' " + "as URL after resetting it.", url) def _update_limits(self): url = "https://e-hentai.org/home.php" diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index 73b8ec4..b71fc4d 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -423,14 +423,15 @@ class FlickrAPI(oauth.OAuth1API): params["api_key"] = self.api_key data = self.request(self.API_URL, params=params).json() if "code" in data: + msg = data.get("message") + self.log.debug("Server response: %s", data) if data["code"] == 1: raise exception.NotFoundError(self.extractor.subcategory) elif data["code"] == 98: - raise exception.AuthenticationError(data.get("message")) + raise exception.AuthenticationError(msg) elif data["code"] == 99: - raise exception.AuthorizationError() - self.log.error("API call failed: %s", data.get("message")) - raise exception.StopExtraction() + raise exception.AuthorizationError(msg) + raise exception.StopExtraction("API request failed: %s", msg) return data def _pagination(self, method, params, key="photos"): diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 5f4c5b8..645b53a 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -78,7 +78,7 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor): EXTRACTORS = { "4plebs": { - "name": "fourplebs", + "name": "_4plebs", "root": "https://archive.4plebs.org", "pattern": r"(?:archive\.)?4plebs\.org", "test-thread": ("https://archive.4plebs.org/tg/thread/54059290", { diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 14baa36..fc7dbf9 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -44,14 +44,13 @@ class FoolslideBase(SharedConfigMixin): class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): """Base class for chapter extractors for FoOlSlide based sites""" - directory_fmt = ( - "{category}", "{manga}", "{chapter_string}") + directory_fmt = ("{category}", "{manga}", "{chapter_string}") archive_fmt = "{id}" pattern_fmt = r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)" decode = "default" def items(self): - page = self.request(self.chapter_url).text + page = self.request(self.gallery_url).text data = self.metadata(page) imgs = self.images(page) @@ -77,7 +76,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor): def metadata(self, page): extr = text.extract_from(page) extr('<h1 class="tbtitle dnone">', '') - return self.parse_chapter_url(self.chapter_url, { + return self.parse_chapter_url(self.gallery_url, { "manga" : text.unescape(extr('title="', '"')).strip(), "chapter_string": text.unescape(extr('title="', '"')), }) diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py index dbcf2f2..eba1c39 100644 --- a/gallery_dl/extractor/fuskator.py +++ b/gallery_dl/extractor/fuskator.py @@ -42,7 +42,7 @@ class FuskatorGalleryExtractor(GalleryExtractor): def metadata(self, page): headers = { - "Referer" : self.chapter_url, + "Referer" : self.gallery_url, "X-Requested-With": "XMLHttpRequest", } auth = self.request( diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 01793dc..43479c6 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -29,8 +29,7 @@ class HbrowseBase(): if not data["manga"] and "<b>Warning</b>" in page: msg = page.rpartition(">")[2].strip() - self.log.error("Site is not accessible: '%s'", msg) - raise 
exception.StopExtraction() + raise exception.StopExtraction("Site is not accessible: '%s'", msg) tags = text.extract(page, 'class="listTable"', '</table>', pos)[0] diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index 161073b..1ab71d6 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -31,10 +31,10 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor): info = text.unescape(text.extract(page, '<title>', '</title>')[0]) manga, _, chapter_string = info.partition(" :: ") - data = self._data(self.chapter_url.split("/")[5]) + data = self._data(self.gallery_url.split("/")[5]) data["manga"] = manga data["chapter_string"] = chapter_string.rstrip(" :") - return self.parse_chapter_url(self.chapter_url, data) + return self.parse_chapter_url(self.gallery_url, data) @memcache(keyarg=1) def _data(self, manga): diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index cf4871f..7e0b63c 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -24,7 +24,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): test = ("https://hentaifox.com/gallery/56622/", { "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", "count": 24, - "keyword": "38f8517605feb6854d48833297da6b05c6541b69", + "keyword": "903ebe227d85e484460382fc6cbab42be7a244d5", }) def __init__(self, match): diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py index d875817..9e2ee9f 100644 --- a/gallery_dl/extractor/hentainexus.py +++ b/gallery_dl/extractor/hentainexus.py @@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor): test = ( ("https://hentainexus.com/view/5688", { "url": "746d0043e20030f1171aae5ea113176607302517", - "keyword": "b05986369fbaf29cfa08b118960d92c49e59524b", + "keyword": "9512cf5f258130e5f75de9954d7a13217c2405e7", }), ("https://hentainexus.com/read/5688"), ) diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index ef08d69..e53b051 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -11,17 +11,20 @@ from .common import GalleryExtractor from .. 
import text, util import string +import json class HitomiGalleryExtractor(GalleryExtractor): """Extractor for image galleries from hitomi.la""" category = "hitomi" root = "https://hitomi.la" - pattern = r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)" + pattern = (r"(?:https?://)?hitomi\.la" + r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)" + r"/(?:[^/?&#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg", - "keyword": "d097a8db8e810045131b4510c41714004f9eff3a", + "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2", "count": 16, }), ("https://hitomi.la/galleries/1401410.html", { @@ -39,6 +42,11 @@ class HitomiGalleryExtractor(GalleryExtractor): "url": "055c898a36389719799d6bce76889cc4ea4421fc", "count": 1413, }), + ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"), + ("https://hitomi.la/manga/867789.html"), + ("https://hitomi.la/doujinshi/867789.html"), + ("https://hitomi.la/cg/867789.html"), + ("https://hitomi.la/gamecg/867789.html"), ("https://hitomi.la/reader/867789.html"), ) @@ -54,6 +62,11 @@ class HitomiGalleryExtractor(GalleryExtractor): self.fallback = True url = url.replace("/galleries/", "/reader/") response = GalleryExtractor.request(self, url, **kwargs) + elif b"<title>Redirect</title>" in response.content: + url = text.extract(response.text, "href='", "'")[0] + if not url.startswith("http"): + url = text.urljoin(self.root, url) + response = self.request(url, **kwargs) return response def metadata(self, page): @@ -86,25 +99,19 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js offset = text.parse_int(self.gallery_id[-1]) % 3 subdomain = chr(97 + offset) + "a" - base = "https://" + subdomain + ".hitomi.la/galleries/" + base = "https://{}.hitomi.la/galleries/{}/".format( + subdomain, self.gallery_id) # set Referer header before image downloads (#239) - self.session.headers["Referer"] = self.chapter_url - - # handle Game CG galleries with scenes (#321) - scenes = text.extract(page, "var scene_indexes = [", "]")[0] - if scenes and scenes.strip(): - url = "{}/reader/{}.html".format(self.root, self.gallery_id) - page = self.request(url).text - begin, end = ">//g.hitomi.la/galleries/", "</div>" - elif self.fallback: - begin, end = ">//g.hitomi.la/galleries/", "</div>" - else: - begin, end = "'//tn.hitomi.la/smalltn/", ".jpg'," + self.session.headers["Referer"] = self.gallery_url + + # get 'galleryinfo' + url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gallery_id) + page = self.request(url).text return [ - (base + urlpart, None) - for urlpart in text.extract_iter(page, begin, end) + (base + image["name"], None) + for image in json.loads(page.partition("=")[2]) ] @staticmethod diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 2a8dcad..fb321d0 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -90,7 +90,7 @@ class ImgbbExtractor(Extractor): return params["seek"] = data["seekEnd"] params["page"] += 1 - data = self.request(endpoint, "POST", data=params).json() + data = self.request(endpoint, method="POST", data=params).json() page = data["html"] diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index cb36c30..b1be995 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -10,8 +10,6 @@ from .common import Extractor, Message from .. 
import text, exception -import itertools -import json BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com" @@ -21,103 +19,89 @@ class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" root = "https://imgur.com" - api_root = "https://api.imgur.com" def __init__(self, match): Extractor.__init__(self, match) + self.api = ImgurAPI(self) self.key = match.group(1) self.mp4 = self.config("mp4", True) - def _extract_data(self, path): - response = self.request(self.root + path, notfound=self.subcategory) - data = json.loads(text.extract( - response.text, "image : ", ",\n")[0]) + def _prepare(self, image): try: - del data["adConfig"] - del data["isAd"] + del image["ad_url"] + del image["ad_type"] + del image["ad_config"] except KeyError: pass - return data - def _prepare(self, image): - image["ext"] = image["ext"].partition("?")[0] - if image["ext"] == ".gif" and ( - (self.mp4 and image["prefer_video"]) or self.mp4 == "always"): - image["ext"] = ".mp4" - url = "https://i.imgur.com/" + image["hash"] + image["ext"] - image["extension"] = image["ext"][1:] + url = image["mp4"] if image["animated"] and self.mp4 else image["link"] + image["date"] = text.parse_timestamp(image["datetime"]) + text.nameext_from_url(url, image) + return url - def _items_apiv3(self, urlfmt): + def _items_queue(self, items): album_ex = ImgurAlbumExtractor image_ex = ImgurImageExtractor - params = { - "IMGURPLATFORM" : "web", - "album_previews": "0", - "client_id" : "546c25a59c58ad7", - } - headers = { - "Origin" : self.root, - "Referer": self.root + "/", - } - yield Message.Version, 1 - - for num in itertools.count(0): - url = urlfmt.format(num) - data = self.request(url, params=params, headers=headers).json() - - for item in data["data"]: - item["_extractor"] = album_ex if item["is_album"] else image_ex - yield Message.Queue, item["link"], item - - if len(data["data"]) < 60: - return + for item in items: + item["_extractor"] = album_ex if item["is_album"] else image_ex + yield Message.Queue, item["link"], item class ImgurImageExtractor(ImgurExtractor): """Extractor for individual images on imgur.com""" subcategory = "image" - filename_fmt = "{category}_{hash}{title:?_//}.{extension}" - archive_fmt = "{hash}" + filename_fmt = "{category}_{id}{title:?_//}.{extension}" + archive_fmt = "{id}" pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?" 
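ImgurImageExtractor now resolves everything through the ImgurAPI helper defined further down in this patch, instead of scraping the inline "image :" JSON out of the page. A minimal standalone sketch of the same lookup, assuming plain requests: the endpoint, the Client-ID authorization header, and the preference for the "mp4" variant of animated posts mirror the patch, and the client id is simply the default one the extractor ships with, so it may be rate-limited:

import requests

def imgur_image_url(image_hash, client_id="546c25a59c58ad7", prefer_mp4=True):
    # GET https://api.imgur.com/3/image/{hash} with Client-ID authorization
    url = "https://api.imgur.com/3/image/" + image_hash
    headers = {"Authorization": "Client-ID " + client_id}
    image = requests.get(url, headers=headers).json()["data"]
    # animated posts carry an "mp4" field in addition to the regular "link"
    if prefer_mp4 and image.get("animated"):
        return image["mp4"]
    return image["link"]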
test = ( ("https://imgur.com/21yMxCS", { "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { - "animated": False, - "datetime": "2016-11-10 14:24:35", - "description": str, - "ext": ".png", - "extension": "png", - "hash": "21yMxCS", - "height": "32", - "is_moderated": False, - "is_safe": False, - "is_viral": 0, - "looping": False, - "mimetype": "image/png", - "name": None, - "prefer_video": False, - "size": 182, - "source": "", - "title": "Test", - "video_host": None, - "video_source": None, - "width": "64", + "account_id" : None, + "account_url" : None, + "animated" : False, + "bandwidth" : int, + "date" : "type:datetime", + "datetime" : 1478787875, + "description" : None, + "edited" : "0", + "extension" : "png", + "favorite" : False, + "filename" : "21yMxCS", + "has_sound" : False, + "height" : 32, + "id" : "21yMxCS", + "in_gallery" : False, + "in_most_viral": False, + "is_ad" : False, + "link" : "https://i.imgur.com/21yMxCS.png", + "nsfw" : False, + "section" : None, + "size" : 182, + "tags" : [], + "title" : "Test", + "type" : "image/png", + "views" : int, + "vote" : None, + "width" : 64, }, }), ("http://imgur.com/0gybAXR", { # gifv/mp4 video "url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7", "content": "a3c080e43f58f55243ab830569ba02309d59abfc", }), + ("https://imgur.com/XFfsmuC", { # missing title in API response (#467) + "keyword": {"title": "Tears are a natural response to irritants"}, + }), ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1' - "url": "73f361b50753ab25da64160aa50bc5d139480d45", + "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e", }), ("https://imgur.com/zzzzzzz", { # not found - "exception": exception.NotFoundError, + "exception": exception.HttpError, }), ("https://www.imgur.com/21yMxCS"), # www ("https://m.imgur.com/21yMxCS"), # mobile @@ -129,7 +113,11 @@ class ImgurImageExtractor(ImgurExtractor): ) def items(self): - image = self._extract_data("/" + self.key) + image = self.api.image(self.key) + if not image["title"]: + page = self.request(self.root + "/" + self.key, fatal=False).text + title = text.extract(page, "<title>", "<")[0] + image["title"] = (title or "").rpartition(" - ")[0].strip() url = self._prepare(image) yield Message.Version, 1 yield Message.Directory, image @@ -139,42 +127,67 @@ class ImgurImageExtractor(ImgurExtractor): class ImgurAlbumExtractor(ImgurExtractor): """Extractor for imgur albums""" subcategory = "album" - directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}") - filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" - archive_fmt = "{album[hash]}_{hash}" + directory_fmt = ("{category}", "{album[id]}{album[title]:? 
- //}") + filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}" + archive_fmt = "{album[id]}_{id}" pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", "keyword": { "album": { - "album_cover": "693j2Kr", - "album_description": None, - "cover": "693j2Kr", - "datetime": "2015-10-09 10:37:50", - "description": None, - "hash": "TcBmP", - "id": "TcBmP", - "is_album": True, - "num_images": "19", - "title": "138", - "title_clean": "TcBmP", - "views": str, + "account_id" : None, + "account_url" : None, + "cover" : "693j2Kr", + "cover_edited": None, + "cover_height": 1400, + "cover_width" : 951, + "date" : "type:datetime", + "datetime" : 1444387070, + "description" : None, + "favorite" : False, + "id" : "TcBmP", + "images_count": 19, + "in_gallery" : False, + "is_ad" : False, + "is_album" : True, + "layout" : "blog", + "link" : "https://imgur.com/a/TcBmP", + "nsfw" : False, + "privacy" : "hidden", + "section" : None, + "title" : "138", + "views" : int, }, - "animated": bool, - "datetime": str, - "extension": str, - "hash": str, - "height": int, - "num": int, - "prefer_video": bool, - "size": int, - "title": str, - "width": int, + "account_id" : None, + "account_url": None, + "animated" : bool, + "bandwidth" : int, + "date" : "type:datetime", + "datetime" : int, + "description": None, + "edited" : "0", + "favorite" : False, + "has_sound" : False, + "height" : int, + "id" : str, + "in_gallery" : False, + "is_ad" : False, + "link" : r"re:https://i\.imgur\.com/\w+\.jpg", + "nsfw" : None, + "num" : int, + "section" : None, + "size" : int, + "tags" : list, + "title" : None, + "type" : "image/jpeg", + "views" : int, + "vote" : None, + "width" : int, }, }), ("https://imgur.com/a/eD9CT", { # large album - "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937", + "url": "de748c181a04d18bef1de9d4f4866ef0a06d632b", }), ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash "url": "695ef0c950023362a0163ee5041796300db76674", @@ -183,21 +196,22 @@ class ImgurAlbumExtractor(ImgurExtractor): "url": "86b4747f8147cec7602f0214e267309af73a8655", }), ("https://imgur.com/a/TcBmQ", { - "exception": exception.NotFoundError, + "exception": exception.HttpError, }), ("https://www.imgur.com/a/TcBmP"), # www ("https://m.imgur.com/a/TcBmP"), # mobile ) def items(self): - album = self._extract_data("/a/" + self.key + "/all") - images = album["album_images"]["images"] - del album["album_images"] + album = self.api.album(self.key) + album["date"] = text.parse_timestamp(album["datetime"]) + images = album["images"] - if int(album["num_images"]) > len(images): - url = "{}/ajaxalbums/getimages/{}/hit.json".format( - self.root, self.key) - images = self.request(url).json()["data"]["images"] + try: + del album["images"] + del album["ad_config"] + except KeyError: + pass yield Message.Version, 1 yield Message.Directory, {"album": album, "count": len(images)} @@ -224,13 +238,11 @@ class ImgurGalleryExtractor(ImgurExtractor): def items(self): url = self.root + "/a/" + self.key with self.request(url, method="HEAD", fatal=False) as response: - code = response.status_code - - if code < 400: - extr = ImgurAlbumExtractor - else: - extr = ImgurImageExtractor - url = self.root + "/" + self.key + if response.status_code < 400: + extr = ImgurAlbumExtractor + else: + extr = ImgurImageExtractor + url = self.root + "/" + self.key yield Message.Version, 1 yield Message.Queue, url, {"_extractor": extr} @@ -251,9 +263,7 @@ class 
ImgurUserExtractor(ImgurExtractor): ) def items(self): - urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format( - self.api_root, self.key) - return self._items_apiv3(urlfmt) + return self._items_queue(self.api.account_submissions(self.key)) class ImgurFavoriteExtractor(ImgurExtractor): @@ -267,6 +277,43 @@ class ImgurFavoriteExtractor(ImgurExtractor): }) def items(self): - urlfmt = "{}/3/account/{}/gallery_favorites/{{}}/newest".format( - self.api_root, self.key) - return self._items_apiv3(urlfmt) + return self._items_queue(self.api.account_favorites(self.key)) + + +class ImgurAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.headers = { + "Authorization": "Client-ID " + extractor.config( + "client-id", "546c25a59c58ad7"), + } + + def account_favorites(self, account): + endpoint = "account/{}/gallery_favorites".format(account) + return self._pagination(endpoint) + + def account_submissions(self, account): + endpoint = "account/{}/submissions".format(account) + return self._pagination(endpoint) + + def album(self, album_hash): + return self._call("album/" + album_hash) + + def image(self, image_hash): + return self._call("image/" + image_hash) + + def _call(self, endpoint): + return self.extractor.request( + "https://api.imgur.com/3/" + endpoint, headers=self.headers, + ).json()["data"] + + def _pagination(self, endpoint): + num = 0 + + while True: + data = self._call("{}/{}".format(endpoint, num)) + if not data: + return + yield from data + num += 1 diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8eee390..a14225f 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -36,17 +36,13 @@ class InstagramExtractor(Extractor): data.update(metadata) yield Message.Directory, data - if data['typename'] in ('GraphImage', 'GraphStoryImage', 'GraphStoryVideo'): - yield Message.Url, data['display_url'], \ - text.nameext_from_url(data['display_url'], data) - elif data['typename'] == 'GraphVideo': - data["extension"] = None - yield Message.Url, \ - 'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data - elif data['typename'] == 'GraphHighlightReel': + if data['typename'] == 'GraphHighlightReel': url = '{}/stories/highlights/{}/'.format(self.root, data['id']) data['_extractor'] = InstagramStoriesExtractor yield Message.Queue, url, data + else: + url = data['video_url'] or data['display_url'] + yield Message.Url, url, text.nameext_from_url(url, data) def login(self): if self._check_cookies(self.cookienames): @@ -101,12 +97,20 @@ class InstagramExtractor(Extractor): def _extract_shared_data(self, url): page = self.request(url).text - data = text.extract(page, 'window._sharedData = ', ';</script>')[0] - return json.loads(data) + shared_data, pos = text.extract( + page, 'window._sharedData =', ';</script>') + additional_data, pos = text.extract( + page, 'window.__additionalDataLoaded(', ');</script>', pos) + + data = json.loads(shared_data) + if additional_data: + next(iter(data['entry_data'].values()))[0] = \ + json.loads(additional_data.partition(',')[2]) + return data def _extract_postpage(self, url): - shared_data = self._extract_shared_data(url) - media = shared_data['entry_data']['PostPage'][0]['graphql']['shortcode_media'] + data = self.request(url + "?__a=1").json() + media = data['graphql']['shortcode_media'] common = { 'date': text.parse_timestamp(media['taken_at_timestamp']), @@ -122,7 +126,6 @@ class InstagramExtractor(Extractor): medias = [] if media['__typename'] == 'GraphSidecar': - 
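On the Instagram side, the patch stops parsing window._sharedData for individual posts and instead requests the post URL with "?__a=1", which returns the GraphQL media object as JSON, and it now prefers a post's direct video_url over display_url rather than delegating videos to youtube-dl. A small sketch of that lookup, under the assumption that the endpoint answers without login cookies, which Instagram does not always allow:

import requests

def instagram_post_media(shortcode):
    # "?__a=1" returns the same GraphQL data the post page embeds
    url = "https://www.instagram.com/p/{}/?__a=1".format(shortcode)
    media = requests.get(url).json()["graphql"]["shortcode_media"]
    if media["__typename"] == "GraphSidecar":
        nodes = [edge["node"]
                 for edge in media["edge_sidecar_to_children"]["edges"]]
    else:
        nodes = [media]
    # prefer the direct video URL when present, otherwise the display image
    return [node.get("video_url") or node["display_url"] for node in nodes]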
yi = 0 for n in media['edge_sidecar_to_children']['edges']: children = n['node'] media_data = { @@ -130,14 +133,12 @@ class InstagramExtractor(Extractor): 'shortcode': children['shortcode'], 'typename': children['__typename'], 'display_url': children['display_url'], + 'video_url': children.get('video_url'), 'height': text.parse_int(children['dimensions']['height']), 'width': text.parse_int(children['dimensions']['width']), 'sidecar_media_id': media['id'], 'sidecar_shortcode': media['shortcode'], } - if children['__typename'] == 'GraphVideo': - media_data['_ytdl_index'] = yi - yi += 1 media_data.update(common) medias.append(media_data) @@ -147,6 +148,7 @@ class InstagramExtractor(Extractor): 'shortcode': media['shortcode'], 'typename': media['__typename'], 'display_url': media['display_url'], + 'video_url': media.get('video_url'), 'height': text.parse_int(media['dimensions']['height']), 'width': text.parse_int(media['dimensions']['width']), } @@ -318,7 +320,7 @@ class InstagramImageExtractor(InstagramExtractor): # GraphVideo ("https://www.instagram.com/p/Bqxp0VSBgJg/", { - "url": "8f38c1cf460c9804842f7306c487410f33f82e7e", + "pattern": r"/47129943_191645575115739_8539303288426725376_n\.mp4", "keyword": { "date": "type:datetime", "description": str, @@ -334,7 +336,7 @@ class InstagramImageExtractor(InstagramExtractor): # GraphVideo (IGTV) ("https://www.instagram.com/tv/BkQjCfsBIzi/", { - "url": "64208f408e11cbbca86c2df4488e90262ae9d9ec", + "pattern": r"/10000000_1760663964018792_716207142595461120_n\.mp4", "keyword": { "date": "type:datetime", "description": str, @@ -351,11 +353,10 @@ class InstagramImageExtractor(InstagramExtractor): # GraphSidecar with 2 embedded GraphVideo objects ("https://www.instagram.com/p/BtOvDOfhvRr/", { "count": 2, - "url": "e290d4180a58ae50c910d51d3b04d5f5c4622cd7", "keyword": { "sidecar_media_id": "1967717017113261163", "sidecar_shortcode": "BtOvDOfhvRr", - "_ytdl_index": int, + "video_url": str, } }) ) diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py new file mode 100644 index 0000000..12d7487 --- /dev/null +++ b/gallery_dl/extractor/issuu.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://issuu.com/""" + +from .common import GalleryExtractor, Extractor, Message +from .. 
import text, util +import json + + +class IssuuBase(): + """Base class for issuu extractors""" + category = "issuu" + root = "https://issuu.com" + + +class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): + """Extractor for a single publication""" + subcategory = "publication" + directory_fmt = ("{category}", "{document[userName]}", + "{document[originalPublishDate]} {document[title]}") + filename_fmt = "{num:>03}.{extension}" + archive_fmt = "{document[id]}_{num}" + pattern = r"(?:https?://)?issuu\.com(/[^/?&#]+/docs/[^/?&#]+)" + test = ("https://issuu.com/issuu/docs/motions-1-2019/", { + "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", + "count" : 36, + "keyword": { + "document": { + "access" : "public", + "contentRating": dict, + "date" : "type:datetime", + "description" : "re:Motions, the brand new publication by Is", + "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", + "documentName" : "motions-1-2019", + "downloadState": "NOT_AVAILABLE", + "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", + "isConverting" : False, + "isQuarantined": False, + "lang" : "en", + "language" : "English", + "pageCount" : 36, + "publicationId": "d99ec95935f15091b040cb8060f05510", + "sections" : list, + "title" : "Motions by Issuu - Issue 1", + "userName" : "issuu", + }, + "extension": "jpg", + "filename" : r"re:page_\d+", + "num" : int, + }, + }) + + def metadata(self, page): + data = json.loads(text.extract( + page, 'window.__INITIAL_STATE__ =', ';\n')[0]) + + doc = data["document"] + doc["lang"] = doc["language"] + doc["language"] = util.code_to_language(doc["language"]) + doc["date"] = text.parse_datetime( + doc["originalPublishDate"], "%Y-%m-%d") + + self._cnt = text.parse_int(doc["pageCount"]) + self._tpl = "https://{}/{}/jpg/page_{{}}.jpg".format( + data["config"]["hosts"]["image"], doc["id"]) + + return {"document": doc} + + def images(self, page): + fmt = self._tpl.format + return [(fmt(i), None) for i in range(1, self._cnt + 1)] + + +class IssuuUserExtractor(IssuuBase, Extractor): + """Extractor for all publications of a user/publisher""" + subcategory = "user" + pattern = r"(?:https?://)?issuu\.com/([^/?&#]+)/?$" + test = ("https://issuu.com/issuu", { + "pattern": IssuuPublicationExtractor.pattern, + "count" : "> 25", + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.user = match.group(1) + + def items(self): + url = "{}/call/profile/v1/documents/{}".format(self.root, self.user) + params = {"offset": 0, "limit": "25"} + + yield Message.Version, 1 + while True: + data = self.request(url, params=params).json() + + for publication in data["items"]: + publication["url"] = "{}/{}/docs/{}".format( + self.root, self.user, publication["uri"]) + publication["_extractor"] = IssuuPublicationExtractor + yield Message.Queue, publication["url"], publication + + if not data["hasMore"]: + return + params["offset"] += data["limit"] diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 6314a94..bb89f93 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -33,10 +33,9 @@ class RedirectMixin(): except (EOFError, OSError): pass else: - self.log.error( + raise exception.StopExtraction( "Redirect to \n%s\nVisit this URL in your browser and " "solve the CAPTCHA to continue", response.url) - raise exception.StopExtraction() class KissmangaBase(RedirectMixin): diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 965daa0..0aeeb4a 100644 --- 
a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -6,75 +6,109 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://luscious.net/""" +"""Extractors for https://members.luscious.net/""" -from .common import GalleryExtractor, Extractor, Message +from .common import Extractor, Message from .. import text, exception -from ..cache import cache -class LusciousBase(Extractor): +class LusciousExtractor(Extractor): """Base class for luscious extractors""" category = "luscious" cookiedomain = ".luscious.net" root = "https://members.luscious.net" - def login(self): - """Login and set necessary cookies""" - username, password = self._get_auth_info() - if username: - self._update_cookies(self._login_impl(username, password)) - - @cache(maxage=14*24*3600, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - url = "https://members.luscious.net/accounts/login/" - headers = {"Referer": "https://members.luscious.net/login/"} + def _graphql(self, op, variables, query): data = { - "login": username, - "password": password, - "remember": "on", - "next": "/", + "id" : 1, + "operationName": op, + "query" : query, + "variables" : variables, } + response = self.request( + "{}/graphql/nobatch/?operationName={}".format(self.root, op), + method="POST", json=data, fatal=False, + ) - response = self.request(url, method="POST", headers=headers, data=data) - if "/accounts/login/" in response.url or not response.history: - raise exception.AuthenticationError() - for cookie in response.history[0].cookies: - if cookie.name.startswith("sessionid_"): - return {cookie.name: cookie.value} - raise exception.AuthenticationError() + if response.status_code >= 400: + self.log.debug("Server response: %s", response.text) + raise exception.StopExtraction( + "GraphQL query failed ('%s %s')", + response.status_code, response.reason) - @staticmethod - def _parse_tags(tags): - return [ - text.unescape(tag.replace(":_", ":")) - for tag in text.extract_iter(tags or "", "/tags/", "/") - ] + return response.json()["data"] -class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): +class LusciousAlbumExtractor(LusciousExtractor): """Extractor for image albums from luscious.net""" subcategory = "album" - archive_fmt = "{gallery_id}_{image_id}" + filename_fmt = "{category}_{album[id]}_{num:>03}.{extension}" + directory_fmt = ("{category}", "{album[id]} {album[title]}") + archive_fmt = "{album[id]}_{id}" pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net" - r"/(?:albums|pictures/c/[^/?&#]+/album)/([^/?&#]+_(\d+))") + r"/(?:albums|pictures/c/[^/?&#]+/album)/[^/?&#]+_(\d+)") test = ( ("https://luscious.net/albums/okinami-no-koigokoro_277031/", { "url": "7e4984a271a1072ac6483e4228a045895aff86f3", - "keyword": "07c0b915f2ab1cc3bbf28b76e7950fccee1213f3", - "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", + # "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", + "keyword": { + "album": { + "__typename" : "Album", + "audiences" : list, + "content" : "Hentai", + "cover" : "re:https://cdnio.luscious.net/.+/277031/", + "created" : 1479625853, + "created_by" : "NTRshouldbeillegal", + "date" : "type:datetime", + "description" : "Enjoy.", + "download_url": "/download/824778/277031/", + "genres" : list, + "id" : 277031, + "is_manga" : True, + "labels" : list, + "language" : "English", + "like_status" : "none", + "modified" : int, + "permissions" : list, + "rating" : float, 
+ "slug" : "okinami-no-koigokoro", + "status" : "not_moderated", + "tags" : list, + "title" : "Okinami no Koigokoro", + "url" : "/albums/okinami-no-koigokoro_277031/", + "marked_for_deletion": False, + "marked_for_processing": False, + "number_of_animated_pictures": 0, + "number_of_favorites": int, + "number_of_pictures": 18, + }, + "aspect_ratio": r"re:\d+:\d+", + "category" : "luscious", + "created" : int, + "date" : "type:datetime", + "height" : int, + "id" : int, + "is_animated" : False, + "like_status" : "none", + "position" : int, + "resolution" : r"re:\d+x\d+", + "status" : "not_moderated", + "tags" : list, + "thumbnail" : str, + "title" : str, + "width" : int, + "number_of_comments": int, + "number_of_favorites": int, + }, }), ("https://luscious.net/albums/virgin-killer-sweater_282582/", { "url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c", - "keyword": "e1202078b504adeccd521aa932f456a5a85479a0", }), ("https://luscious.net/albums/not-found_277035/", { "exception": exception.NotFoundError, }), ("https://members.luscious.net/albums/login-required_323871/", { - "options": (("username", None),), - "exception": exception.HttpError, + "count": 78, }), ("https://www.luscious.net/albums/okinami_277031/"), ("https://members.luscious.net/albums/okinami_277031/"), @@ -83,126 +117,340 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor): ) def __init__(self, match): - path, self.gallery_id = match.groups() - url = "{}/albums/{}/".format(self.root, path) - GalleryExtractor.__init__(self, match, url) + LusciousExtractor.__init__(self, match) + self.album_id = match.group(1) - def metadata(self, page): - title, pos = text.extract(page, '"og:title" content="', '"') + def items(self): + album = self.metadata() + yield Message.Version, 1 + yield Message.Directory, {"album": album} + for num, image in enumerate(self.images(), 1): + image["num"] = num + image["album"] = album + + image["thumbnail"] = image.pop("thumbnails")[0]["url"] + image["tags"] = [item["text"] for item in image["tags"]] + image["date"] = text.parse_timestamp(image["created"]) + image["id"] = text.parse_int(image["id"]) + + url = image["url_to_video"] or image["url_to_original"] + yield Message.Url, url, text.nameext_from_url(url, image) + + def metadata(self): + variables = { + "id": self.album_id, + } - if title is None: - msg = text.extract(page, '<div class="content">', '</div>', pos)[0] - if msg: - raise exception.AuthorizationError(msg) + query = """ +query AlbumGet($id: ID!) { + album { + get(id: $id) { + ... on Album { + ...AlbumStandard + } + ... 
on MutationError { + errors { + code + message + } + } + } + } +} + +fragment AlbumStandard on Album { + __typename + id + title + labels + description + created + modified + like_status + number_of_favorites + rating + status + marked_for_deletion + marked_for_processing + number_of_pictures + number_of_animated_pictures + slug + is_manga + url + download_url + permissions + cover { + width + height + size + url + } + created_by { + id + name + display_name + user_title + avatar { + url + size + } + url + } + content { + id + title + url + } + language { + id + title + url + } + tags { + id + category + text + url + count + } + genres { + id + title + slug + url + } + audiences { + id + title + url + url + } + last_viewed_picture { + id + position + url + } +} +""" + album = self._graphql("AlbumGet", variables, query)["album"]["get"] + if "errors" in album: raise exception.NotFoundError("album") - info , pos = text.extract(page, '<li class="user_info">', "", pos) - if info is None: - count, pos = text.extract(page, '>Pages:', '<', pos) - else: - count, pos = text.extract(page, '<p>', ' ', pos) - genre, pos = text.extract(page, '<p>Genre:', '</p>', pos) - adnce, pos = text.extract(page, '<p>Audience:', '</p>', pos) - tags , pos = text.extract(page, '"tag_list static">', '</ol>', pos) - - return { - "gallery_id": text.parse_int(self.gallery_id), - "title" : text.unescape(title or ""), - "count" : text.parse_int(count), - "genre" : text.remove_html(genre), - "audience" : text.remove_html(adnce), - "tags" : self._parse_tags(tags), + album["audiences"] = [item["title"] for item in album["audiences"]] + album["genres"] = [item["title"] for item in album["genres"]] + album["tags"] = [item["text"] for item in album["tags"]] + + album["cover"] = album["cover"]["url"] + album["content"] = album["content"]["title"] + album["language"] = album["language"]["title"].partition(" ")[0] + album["created_by"] = album["created_by"]["display_name"] + + album["id"] = text.parse_int(album["id"]) + album["date"] = text.parse_timestamp(album["created"]) + + return album + + def images(self): + variables = { + "input": { + "filters": [{ + "name" : "album_id", + "value": self.album_id, + }], + "display": "position", + "page" : 1, + }, } - def images(self, page): - extr = text.extract - - url = "{}/pictures/album/x_{}/sorted/old/page/1/".format( - self.root, self.gallery_id) - page = self.request(url).text - pos = page.find('<div id="picture_page_') - url = extr(page, '<a href="', '"', pos)[0] - iurl = None - - while url and not url.endswith("/more_like_this/"): - page = self.request(self.root + url).text - - if not iurl: # first loop iteraton - current = extr(page, '"pj_current_page" value="', '"')[0] - if current and current != "1": - url = "{}/albums/{}/jump_to_page/1/".format( - self.root, self.gallery_id) - page = self.request(url, method="POST").text - - iid , pos = extr(url , '/id/', '/') - url , pos = extr(page, '<link rel="next" href="', '"') - name, pos = extr(page, '<h1 id="picture_title">', '</h1>', pos) - _ , pos = extr(page, '<ul class="image_option_icons">', '', pos) - iurl, pos = extr(page, '<li><a href="', '"', pos+100) - - if iurl[0] == "/": - iurl = text.urljoin(self.root, iurl) - - yield iurl, { - "name": name, - "image_id": text.parse_int(iid), + query = """ +query AlbumListOwnPictures($input: PictureListInput!) 
{ + picture { + list(input: $input) { + info { + ...FacetCollectionInfo + } + items { + ...PictureStandardWithoutAlbum } + } + } +} + +fragment FacetCollectionInfo on FacetCollectionInfo { + page + has_next_page + has_previous_page + total_items + total_pages + items_per_page + url_complete + url_filters_only +} + +fragment PictureStandardWithoutAlbum on Picture { + __typename + id + title + created + like_status + number_of_comments + number_of_favorites + status + width + height + resolution + aspect_ratio + url_to_original + url_to_video + is_animated + position + tags { + id + category + text + url + } + permissions + url + thumbnails { + width + height + size + url + } +} +""" + while True: + data = self._graphql("AlbumListOwnPictures", variables, query) + yield from data["picture"]["list"]["items"] + + if not data["picture"]["list"]["info"]["has_next_page"]: + return + variables["input"]["page"] += 1 -class LusciousSearchExtractor(LusciousBase, Extractor): +class LusciousSearchExtractor(LusciousExtractor): """Extractor for album searches on luscious.net""" subcategory = "search" pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net" - r"/(albums(?:/(?![^/?&#]+_\d+)[^/?&#]+)+|manga|pictures)/?$") + r"/albums/list/?(?:\?([^#]+))?") test = ( - ("https://luscious.net/manga/"), - ("https://members.luscious.net/albums/sorted/updated/album_type/manga" - "/content_id/2/tagged/+full_color/page/1/", { + ("https://members.luscious.net/albums/list/"), + ("https://members.luscious.net/albums/list/" + "?display=date_newest&language_ids=%2B1&tagged=+full_color&page=1", { "pattern": LusciousAlbumExtractor.pattern, - "range": "20-40", - "count": 21, + "range": "41-60", + "count": 20, }), ) def __init__(self, match): - Extractor.__init__(self, match) - self.path = match.group(1).partition("/page/")[0] - if not self.path.startswith("albums/"): - self.path = "albums/sorted/updated/album_type/" + self.path + LusciousExtractor.__init__(self, match) + self.query = match.group(1) def items(self): - self.login() - yield Message.Version, 1 - for album in self.albums(): - url, data = self.parse_album(album) - yield Message.Queue, url, data + query = text.parse_query(self.query) + display = query.pop("display", "date_newest") + page = query.pop("page", None) + + variables = { + "input": { + "display": display, + "filters": [{"name": n, "value": v} for n, v in query.items()], + "page": text.parse_int(page, 1), + }, + } - def albums(self, pnum=1): + query = """ +query AlbumListWithPeek($input: AlbumListInput!) 
{ + album { + list(input: $input) { + info { + ...FacetCollectionInfo + } + items { + ...AlbumMinimal + peek_thumbnails { + width + height + size + url + } + } + } + } +} + +fragment FacetCollectionInfo on FacetCollectionInfo { + page + has_next_page + has_previous_page + total_items + total_pages + items_per_page + url_complete + url_filters_only +} + +fragment AlbumMinimal on Album { + __typename + id + title + labels + description + created + modified + number_of_favorites + number_of_pictures + slug + is_manga + url + download_url + cover { + width + height + size + url + } + content { + id + title + url + } + language { + id + title + url + } + tags { + id + category + text + url + count + } + genres { + id + title + slug + url + } + audiences { + id + title + url + } +} +""" + yield Message.Version, 1 while True: - url = "{}/{}/page/{}/.json/".format(self.root, self.path, pnum) - data = self.request(url).json() + data = self._graphql("AlbumListWithPeek", variables, query) - yield from text.extract_iter( - data["html"], "<figcaption>", "</figcaption>") + for album in data["album"]["list"]["items"]: + album["url"] = self.root + album["url"] + album["_extractor"] = LusciousAlbumExtractor + yield Message.Queue, album["url"], album - if data["paginator_complete"]: + if not data["album"]["list"]["info"]["has_next_page"]: return - pnum += 1 - - def parse_album(self, album): - url , pos = text.extract(album, 'href="', '"') - title, pos = text.extract(album, ">", "<", pos) - count, pos = text.extract(album, "# of pictures:", "<", pos) - date , pos = text.extract(album, "Updated: ", "<", pos) - desc , pos = text.extract(album, "class='desc'>", "<", pos) - tags , pos = text.extract(album, "<ol ", "</ol>", pos) - - return text.urljoin(self.root, url), { - "title": text.unescape(title or ""), - "description": text.unescape(desc or ""), - "gallery_id": text.parse_int(url.rpartition("_")[2].rstrip("/")), - "count": text.parse_int(count), - "date": date, - "tags": self._parse_tags(tags), - "_extractor": LusciousAlbumExtractor, - } + variables["input"]["page"] += 1 diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py new file mode 100644 index 0000000..c980a38 --- /dev/null +++ b/gallery_dl/extractor/naver.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://blog.naver.com/""" + +from .common import GalleryExtractor, Extractor, Message +from .. 
import text + + +class NaverBase(): + """Base class for naver extractors""" + category = "naver" + root = "https://blog.naver.com" + + +class NaverPostExtractor(NaverBase, GalleryExtractor): + """Extractor for blog posts on blog.naver.com""" + subcategory = "post" + filename_fmt = "{num:>03}.{extension}" + directory_fmt = ("{category}", "{blog[user]} {blog[id]}", + "{post[date]:%Y-%m-%d} {post[title]}") + archive_fmt = "{blog[id]}_{post[num]}_{num}" + pattern = (r"(?:https?://)?blog\.naver\.com/" + r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)") + test = ( + ("https://blog.naver.com/rlfqjxm0/221430673006", { + "url": "6c694f3aced075ed5e9511f1e796d14cb26619cc", + "keyword": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e", + }), + (("https://blog.naver.com/PostView.nhn" + "?blogId=rlfqjxm0&logNo=221430673006"), { + "url": "6c694f3aced075ed5e9511f1e796d14cb26619cc", + "keyword": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e", + }), + ) + + def __init__(self, match): + blog_id = match.group(1) + if blog_id: + self.blog_id = blog_id + self.post_id = match.group(2) + else: + self.blog_id = match.group(3) + self.post_id = match.group(4) + + url = "{}/PostView.nhn?blogId={}&logNo={}".format( + self.root, self.blog_id, self.post_id) + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + extr = text.extract_from(page) + data = { + "post": { + "title" : extr('"og:title" content="', '"'), + "description": extr('"og:description" content="', '"'), + "num" : text.parse_int(self.post_id), + }, + "blog": { + "id" : self.blog_id, + "num" : text.parse_int(extr("var blogNo = '", "'")), + "user" : extr("var nickName = '", "'"), + }, + } + data["post"]["date"] = text.parse_datetime( + extr('se_publishDate pcol2">', '<') or + extr('_postAddDate">', '<'), "%Y. %m. %d. 
%H:%M") + return data + + def images(self, page): + return [ + (url.replace("://post", "://blog", 1).partition("?")[0], None) + for url in text.extract_iter(page, 'data-lazy-src="', '"') + ] + + +class NaverBlogExtractor(NaverBase, Extractor): + """Extractor for a user's blog on blog.naver.com""" + subcategory = "blog" + pattern = (r"(?:https?://)?blog\.naver\.com/" + r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)") + test = ( + ("https://blog.naver.com/gukjung", { + "pattern": NaverPostExtractor.pattern, + "count": 12, + "range": "1-12", + }), + ("https://blog.naver.com/PostList.nhn?blogId=gukjung", { + "pattern": NaverPostExtractor.pattern, + "count": 12, + "range": "1-12", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.blog_id = match.group(1) or match.group(2) + + def items(self): + yield Message.Version, 1 + + # fetch first post number + url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id) + post_num = text.extract( + self.request(url).text, 'gnFirstLogNo = "', '"', + )[0] + + # setup params for API calls + url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root) + params = { + "blogId" : self.blog_id, + "logNo" : post_num or "0", + "viewDate" : "", + "categoryNo" : "", + "parentCategoryNo" : "", + "showNextPage" : "true", + "showPreviousPage" : "false", + "sortDateInMilli" : "", + "isThumbnailViewType": "false", + "countPerPage" : "", + } + + # loop over all posts + while True: + data = self.request(url, params=params).json() + + for post in data["postList"]: + post["url"] = "{}/PostView.nhn?blogId={}&logNo={}".format( + self.root, self.blog_id, post["logNo"]) + post["_extractor"] = NaverPostExtractor + yield Message.Queue, post["url"], post + + if not data["hasNextPage"]: + return + params["logNo"] = data["nextIndexLogNo"] + params["sortDateInMilli"] = data["nextIndexSortDate"] diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index fdfad87..0bd858f 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -20,7 +20,7 @@ class NijieExtractor(AsynchronousMixin, Extractor): """Base class for nijie extractors""" category = "nijie" directory_fmt = ("{category}", "{user_id}") - filename_fmt = "{category}_{artist_id}_{image_id}_p{num:>02}.{extension}" + filename_fmt = "{image_id}_p{num}.{extension}" archive_fmt = "{image_id}_{num}" cookiedomain = "nijie.info" cookienames = ("nemail", "nlogin") diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py new file mode 100644 index 0000000..97be789 --- /dev/null +++ b/gallery_dl/extractor/nozomi.py @@ -0,0 +1,185 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://nozomi.la/""" + +from .common import Extractor, Message +from .. 
import text + + +class NozomiExtractor(Extractor): + """Base class for nozomi extractors""" + category = "nozomi" + root = "https://nozomi.la" + filename_fmt = "{postid}.{extension}" + archive_fmt = "{postid}" + + def items(self): + yield Message.Version, 1 + + data = self.metadata() + self.session.headers["Origin"] = self.root + self.session.headers["Referer"] = self.root + "/" + + for post_id in map(str, self.posts()): + url = "https://j.nozomi.la/post/{}/{}/{}.json".format( + post_id[-1], post_id[-3:-1], post_id) + response = self.request(url, fatal=False) + + if response.status_code >= 400: + self.log.warning( + "Skipping post %s ('%s %s')", + post_id, response.status_code, response.reason) + continue + + image = response.json() + image["tags"] = self._list(image.get("general")) + image["artist"] = self._list(image.get("artist")) + image["copyright"] = self._list(image.get("copyright")) + image["character"] = self._list(image.get("character")) + image["is_video"] = bool(image.get("is_video")) + image["date"] = text.parse_datetime( + image["date"] + ":00", "%Y-%m-%d %H:%M:%S%z") + image["url"] = text.urljoin(self.root, image["imageurl"]) + text.nameext_from_url(image["url"], image) + image.update(data) + + for key in ("general", "imageurl", "imageurls"): + if key in image: + del image[key] + + yield Message.Directory, image + yield Message.Url, image["url"], image + + def metadata(self): + return {} + + def posts(self): + return () + + @staticmethod + def _list(src): + if not src: + return [] + return [x["tagname_display"] for x in src] + + @staticmethod + def _unpack(b): + for i in range(0, len(b), 4): + yield (b[i] << 24) + (b[i+1] << 16) + (b[i+2] << 8) + b[i+3] + + +class NozomiPostExtractor(NozomiExtractor): + """Extractor for individual posts on nozomi.la""" + subcategory = "post" + pattern = r"(?:https?://)?nozomi\.la/post/(\d+)" + test = ("https://nozomi.la/post/3649262.html", { + "url": "f4522adfc8159355fd0476de28761b5be0f02068", + "content": "cd20d2c5149871a0b80a1b0ce356526278964999", + "keyword": { + "artist" : ["hammer (sunset beach)"], + "character": ["patchouli knowledge"], + "copyright": ["touhou"], + "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5cf5a", + "date" : "type:datetime", + "extension": "jpg", + "favorites": int, + "filename" : str, + "height" : 768, + "is_video" : False, + "postid" : 3649262, + "source" : "danbooru", + "sourceid" : 2434215, + "tags" : list, + "type" : "jpg", + "url" : str, + "width" : 1024, + }, + }) + + def __init__(self, match): + NozomiExtractor.__init__(self, match) + self.post_id = match.group(1) + + def posts(self): + return (self.post_id,) + + +class NozomiTagExtractor(NozomiExtractor): + """Extractor for posts from tag searches on nozomi.la""" + subcategory = "tag" + directory_fmt = ("{category}", "{search_tags}") + archive_fmt = "t_{search_tags}_{postid}" + pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\." 
+ test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", { + "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$", + "count": ">= 75", + "range": "1-75", + }) + + def __init__(self, match): + NozomiExtractor.__init__(self, match) + self.tags = text.unquote(match.group(1)).lower() + + def metadata(self): + return {"search_tags": self.tags} + + def posts(self): + url = "https://n.nozomi.la/nozomi/{}.nozomi".format(self.tags) + i = 0 + + while True: + headers = {"Range": "bytes={}-{}".format(i, i+255)} + response = self.request(url, headers=headers) + yield from self._unpack(response.content) + + i += 256 + cr = response.headers.get("Content-Range", "").rpartition("/")[2] + if text.parse_int(cr, i) <= i: + return + + +class NozomiSearchExtractor(NozomiExtractor): + """Extractor for search results on nozomi.la""" + subcategory = "search" + directory_fmt = ("{category}", "{search_tags:J }") + archive_fmt = "t_{search_tags}_{postid}" + pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)" + test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", { + "count": ">= 5", + }) + + def __init__(self, match): + NozomiExtractor.__init__(self, match) + self.tags = text.unquote(match.group(1)).lower().split() + + def metadata(self): + return {"search_tags": self.tags} + + def posts(self): + index = None + result = set() + + def nozomi(path): + url = "https://j.nozomi.la/" + path + ".nozomi" + return self._unpack(self.request(url).content) + + for tag in self.tags: + if tag[0] == "-": + if not index: + index = set(nozomi("index")) + items = index.difference(nozomi("nozomi/" + tag[1:])) + else: + items = nozomi("nozomi/" + tag) + + if result: + result.intersection_update(items) + else: + result.update(items) + + return result diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py index 5005fb4..8f1f3f2 100644 --- a/gallery_dl/extractor/nsfwalbum.py +++ b/gallery_dl/extractor/nsfwalbum.py @@ -17,14 +17,14 @@ class NsfwalbumAlbumExtractor(GalleryExtractor): category = "nsfwalbum" subcategory = "album" root = "https://nsfwalbum.com" - filename_fmt = "{album_id}_{page:>03}_{id}.{extension}" + filename_fmt = "{album_id}_{num:>03}_{id}.{extension}" directory_fmt = ("{category}", "{album_id} {title}") archive_fmt = "{id}" pattern = r"(?:https?://)?(?:www\.)?nsfwalbum\.com(/album/(\d+))" test = ("https://nsfwalbum.com/album/401611", { "range": "1-5", "url": "b0481fc7fad5982da397b6359fbed8421b8ba284", - "keyword": "fc1ad4ebcd6d4cf32da15203120112b8bcf12eec", + "keyword": "e98f9b0d473c00000831618d0235863b1dd78294", }) def __init__(self, match): diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 6c6dd0a..912447b 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -296,8 +296,8 @@ class OAuthMastodon(OAuthBase): data = self.session.post(url, data=data).json() if "client_id" not in data or "client_secret" not in data: - self.log.error("Failed to register new application: '%s'", data) - raise exception.StopExtraction() + raise exception.StopExtraction( + "Failed to register new application: '%s'", data) data["client-id"] = data.pop("client_id") data["client-secret"] = data.pop("client_secret") diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index ab5932d..9b13391 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -76,6 +76,8 @@ class PatreonExtractor(Extractor): headers = {"Referer": self.root} while url: + if not url.startswith("http"): + url = 
"https://" + url.lstrip("/:") posts = self.request(url, headers=headers).json() if "included" in posts: diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py index 83f75a3..8456f97 100644 --- a/gallery_dl/extractor/photobucket.py +++ b/gallery_dl/extractor/photobucket.py @@ -22,11 +22,11 @@ class PhotobucketAlbumExtractor(Extractor): filename_fmt = "{offset:>03}{pictureId:?_//}_{titleOrFilename}.{extension}" archive_fmt = "{id}" pattern = (r"(?:https?://)?((?:[^.]+\.)?photobucket\.com)" - r"/user/[^/?&#]+/library/[^?&#]*") + r"/user/[^/?&#]+/library(?:/[^?&#]*)?") test = ( - ("https://s258.photobucket.com/user/focolandia/library/", { - "pattern": r"https?://[oi]+\d+.photobucket.com/albums/hh280/", - "count": ">= 39" + ("https://s369.photobucket.com/user/CrpyLrkr/library", { + "pattern": r"https?://[oi]+\d+.photobucket.com/albums/oo139/", + "count": ">= 50" }), # subalbums of main "directory" ("https://s271.photobucket.com/user/lakerfanryan/library/", { @@ -149,10 +149,9 @@ class PhotobucketImageExtractor(Extractor): if "message" not in image: break # success tries += 1 - self.log.debug("'%s'", image["message"]) + self.log.debug(image["message"]) else: - self.log.error("%s", image["message"]) - raise exception.StopExtraction() + raise exception.StopExtraction(image["message"]) # adjust metadata entries to be at least somewhat similar # to what the 'album' extractor provides diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index f5b8869..e36a82b 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -241,9 +241,8 @@ class PinterestAPI(): if response.status_code == 404 or response.history: resource = self.extractor.subcategory.rpartition("-")[2] raise exception.NotFoundError(resource) - self.extractor.log.error("API request failed") self.extractor.log.debug("%s", response.text) - raise exception.StopExtraction() + raise exception.StopExtraction("API request failed") def _pagination(self, resource, options): while True: diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index d313daa..d32f245 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -335,11 +335,9 @@ class PixivSearchExtractor(PixivExtractor): def get_metadata(self, user=None): query = text.parse_query(self.query) - if "word" in query: - self.word = text.unescape(query["word"]) - else: - self.log.error("missing search term") - raise exception.StopExtraction() + if "word" not in query: + raise exception.StopExtraction("Missing search term") + self.word = query["word"] sort = query.get("order", "date_d") sort_map = { @@ -504,8 +502,7 @@ class PixivAppAPI(): return response.json() if response.status_code == 404: raise exception.NotFoundError() - self.log.error("API request failed: %s", response.text) - raise exception.StopExtraction() + raise exception.StopExtraction("API request failed: %s", response.text) def _pagination(self, endpoint, params): while True: diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py index 325c6a0..2bb66ac 100644 --- a/gallery_dl/extractor/plurk.py +++ b/gallery_dl/extractor/plurk.py @@ -49,7 +49,7 @@ class PlurkExtractor(Extractor): data = {"plurk_id": plurk["id"], "count": "200"} while True: - info = self.request(url, "POST", data=data).json() + info = self.request(url, method="POST", data=data).json() yield from info["responses"] if not info["has_newer"]: return @@ -91,7 +91,8 @@ class PlurkTimelineExtractor(PlurkExtractor): 
offset = datetime.datetime.strptime( plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z") data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z") - response = self.request(url, "POST", headers=headers, data=data) + response = self.request( + url, method="POST", headers=headers, data=data) plurks = response.json()["plurks"] diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 9c283de..ecce003 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -259,12 +259,17 @@ class RedditAPI(): data = {"grant_type": ("https://oauth.reddit.com/" "grants/installed_client"), "device_id": "DO_NOT_TRACK_THIS_DEVICE"} + + auth = (self.client_id, "") response = self.extractor.request( - url, method="POST", data=data, auth=(self.client_id, "")) + url, method="POST", data=data, auth=auth, fatal=False) + data = response.json() + if response.status_code != 200: - raise exception.AuthenticationError('"{} ({})"'.format( - response.json().get("message"), response.status_code)) - return "Bearer " + response.json()["access_token"] + self.log.debug("Server response: %s", data) + raise exception.AuthenticationError('"{}: {}"'.format( + data.get("error"), data.get("message"))) + return "Bearer " + data["access_token"] def _call(self, endpoint, params): url = "https://oauth.reddit.com" + endpoint diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index bb8a2ae..b07d024 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -201,9 +201,8 @@ class SankakuTagExtractor(SankakuExtractor): tags = self.tags.split() if not self.logged_in and len(tags) > 4: - self.log.error("Unauthenticated users cannot use " - "more than 4 tags at once.") - raise exception.StopExtraction() + raise exception.StopExtraction( + "Unauthenticated users cannot use more than 4 tags at once.") return {"search_tags": " ".join(tags)} def get_posts(self): diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index 38b7813..c4597af 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -21,6 +21,7 @@ class SexcomExtractor(Extractor): root = "https://www.sex.com" def items(self): + self.session.headers["Referer"] = self.root yield Message.Version, 1 yield Message.Directory, self.metadata() for pin in map(self._parse_pin, self.pins()): @@ -52,7 +53,7 @@ class SexcomExtractor(Extractor): def _parse_pin(self, url): response = self.request(url, fatal=False) if response.status_code >= 400: - self.log.warning('Unable to fetch %s ("%s: %s")', + self.log.warning('Unable to fetch %s ("%s %s")', url, response.status_code, response.reason) return None extr = text.extract_from(response.text) @@ -102,6 +103,7 @@ class SexcomPinExtractor(SexcomExtractor): # picture ("https://www.sex.com/pin/56714360/", { "url": "599190d6e3d79f9f49dda194a0a58cb0ffa3ab86", + "content": "963ed681cf53904173c7581b713c7f9471f04db0", "keyword": { "comments": int, "date": "2018-10-02T21:18:17-04:00", @@ -150,7 +152,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor): directory_fmt = ("{category}", "related {original_pin[pin_id]}") pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$" test = ("https://www.sex.com/pin/56714360/#related", { - "count": 24, + "count": ">= 22", }) def metadata(self): diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py index ba0fcf4..82a61da 100644 --- a/gallery_dl/extractor/simplyhentai.py +++ b/gallery_dl/extractor/simplyhentai.py @@ -23,7 +23,7 
@@ class SimplyhentaiGalleryExtractor(GalleryExtractor): (("https://original-work.simply-hentai.com" "/amazon-no-hiyaku-amazon-elixir"), { "url": "21613585ae5ec2f69ea579e9713f536fceab5bd5", - "keyword": "bf75f9ff0fb60756b1b9b92403526a72d9178d23", + "keyword": "9e87a0973553b2922ddee37958b8f5d87910af72", }), ("https://www.simply-hentai.com/notfound", { "exception": exception.GalleryDLException, @@ -43,7 +43,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): extr = text.extract_from(page) split = text.split_html - self.chapter_url = extr('<link rel="canonical" href="', '"') + self.gallery_url = extr('<link rel="canonical" href="', '"') title = extr('<meta property="og:title" content="', '"') if not title: raise exception.NotFoundError("gallery") @@ -63,11 +63,14 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor): return data def images(self, _): - url = self.chapter_url + "/all-pages" + url = self.gallery_url + "/all-pages" headers = {"Accept": "application/json"} images = self.request(url, headers=headers).json() return [ - (urls["full"], {"image_id": text.parse_int(image_id)}) + ( + urls["full"].replace("/giant_thumb_", "/"), + {"image_id": text.parse_int(image_id)}, + ) for image_id, urls in sorted(images.items()) ] @@ -84,12 +87,12 @@ class SimplyhentaiImageExtractor(Extractor): test = ( (("https://www.simply-hentai.com/image" "/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), { - "url": "0338eb137830ab6f81e5f410d3936ef785d063d9", + "url": "3d8eb55240a960134891bd77fe1df7988fcdc455", "keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2", }), ("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", { - "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1", - "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65", + "url": "f73916527211b4a40f26568ee26cd8999f5f4f30", + "keyword": "f94d775177fed918759c8a78a50976f867425b48", }), ) diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index 2e6508c..be29dcf 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -259,11 +259,9 @@ class SmugmugAPI(oauth.OAuth1API): if data["Code"] == 404: raise exception.NotFoundError() if data["Code"] == 429: - self.log.error("Rate limit reached") - else: - self.log.error("API request failed") - self.log.debug(data) - raise exception.StopExtraction() + raise exception.StopExtraction("Rate limit reached") + self.log.debug(data) + raise exception.StopExtraction("API request failed") def _expansion(self, endpoint, expands, params=None): endpoint = self._extend(endpoint, expands) diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py index cc0dc90..298b7e0 100644 --- a/gallery_dl/extractor/tsumino.py +++ b/gallery_dl/extractor/tsumino.py @@ -109,14 +109,13 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor): def images(self, page): url = "{}/Read/Index/{}?page=1".format(self.root, self.gallery_id) - headers = {"Referer": self.chapter_url} + headers = {"Referer": self.gallery_url} response = self.request(url, headers=headers, fatal=False) if "/Auth/" in response.url: - self.log.error( + raise exception.StopExtraction( "Failed to get gallery JSON data. 
Visit '%s' in a browser " "and solve the CAPTCHA to continue.", response.url) - raise exception.StopExtraction() page = response.text tpl, pos = text.extract(page, 'data-cdn="', '"') @@ -195,8 +194,8 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor): return self._parse_simple(query) return self._parse_jsurl(query) except Exception as exc: - self.log.error("Invalid search query: '%s' (%s)", query, exc) - raise exception.StopExtraction() + raise exception.StopExtraction( + "Invalid search query '%s' (%s)", query, exc) @staticmethod def _parse_simple(query): diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 8abbaf7..998eed4 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -407,26 +407,22 @@ class TumblrAPI(oauth.OAuth1API): # daily rate limit if response.headers.get("x-ratelimit-perday-remaining") == "0": reset = response.headers.get("x-ratelimit-perday-reset") - self.log.error( + raise exception.StopExtraction( "Daily API rate limit exceeded: aborting; " - "rate limit will reset at %s", - self._to_time(reset), + "rate limit will reset at %s", self._to_time(reset), ) - raise exception.StopExtraction() # hourly rate limit reset = response.headers.get("x-ratelimit-perhour-reset") if reset: self.log.info( - "Hourly API rate limit exceeded; " - "waiting until %s for rate limit reset", - self._to_time(reset), + "Hourly API rate limit exceeded; waiting until " + "%s for rate limit reset", self._to_time(reset), ) time.sleep(int(reset) + 1) return self._call(blog, endpoint, params) - self.log.error(data) - raise exception.StopExtraction() + raise exception.StopExtraction(data) @staticmethod def _to_time(reset): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 8105ede..dfafc1f 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from .. 
import text, exception -from ..cache import cache +from ..cache import cache, memcache import re @@ -26,6 +26,7 @@ class TwitterExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.user = match.group(1) + self.logged_in = False self.retweets = self.config("retweets", True) self.content = self.config("content", False) self.videos = self.config("videos", False) @@ -53,10 +54,20 @@ class TwitterExtractor(Extractor): yield Message.Urllist, urls, data if self.videos and "-videoContainer" in tweet: + if self.videos == "ytdl": + data["extension"] = None + url = "ytdl:{}/{}/status/{}".format( + self.root, data["user"], data["tweet_id"]) + else: + url = self._video_from_tweet(data["tweet_id"]) + ext = text.ext_from_url(url) + if ext == "m3u8": + url = "ytdl:" + url + data["extension"] = "mp4" + data["_ytdl_extra"] = {"protocol": "m3u8_native"} + else: + data["extension"] = ext data["num"] = 1 - data["extension"] = None - url = "ytdl:{}/{}/status/{}".format( - self.root, data["user"], data["tweet_id"]) yield Message.Url, url, data def metadata(self): @@ -70,6 +81,7 @@ class TwitterExtractor(Extractor): username, password = self._get_auth_info() if username: self._update_cookies(self._login_impl(username, password)) + self.logged_in = True @cache(maxage=360*24*3600, keyarg=1) def _login_impl(self, username, password): @@ -115,17 +127,48 @@ class TwitterExtractor(Extractor): data["content"] = cl if cl and len(cr) < 16 else content return data - def _tweets_from_api(self, url): + def _video_from_tweet(self, tweet_id): + url = "https://api.twitter.com/1.1/videos/tweet/config/{}.json".format( + tweet_id) + cookies = None + headers = { + "Origin" : self.root, + "Referer" : "{}/i/web/status/{}".format(self.root, tweet_id), + "x-csrf-token" : self.session.cookies.get("ct0"), + "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekM" + "xqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28N" + "HfOPqkca3qaAxGfsyKCs0wRbw", + } + + if self.logged_in: + headers["x-twitter-auth-type"] = "OAuth2Session" + else: + token = self._guest_token(headers) + cookies = {"gt": token} + headers["x-guest-token"] = token + + data = self.request(url, cookies=cookies, headers=headers).json() + return data["track"]["playbackUrl"] + + @memcache() + def _guest_token(self, headers): + return self.request( + "https://api.twitter.com/1.1/guest/activate.json", + method="POST", headers=headers, + ).json().get("guest_token") + + def _tweets_from_api(self, url, max_position=None): params = { "include_available_features": "1", "include_entities": "1", + "max_position": max_position, "reset_error_state": "false", "lang": "en", } headers = { "X-Requested-With": "XMLHttpRequest", "X-Twitter-Active-User": "yes", - "Referer": "{}/{}".format(self.root, self.user) + "Referer": self.root + "/", } while True: @@ -140,18 +183,23 @@ class TwitterExtractor(Extractor): if not data["has_more_items"]: return - position = text.parse_int(text.extract( - tweet, 'data-tweet-id="', '"')[0]) - if "max_position" in params and position >= params["max_position"]: - return - params["max_position"] = position + if "min_position" in data: + position = data["min_position"] + if position == max_position: + return + else: + position = text.parse_int(text.extract( + tweet, 'data-tweet-id="', '"')[0]) + if max_position and position >= max_position: + return + params["max_position"] = max_position = position class TwitterTimelineExtractor(TwitterExtractor): """Extractor for all images from a user's timeline""" subcategory = "timeline" 
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/([^/?&#]+)/?(?:$|[?#])") + r"/(?!search)([^/?&#]+)/?(?:$|[?#])") test = ( ("https://twitter.com/supernaturepics", { "range": "1-40", @@ -171,7 +219,7 @@ class TwitterMediaExtractor(TwitterExtractor): """Extractor for all images from a user's Media Tweets""" subcategory = "media" pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" - r"/([^/?&#]+)/media(?!\w)") + r"/(?!search)([^/?&#]+)/media(?!\w)") test = ( ("https://twitter.com/supernaturepics/media", { "range": "1-40", @@ -186,6 +234,26 @@ class TwitterMediaExtractor(TwitterExtractor): return self._tweets_from_api(url) +class TwitterSearchExtractor(TwitterExtractor): + """Extractor for all images from a search timeline""" + subcategory = "search" + directory_fmt = ("{category}", "Search", "{search}") + pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com" + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)") + test = ("https://twitter.com/search?q=nature", { + "range": "1-40", + "count": 40, + }) + + def metadata(self): + return {"search": self.user} + + def tweets(self): + url = "{}/i/search/timeline?f=tweets&q={}".format( + self.root, self.user) + return self._tweets_from_api(url, "-1") + + class TwitterTweetExtractor(TwitterExtractor): """Extractor for images from individual tweets""" subcategory = "tweet" @@ -205,17 +273,17 @@ class TwitterTweetExtractor(TwitterExtractor): # video ("https://twitter.com/perrypumas/status/1065692031626829824", { "options": (("videos", True),), - "pattern": r"ytdl:https://twitter.com/perrypumas/status/\d+", + "pattern": r"ytdl:https://video.twimg.com/ext_tw_video/.*.m3u8", }), # content with emoji, newlines, hashtags (#338) ("https://twitter.com/yumi_san0112/status/1151144618936823808", { "options": (("content", True),), - "keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e", + "keyword": "b133464b73aec33871521ab021a3166204194285", }), # Reply to another tweet (#403) ("https://twitter.com/tyson_hesse/status/1103767554424598528", { - "options": (("videos", True),), - "pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$", + "options": (("videos", "ytdl"),), + "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528", }), # /i/web/ URL ("https://twitter.com/i/web/status/1155074198240292865", { @@ -231,9 +299,19 @@ class TwitterTweetExtractor(TwitterExtractor): return {"user": self.user, "tweet_id": self.tweet_id} def tweets(self): - self.session.cookies.clear() url = "{}/i/web/status/{}".format(self.root, self.tweet_id) - page = self.request(url).text + cookies = {"app_shell_visited": "1"} + headers = { + "Referer" : url, + "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; " + "Trident/7.0; rv:11.0) like Gecko", + } + + response = self.request(url, cookies=cookies, headers=headers) + if response.history and response.url == self.root + "/": + raise exception.AuthorizationError() + page = response.text + end = page.index('class="js-tweet-stats-container') beg = page.rindex('<div class="tweet ', 0, end) return (page[beg:end],) diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 4326582..09a166c 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -33,8 +33,8 @@ class WallhavenSearchExtractor(WallhavenExtractor): ("https://wallhaven.cc/search?q=touhou"), (("https://wallhaven.cc/search?q=id%3A87" "&categories=111&purity=100&sorting=date_added&order=asc&page=3"), { - "count": 4, - "url": "d024bc11895d758b76ffdb0fa85a627e53f072cf", + "count": 5, + "url": 
"d477b68a534c3416d506ae1f159b25debab64678", }), ) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 8b61024..9c76336 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -29,6 +29,7 @@ class Job(): extr.log.job = self extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url) + self.status = 0 self.pred_url = self._prepare_predicates("image", True) self.pred_queue = self._prepare_predicates("chapter", False) @@ -46,34 +47,18 @@ class Job(): log = self.extractor.log for msg in self.extractor: self.dispatch(msg) - except exception.AuthenticationError as exc: - msg = str(exc) or "Please provide a valid username/password pair." - log.error("Authentication failed: %s", msg) - except exception.AuthorizationError: - log.error("You do not have permission to access the resource " - "at '%s'", self.extractor.url) - except exception.NotFoundError as exc: - res = str(exc) or "resource (gallery/image/user)" - log.error("The %s at '%s' does not exist", res, self.extractor.url) - except exception.HttpError as exc: - err = exc.args[0] - if isinstance(err, Exception): - err = "{}: {}".format(err.__class__.__name__, err) - log.error("HTTP request failed: %s", err) - except exception.FormatError as exc: - err, obj = exc.args - log.error("Applying %s format string failed: %s: %s", - obj, err.__class__.__name__, err) - except exception.FilterError as exc: - err = exc.args[0] - log.error("Evaluating filter expression failed: %s: %s", - err.__class__.__name__, err) - except exception.StopExtraction: - pass + except exception.StopExtraction as exc: + if exc.message: + log.error(exc.message) + self.status |= exc.code + except exception.GalleryDLException as exc: + log.error("%s: %s", exc.__class__.__name__, exc) + self.status |= exc.code except OSError as exc: log.error("Unable to download data: %s: %s", exc.__class__.__name__, exc) log.debug("", exc_info=True) + self.status |= 128 except Exception as exc: log.error(("An unexpected error occurred: %s - %s. 
" "Please run gallery-dl again with the --verbose flag, " @@ -81,8 +66,13 @@ class Job(): "https://github.com/mikf/gallery-dl/issues ."), exc.__class__.__name__, exc) log.debug("", exc_info=True) + self.status |= 1 + except BaseException: + self.status |= 1 + raise finally: self.handle_finalize() + return self.status def dispatch(self, msg): """Call the appropriate message handler""" @@ -114,17 +104,17 @@ class Job(): ) # TODO: support for multiple message versions - def handle_url(self, url, keywords): + def handle_url(self, url, kwdict): """Handle Message.Url""" - def handle_urllist(self, urls, keywords): + def handle_urllist(self, urls, kwdict): """Handle Message.Urllist""" - self.handle_url(urls[0], keywords) + self.handle_url(urls[0], kwdict) - def handle_directory(self, keywords): + def handle_directory(self, kwdict): """Handle Message.Directory""" - def handle_queue(self, url, keywords): + def handle_queue(self, url, kwdict): """Handle Message.Queue""" def handle_finalize(self): @@ -132,8 +122,9 @@ class Job(): def update_kwdict(self, kwdict): """Update 'kwdict' with additional metadata""" - kwdict["category"] = self.extractor.category - kwdict["subcategory"] = self.extractor.subcategory + extr = self.extractor + kwdict["category"] = extr.category + kwdict["subcategory"] = extr.subcategory if self.userkwds: kwdict.update(self.userkwds) @@ -189,14 +180,14 @@ class DownloadJob(Job): self.postprocessors = None self.out = output.select() - def handle_url(self, url, keywords, fallback=None): + def handle_url(self, url, kwdict, fallback=None): """Download the resource specified in 'url'""" postprocessors = self.postprocessors pathfmt = self.pathfmt archive = self.archive # prepare download - pathfmt.set_filename(keywords) + pathfmt.set_filename(kwdict) if postprocessors: for pp in postprocessors: @@ -219,6 +210,7 @@ class DownloadJob(Job): break else: # download failed + self.status |= 4 self.log.error("Failed to download %s", pathfmt.filename or url) return @@ -236,41 +228,45 @@ class DownloadJob(Job): pathfmt.finalize() self.out.success(pathfmt.path, 0) if archive: - archive.add(keywords) + archive.add(kwdict) if postprocessors: for pp in postprocessors: pp.run_after(pathfmt) self._skipcnt = 0 - def handle_urllist(self, urls, keywords): + def handle_urllist(self, urls, kwdict): """Download the resource specified in 'url'""" fallback = iter(urls) url = next(fallback) - self.handle_url(url, keywords, fallback) + self.handle_url(url, kwdict, fallback) - def handle_directory(self, keywords): + def handle_directory(self, kwdict): """Set and create the target directory for downloads""" if not self.pathfmt: - self.initialize(keywords) + self.initialize(kwdict) else: - self.pathfmt.set_directory(keywords) + self.pathfmt.set_directory(kwdict) - def handle_queue(self, url, keywords): - if "_extractor" in keywords: - extr = keywords["_extractor"].from_url(url) + def handle_queue(self, url, kwdict): + if "_extractor" in kwdict: + extr = kwdict["_extractor"].from_url(url) else: extr = extractor.find(url) if extr: - self.__class__(extr, self).run() + self.status |= self.__class__(extr, self).run() else: self._write_unsupported(url) def handle_finalize(self): - if self.postprocessors: - for pp in self.postprocessors: - pp.finalize() + pathfmt = self.pathfmt if self.archive: self.archive.close() + if pathfmt: + self.extractor._store_cookies() + if self.postprocessors: + status = self.status + for pp in self.postprocessors: + pp.run_final(pathfmt, status) def handle_skip(self): 
self.out.skip(self.pathfmt.path) @@ -308,11 +304,11 @@ class DownloadJob(Job): self.downloaders[scheme] = instance return instance - def initialize(self, keywords=None): + def initialize(self, kwdict=None): """Delayed initialization of PathFormat, etc.""" self.pathfmt = util.PathFormat(self.extractor) - if keywords: - self.pathfmt.set_directory(keywords) + if kwdict: + self.pathfmt.set_directory(kwdict) self.sleep = self.extractor.config("sleep") if not self.extractor.config("download", True): @@ -379,15 +375,15 @@ class DownloadJob(Job): class SimulationJob(DownloadJob): """Simulate the extraction process without downloading anything""" - def handle_url(self, url, keywords, fallback=None): - self.pathfmt.set_filename(keywords) + def handle_url(self, url, kwdict, fallback=None): + self.pathfmt.set_filename(kwdict) self.out.skip(self.pathfmt.path) if self.sleep: time.sleep(self.sleep) if self.archive: - self.archive.add(keywords) + self.archive.add(kwdict) - def handle_directory(self, keywords): + def handle_directory(self, kwdict): if not self.pathfmt: self.initialize() @@ -395,19 +391,19 @@ class SimulationJob(DownloadJob): class KeywordJob(Job): """Print available keywords""" - def handle_url(self, url, keywords): + def handle_url(self, url, kwdict): print("\nKeywords for filenames and --filter:") print("------------------------------------") - self.print_keywords(keywords) + self.print_kwdict(kwdict) raise exception.StopExtraction() - def handle_directory(self, keywords): + def handle_directory(self, kwdict): print("Keywords for directory names:") print("-----------------------------") - self.print_keywords(keywords) + self.print_kwdict(kwdict) - def handle_queue(self, url, keywords): - if not keywords: + def handle_queue(self, url, kwdict): + if not kwdict: self.extractor.log.info( "This extractor delegates work to other extractors " "and does not provide any keywords on its own. 
Try " @@ -415,27 +411,27 @@ class KeywordJob(Job): else: print("Keywords for --chapter-filter:") print("------------------------------") - self.print_keywords(keywords) + self.print_kwdict(kwdict) if self.extractor.categorytransfer: print() KeywordJob(url, self).run() raise exception.StopExtraction() @staticmethod - def print_keywords(keywords, prefix=""): - """Print key-value pairs with formatting""" + def print_kwdict(kwdict, prefix=""): + """Print key-value pairs in 'kwdict' with formatting""" suffix = "]" if prefix else "" - for key, value in sorted(keywords.items()): + for key, value in sorted(kwdict.items()): if key[0] == "_": continue key = prefix + key + suffix if isinstance(value, dict): - KeywordJob.print_keywords(value, key + "[") + KeywordJob.print_kwdict(value, key + "[") elif isinstance(value, list): if value and isinstance(value[0], dict): - KeywordJob.print_keywords(value[0], key + "[][") + KeywordJob.print_kwdict(value[0], key + "[][") else: print(key, "[]", sep="") for val in value: @@ -502,6 +498,7 @@ class DataJob(Job): # dump to 'file' util.dump_json(self.data, self.file, self.ascii, 2) + return 0 def handle_url(self, url, kwdict): self.data.append((Message.Url, url, self._filter(kwdict))) diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py index 69ab4f6..3093a72 100644 --- a/gallery_dl/oauth.py +++ b/gallery_dl/oauth.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -126,7 +126,7 @@ class OAuth1API(): self.session = extractor.session self.api_key = api_key - def request(self, url, method="GET", **kwargs): + def request(self, url, **kwargs): kwargs["fatal"] = None kwargs["session"] = self.session - return self.extractor.request(url, method, **kwargs) + return self.extractor.request(url, **kwargs) diff --git a/gallery_dl/option.py b/gallery_dl/option.py index d3119b7..3118b83 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -321,13 +321,26 @@ def build_parser(): ) postprocessor.add_argument( "--ugoira-conv", - dest="postprocessors", - action="append_const", const={"name": "ugoira", "ffmpeg-args": ( - "-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"), - "whitelist": ("pixiv", "danbooru")}, + dest="postprocessors", action="append_const", const={ + "name" : "ugoira", + "ffmpeg-args" : ("-c:v", "libvpx", "-crf", "4", "-b:v", "5000k"), + "ffmpeg-twopass": True, + "whitelist" : ("pixiv", "danbooru"), + }, help="Convert Pixiv Ugoira to WebM (requires FFmpeg)", ) postprocessor.add_argument( + "--ugoira-conv-lossless", + dest="postprocessors", action="append_const", const={ + "name" : "ugoira", + "ffmpeg-args" : ("-c:v", "libvpx-vp9", "-lossless", "1", + "-pix_fmt", "yuv420p"), + "ffmpeg-twopass": False, + "whitelist" : ("pixiv", "danbooru"), + }, + help="Convert Pixiv Ugoira to WebM in VP9 lossless mode", + ) + postprocessor.add_argument( "--write-metadata", dest="postprocessors", action="append_const", const={"name": "metadata"}, diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index 71ef932..83b42eb 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -31,8 +31,8 @@ class PostProcessor(): """Execute postprocessor after moving a file to its target location""" @staticmethod - def finalize(): - """Cleanup""" + def run_final(pathfmt, status): + """Postprocessor finalization after 
all files have been downloaded""" def __repr__(self): return self.__class__.__name__ diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index 19a9b87..0a56281 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -26,17 +26,26 @@ class ExecPP(PostProcessor): def __init__(self, pathfmt, options): PostProcessor.__init__(self) args = options["command"] + final = options.get("final", False) if isinstance(args, str): + if final: + self._format = self._format_args_directory + else: + self._format = self._format_args_path if "{}" not in args: args += " {}" self.args = args self.shell = True - self._format = self._format_args_string else: + self._format = self._format_args_list self.args = [util.Formatter(arg) for arg in args] self.shell = False - self._format = self._format_args_list + + if final: + self.run_after = PostProcessor.run_after + else: + self.run_final = PostProcessor.run_final if options.get("async", False): self._exec = self._exec_async @@ -44,9 +53,16 @@ class ExecPP(PostProcessor): def run_after(self, pathfmt): self._exec(self._format(pathfmt)) - def _format_args_string(self, pathfmt): + def run_final(self, pathfmt, status): + if status == 0: + self._exec(self._format(pathfmt)) + + def _format_args_path(self, pathfmt): return self.args.replace("{}", quote(pathfmt.realpath)) + def _format_args_directory(self, pathfmt): + return self.args.replace("{}", quote(pathfmt.realdirectory)) + def _format_args_list(self, pathfmt): kwdict = pathfmt.kwdict kwdict["_directory"] = pathfmt.realdirectory diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index 6659a8d..42f7608 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -59,7 +59,7 @@ class ZipPP(PostProcessor): with zipfile.ZipFile(*self.args) as zfile: self._write(pathfmt, zfile) - def finalize(self): + def run_final(self, pathfmt, status): if self.zfile: self.zfile.close() diff --git a/gallery_dl/util.py b/gallery_dl/util.py index d87184d..fb51edf 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -52,10 +52,10 @@ def advance(iterable, num): return iterator -def raises(obj): - """Returns a function that raises 'obj' as exception""" - def wrap(): - raise obj +def raises(cls): + """Returns a function that raises 'cls' as exception""" + def wrap(*args): + raise cls(*args) return wrap @@ -287,21 +287,21 @@ class UniquePredicate(): class FilterPredicate(): """Predicate; True if evaluating the given expression returns True""" - globalsdict = { - "parse_int": text.parse_int, - "urlsplit": urllib.parse.urlsplit, - "datetime": datetime.datetime, - "abort": raises(exception.StopExtraction()), - "re": re, - } def __init__(self, filterexpr, target="image"): name = "<{} filter>".format(target) self.codeobj = compile(filterexpr, name, "eval") + self.globals = { + "parse_int": text.parse_int, + "urlsplit" : urllib.parse.urlsplit, + "datetime" : datetime.datetime, + "abort" : raises(exception.StopExtraction), + "re" : re, + } def __call__(self, url, kwds): try: - return eval(self.codeobj, self.globalsdict, kwds) + return eval(self.codeobj, self.globals, kwds) except exception.GalleryDLException: raise except Exception as exc: @@ -528,7 +528,7 @@ class PathFormat(): self.filename_formatter = Formatter( filename_fmt, kwdefault).format_map except Exception as exc: - raise exception.FormatError(exc, "filename") + raise exception.FilenameFormatError(exc) try: self.directory_formatters = [ @@ -536,7 +536,7 @@ class 
PathFormat(): for dirfmt in directory_fmt ] except Exception as exc: - raise exception.FormatError(exc, "directory") + raise exception.DirectoryFormatError(exc) self.directory = self.realdirectory = "" self.filename = "" @@ -616,7 +616,7 @@ class PathFormat(): if segment: append(self.clean_segment(segment)) except Exception as exc: - raise exception.FormatError(exc, "directory") + raise exception.DirectoryFormatError(exc) # Join path segements sep = os.sep @@ -673,7 +673,7 @@ class PathFormat(): self.filename = filename = self.clean_path(self.clean_segment( self.filename_formatter(self.kwdict))) except Exception as exc: - raise exception.FormatError(exc, "filename") + raise exception.FilenameFormatError(exc) # Combine directory and filename to full paths self.path = self.directory + filename diff --git a/gallery_dl/version.py b/gallery_dl/version.py index e83bed6..4d73139 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.10.6" +__version__ = "1.11.1" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index f9f5cd8..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -requests>=2.11.0 diff --git a/scripts/bash_completion.py b/scripts/bash_completion.py deleted file mode 100755 index 69e6a79..0000000 --- a/scripts/bash_completion.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright 2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Generate bash completion script from gallery-dl's argument parser""" - -import util -from gallery_dl import option - - -TEMPLATE = """_gallery_dl() -{ - local cur prev - COMPREPLY=() - cur="${COMP_WORDS[COMP_CWORD]}" - prev="${COMP_WORDS[COMP_CWORD-1]}" - - if [[ "${prev}" =~ ^(%(fileopts)s)$ ]]; then - COMPREPLY=( $(compgen -f -- "${cur}") ) - elif [[ "${prev}" =~ ^(%(diropts)s)$ ]]; then - COMPREPLY=( $(compgen -d -- "${cur}") ) - else - COMPREPLY=( $(compgen -W "%(opts)s" -- "${cur}") ) - fi -} - -complete -F _gallery_dl gallery-dl -""" - -opts = [] -diropts = [] -fileopts = [] -for action in option.build_parser()._actions: - - if action.metavar in ("DEST",): - diropts.extend(action.option_strings) - - elif action.metavar in ("FILE", "CFG"): - fileopts.extend(action.option_strings) - - for opt in action.option_strings: - if opt.startswith("--"): - opts.append(opt) - -PATH = util.path("gallery-dl.bash_completion") -with open(PATH, "w", encoding="utf-8") as file: - file.write(TEMPLATE % { - "opts" : " ".join(opts), - "diropts" : "|".join(diropts), - "fileopts": "|".join(fileopts), - }) diff --git a/scripts/build_testresult_db.py b/scripts/build_testresult_db.py deleted file mode 100755 index fda9f64..0000000 --- a/scripts/build_testresult_db.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -"""Collect results of extractor unit tests""" - -import sys -import os.path -import datetime - -import util -from gallery_dl import extractor, job, config -from test.test_results import setup_test_config - - -# filter test cases - -tests = [ - (idx, extr, url, result) - - for extr in extractor.extractors() - if hasattr(extr, "test") and extr.test - if len(sys.argv) <= 1 or extr.category in sys.argv - - for idx, (url, result) in 
enumerate(extr._get_tests()) - if result -] - - -# setup target directory - -path = util.path("archive", "testdb", str(datetime.date.today())) -os.makedirs(path, exist_ok=True) - - -for idx, extr, url, result in tests: - - # filename - name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx) - print(name) - - # config values - setup_test_config() - - if "options" in result: - for key, value in result["options"]: - config.set(key.split("."), value) - if "range" in result: - config.set(("image-range",), result["range"]) - config.set(("chapter-range",), result["range"]) - - # write test data - try: - with open(os.path.join(path, name), "w") as outfile: - job.DataJob(url, file=outfile, ensure_ascii=False).run() - except KeyboardInterrupt: - sys.exit() diff --git a/scripts/create_test_data.py b/scripts/create_test_data.py deleted file mode 100755 index 14ab0c0..0000000 --- a/scripts/create_test_data.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright 2015-2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Create testdata for extractor tests""" - -import argparse - -import util # noqa -from gallery_dl import extractor -from test.test_results import ResultJob, setup_test_config - - -TESTDATA_FMT = """ - test = ("{}", {{ - "url": "{}", - "keyword": "{}", - "content": "{}", - }}) -""" - -TESTDATA_EXCEPTION_FMT = """ - test = ("{}", {{ - "exception": exception.{}, - }}) -""" - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--content", action="store_true") - parser.add_argument("--recreate", action="store_true") - parser.add_argument("urls", nargs="*") - args = parser.parse_args() - - if args.recreate: - urls = [ - test[0] - for extr in extractor.extractors() if extr.category in args.urls - for test in extr.test - ] - else: - urls = args.urls - - setup_test_config() - - for url in urls: - tjob = ResultJob(url, content=args.content) - try: - tjob.run() - except Exception as exc: - fmt = TESTDATA_EXCEPTION_FMT - data = (exc.__class__.__name__,) - else: - fmt = TESTDATA_FMT - data = (tjob.hash_url.hexdigest(), - tjob.hash_keyword.hexdigest(), - tjob.hash_content.hexdigest()) - print(tjob.extractor.__class__.__name__) - print(fmt.format(url, *data)) - - -if __name__ == '__main__': - main() diff --git a/scripts/hook-gallery_dl.py b/scripts/hook-gallery_dl.py deleted file mode 100644 index d549019..0000000 --- a/scripts/hook-gallery_dl.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- - -from gallery_dl import extractor, downloader, postprocessor - -hiddenimports = [ - package.__name__ + "." + module - for package in (extractor, downloader, postprocessor) - for module in package.modules -] diff --git a/scripts/man.py b/scripts/man.py deleted file mode 100755 index 91608a3..0000000 --- a/scripts/man.py +++ /dev/null @@ -1,304 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright 2019 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
- -"""Generate man pages""" - -import re -import datetime - -import util -import gallery_dl.option -import gallery_dl.version - - -def build_gallery_dl_1(path=None): - - OPTS_FMT = """.TP\n.B "{}" {}\n{}""" - - TEMPLATE = r""" -.TH "GALLERY-DL" "1" "%(date)s" "%(version)s" "gallery-dl Manual" -.\" disable hyphenation -.nh - -.SH NAME -gallery-dl \- download image-galleries and -collections - -.SH SYNOPSIS -.B gallery-dl -[OPTION]... URL... - -.SH DESCRIPTION -.B gallery-dl -is a command-line program to download image-galleries and -collections -from several image hosting sites. It is a cross-platform tool -with many configuration options and powerful filenaming capabilities. - -.SH OPTIONS -%(options)s - -.SH EXAMPLES -.TP -gallery-dl \f[I]URL\f[] -Download images from \f[I]URL\f[]. -.TP -gallery-dl -g -u <username> -p <password> \f[I]URL\f[] -Print direct URLs from a site that requires authentication. -.TP -gallery-dl --filter 'type == "ugoira"' --range '2-4' \f[I]URL\f[] -Apply filter and range expressions. This will only download -the second, third, and fourth file where its type value is equal to "ugoira". -.TP -gallery-dl r:\f[I]URL\f[] -Scan \f[I]URL\f[] for other URLs and invoke \f[B]gallery-dl\f[] on them. -.TP -gallery-dl oauth:\f[I]SITE\-NAME\f[] -Gain OAuth authentication tokens for -.IR deviantart , -.IR flickr , -.IR reddit , -.IR smugmug ", and" -.IR tumblr . - -.SH FILES -.TP -.I /etc/gallery-dl.conf -The system wide configuration file. -.TP -.I ~/.config/gallery-dl/config.json -Per user configuration file. -.TP -.I ~/.gallery-dl.conf -Alternate per user configuration file. - -.SH BUGS -https://github.com/mikf/gallery-dl/issues - -.SH AUTHORS -Mike Fährmann <mike_faehrmann@web.de> -.br -and https://github.com/mikf/gallery-dl/graphs/contributors - -.SH "SEE ALSO" -.BR gallery-dl.conf (5) -""" - - options = [] - for action in gallery_dl.option.build_parser()._actions: - if action.help.startswith("=="): - continue - options.append(OPTS_FMT.format( - ", ".join(action.option_strings).replace("-", r"\-"), - r"\f[I]{}\f[]".format(action.metavar) if action.metavar else "", - action.help, - )) - - if not path: - path = util.path("gallery-dl.1") - with open(path, "w", encoding="utf-8") as file: - file.write(TEMPLATE.lstrip() % { - "options": "\n".join(options), - "version": gallery_dl.version.__version__, - "date" : datetime.datetime.now().strftime("%Y-%m-%d"), - }) - - -def build_gallery_dl_conf_5(path=None): - - TEMPLATE = r""" -.TH "GALLERY-DL.CONF" "5" "%(date)s" "%(version)s" "gallery-dl Manual" -.\" disable hyphenation -.nh -.\" disable justification (adjust text to left margin only) -.ad l - -.SH NAME -gallery-dl.conf \- gallery-dl configuration file - -.SH DESCRIPTION -gallery-dl will search for configuration files in the following places -every time it is started, unless -.B --ignore-config -is specified: -.PP -.RS 4 -.nf -.I /etc/gallery-dl.conf -.I $HOME/.config/gallery-dl/config.json -.I $HOME/.gallery-dl.conf -.fi -.RE -.PP -It is also possible to specify additional configuration files with the -.B -c/--config -command-line option or to add further option values with -.B -o/--option -as <key>=<value> pairs, - -Configuration files are JSON-based and therefore don't allow any ordinary -comments, but, since unused keys are simply ignored, it is possible to utilize -those as makeshift comments by settings their values to arbitrary strings. 
- -.SH EXAMPLE -{ -.RS 4 -"base-directory": "/tmp/", -.br -"extractor": { -.RS 4 -"pixiv": { -.RS 4 -"directory": ["Pixiv", "Works", "{user[id]}"], -.br -"filename": "{id}{num}.{extension}", -.br -"username": "foo", -.br -"password": "bar" -.RE -}, -.br -"flickr": { -.RS 4 -"_comment": "OAuth keys for account 'foobar'", -.br -"access-token": "0123456789-0123456789abcdef", -.br -"access-token-secret": "fedcba9876543210" -.RE -} -.RE -}, -.br -"downloader": { -.RS 4 -"retries": 3, -.br -"timeout": 2.5 -.RE -} -.RE -} - -%(options)s - -.SH BUGS -https://github.com/mikf/gallery-dl/issues - -.SH AUTHORS -Mike Fährmann <mike_faehrmann@web.de> -.br -and https://github.com/mikf/gallery-dl/graphs/contributors - -.SH "SEE ALSO" -.BR gallery-dl (1) -""" - - sections = parse_docs_configuration() - content = [] - - for sec_name, section in sections.items(): - content.append(".SH " + sec_name.upper()) - - for opt_name, option in section.items(): - content.append(".SS " + opt_name) - - for field, text in option.items(): - if field in ("Type", "Default"): - content.append('.IP "{}:" {}'.format(field, len(field)+2)) - content.append(strip_rst(text)) - else: - content.append('.IP "{}:" 4'.format(field)) - content.append(strip_rst(text, field != "Example")) - - if not path: - path = util.path("gallery-dl.conf.5") - with open(path, "w", encoding="utf-8") as file: - file.write(TEMPLATE.lstrip() % { - "options": "\n".join(content), - "version": gallery_dl.version.__version__, - "date" : datetime.datetime.now().strftime("%Y-%m-%d"), - }) - - -def parse_docs_configuration(): - - doc_path = util.path("docs", "configuration.rst") - with open(doc_path, encoding="utf-8") as file: - doc_lines = file.readlines() - - sections = {} - sec_name = None - options = None - opt_name = None - opt_desc = None - name = None - last = last2 = None - for line in doc_lines: - - # start of new section - if re.match(r"^=+$", line): - if sec_name and options: - sections[sec_name] = options - sec_name = last.strip() - options = {} - - elif re.match(r"^=+ =+$", line): - # start of option table - if re.match(r"^-+$", last): - opt_name = last2.strip() - opt_desc = {} - # end of option table - elif opt_desc: - options[opt_name] = opt_desc - opt_name = None - name = None - - # inside option table - elif opt_name: - if line[0].isalpha(): - name, _, line = line.partition(" ") - opt_desc[name] = "" - line = line.strip() - if line.startswith(("* ", "- ")): - line = "\n" + line - elif line.startswith("| "): - line = line[2:] + "\n.br" - opt_desc[name] += line + "\n" - - last2 = last - last = line - sections[sec_name] = options - - return sections - - -def strip_rst(text, extended=True, *, ITALIC=r"\\f[I]\1\\f[]", REGULAR=r"\1"): - - text = text.replace("\\", "\\\\") - - # ``foo`` - repl = ITALIC if extended else REGULAR - text = re.sub(r"``([^`]+)``", repl, text) - # |foo|_ - text = re.sub(r"\|([^|]+)\|_*", ITALIC, text) - # `foo`_ - text = re.sub(r"`([^`]+)`_+", ITALIC, text) - # `foo` - text = re.sub(r"`([^`]+)`", REGULAR, text) - # foo_ - text = re.sub(r"([A-Za-z0-9-]+)_+(?=\s)", ITALIC, text) - # ------- - text = re.sub(r"---+", "", text) - - return text - - -if __name__ == "__main__": - build_gallery_dl_1() - build_gallery_dl_conf_5() diff --git a/scripts/pyinstaller.py b/scripts/pyinstaller.py deleted file mode 100755 index 879ae50..0000000 --- a/scripts/pyinstaller.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -"""Build a standalone executable using PyInstaller""" - -import PyInstaller.__main__ -import 
util - -PyInstaller.__main__.run([ - "--onefile", - "--console", - "--name", "gallery-dl." + ("exe" if PyInstaller.is_win else "bin"), - "--additional-hooks-dir", util.path("scripts"), - "--distpath", util.path("dist"), - "--workpath", util.path("build"), - "--specpath", util.path("build"), - util.path("gallery_dl", "__main__.py"), -]) diff --git a/scripts/release.sh b/scripts/release.sh deleted file mode 100755 index ef444e0..0000000 --- a/scripts/release.sh +++ /dev/null @@ -1,167 +0,0 @@ -#!/bin/bash -set -e - -prompt() { - echo "root: ${ROOTDIR} old: ${OLDVERSION} - new: ${NEWVERSION}" - read -n 1 -r -p "Proceed? [Y/n] " P - echo - if [ "$P" == y -o "$P" == Y -o -z "$P" ]; then - return 0 - else - exit 1 - fi -} - -cleanup() { - cd "${ROOTDIR}" - echo Removing old build directory - - if [ -d ./build ]; then - rm -rf ./build - fi -} - -update() { - cd "${ROOTDIR}" - echo Updating version to ${NEWVERSION} - - sed -i "s#\"${PYVERSION}\"#\"${NEWVERSION}\"#" "gallery_dl/version.py" - sed -i "s#v${OLDVERSION}#v${NEWVERSION}#" "${README}" -} - -update-dev() { - cd "${ROOTDIR}" - - IFS="." read MAJOR MINOR BUILD <<< "${NEWVERSION}" - BUILD=$((BUILD+1)) - # update version to -dev - sed -i "s#\"${NEWVERSION}\"#\"${MAJOR}.${MINOR}.${BUILD}-dev\"#" "gallery_dl/version.py" - # add 'unreleased' line to changelog - sed -i "2i\\\n## Unreleased" "${CHANGELOG}" - - git add "gallery_dl/version.py" "${CHANGELOG}" -} - -build-python() { - cd "${ROOTDIR}" - echo Building bdist_wheel and sdist - - python setup.py bdist_wheel sdist -} - -build-linux() { - cd "${ROOTDIR}" - echo Building Linux executable - - make executable -} - -build-windows() { - cd "${ROOTDIR}/dist" - echo Building Windows executable - - # remove old executable - rm -f "gallery-dl.exe" - - # build windows exe in vm - ln -fs "${ROOTDIR}" /tmp/ - vmstart "Windows 7" & - disown - while [ ! -e "gallery-dl.exe" ] ; do - sleep 5 - done - sleep 2 - - # check exe version - OUTPUT="$(wine gallery-dl.exe --version)" - if [[ ! "${OUTPUT%?}" == "${NEWVERSION}" ]]; then - echo "exe version mismatch: ${OUTPUT} != ${NEWVERSION}" - exit 3 - fi -} - -sign() { - cd "${ROOTDIR}/dist" - echo Signing files - - gpg --detach-sign --armor gallery_dl-${NEWVERSION}-py3-none-any.whl - gpg --detach-sign --armor gallery_dl-${NEWVERSION}.tar.gz - gpg --detach-sign --yes gallery-dl.exe - gpg --detach-sign --yes gallery-dl.bin -} - -changelog() { - cd "${ROOTDIR}" - echo Updating "${CHANGELOG}" - - # - replace "#NN" with link to actual issue - # - insert new version and date - sed -i \ - -e "s*\([( ]\)#\([0-9]\+\)*\1[#\2](https://github.com/mikf/gallery-dl/issues/\2)*g" \ - -e "s*^## [Uu]nreleased*## ${NEWVERSION} - $(date +%Y-%m-%d)*" \ - "${CHANGELOG}" -} - -supportedsites() { - cd "${ROOTDIR}" - echo Checking if "${SUPPORTEDSITES}" is up to date - - ./scripts/supportedsites.py - if ! 
git diff --quiet "${SUPPORTEDSITES}"; then - echo "updated ${SUPPORTEDSITES} contains changes" - exit 4 - fi -} - -git-upload() { - cd "${ROOTDIR}" - echo Pushing changes to github - - git add "gallery_dl/version.py" "${README}" "${CHANGELOG}" - git commit -S -m "release version ${NEWVERSION}" - git tag -s -m "version ${NEWVERSION}" "v${NEWVERSION}" - git push - git push origin "v${NEWVERSION}" -} - -pypi-upload() { - cd "${ROOTDIR}/dist" - echo Uploading to PyPI - - twine upload gallery_dl-${NEWVERSION}* -} - - -ROOTDIR="$(realpath "$(dirname "$0")/..")/" -README="README.rst" -CHANGELOG="CHANGELOG.md" -SUPPORTEDSITES="./docs/supportedsites.rst" - -LASTTAG="$(git describe --abbrev=0 --tags)" -OLDVERSION="${LASTTAG#v}" -PYVERSION="$(python -c "import gallery_dl as g; print(g.__version__)")" - -if [[ "$1" ]]; then - NEWVERSION="$1" -else - NEWVERSION="${PYVERSION%-dev}" -fi - -if [[ ! $NEWVERSION =~ [0-9]+\.[0-9]+\.[0-9]+(-[a-z]+(\.[0-9]+)?)?$ ]]; then - echo "invalid version: $NEWVERSION" - exit 2 -fi - - -prompt -supportedsites -cleanup -update -build-python -build-linux -build-windows -sign -changelog -git-upload -pypi-upload -update-dev diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh deleted file mode 100755 index d8c8a03..0000000 --- a/scripts/run_tests.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" - -TESTS_CORE=(config cookies downloader extractor oauth postprocessor text util) -TESTS_RESULTS=(results) - - -# select tests -case "${1:-${GALLERYDL_TESTS:-core}}" in - core) TESTS=( ${TESTS_CORE[@]} );; - results) TESTS=( ${TESTS_RESULTS[@]} );; - *) TESTS=( );; -esac - - -# transform each array element to test_###.py -TESTS=( ${TESTS[@]/#/test_} ) -TESTS=( ${TESTS[@]/%/.py} ) - - -# run 'nosetests' with selected tests -# (or all tests if ${TESTS} is empty) -nosetests --verbose -w "${DIR}/../test" ${TESTS[@]} diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py deleted file mode 100755 index 96c11d6..0000000 --- a/scripts/supportedsites.py +++ /dev/null @@ -1,285 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -"""Generate a reStructuredText document with all supported sites""" - -import sys -import collections - -import util -from gallery_dl import extractor - - -CATEGORY_MAP = { - "2chan" : "Futaba Channel", - "35photo" : "35PHOTO", - "adultempire" : "Adult Empire", - "archivedmoe" : "Archived.Moe", - "archiveofsins" : "Archive of Sins", - "artstation" : "ArtStation", - "b4k" : "arch.b4k.co", - "bobx" : "BobX", - "deviantart" : "DeviantArt", - "dokireader" : "Doki Reader", - "dynastyscans" : "Dynasty Reader", - "e621" : "e621", - "erolord" : "EroLord.com", - "e-hentai" : "E-Hentai", - "exhentai" : "ExHentai", - "fallenangels" : "Fallen Angels Scans", - "fashionnova" : "Fashion Nova", - "hbrowse" : "HBrowse", - "hentai2read" : "Hentai2Read", - "hentaicafe" : "Hentai Cafe", - "hentaifoundry" : "Hentai Foundry", - "hentaifox" : "HentaiFox", - "hentaihere" : "HentaiHere", - "hitomi" : "Hitomi.la", - "idolcomplex" : "Idol Complex", - "imagebam" : "ImageBam", - "imagefap" : "ImageFap", - "imgbb" : "ImgBB", - "imgbox" : "imgbox", - "imgth" : "imgth", - "imgur" : "imgur", - "jaiminisbox" : "Jaimini's Box", - "kireicake" : "Kirei Cake", - "kissmanga" : "KissManga", - "lineblog" : "LINE BLOG", - "livedoor" : "livedoor Blog", - "mangadex" : "MangaDex", - "mangafox" : "Manga Fox", - "mangahere" : "Manga Here", - "mangapark" : "MangaPark", - "mangastream" : "Manga Stream", - "myportfolio" : 
"Adobe Portfolio", - "nhentai" : "nhentai", - "nijie" : "nijie", - "nsfwalbum" : "NSFWalbum.com", - "nyafuu" : "Nyafuu Archive", - "paheal" : "rule #34", - "powermanga" : "PowerManga", - "readcomiconline": "Read Comic Online", - "rbt" : "RebeccaBlackTech", - "rule34" : "Rule 34", - "sankaku" : "Sankaku Channel", - "sankakucomplex" : "Sankaku Complex", - "seaotterscans" : "Sea Otter Scans", - "seiga" : "Niconico Seiga", - "senmanga" : "Sen Manga", - "sensescans" : "Sense-Scans", - "sexcom" : "Sex.com", - "simplyhentai" : "Simply Hentai", - "slickpic" : "SlickPic", - "slideshare" : "SlideShare", - "smugmug" : "SmugMug", - "thebarchive" : "The /b/ Archive", - "vanillarock" : "もえぴりあ", - "vsco" : "VSCO", - "wikiart" : "WikiArt.org", - "worldthree" : "World Three", - "xhamster" : "xHamster", - "xvideos" : "XVideos", - "yaplog" : "yaplog!", - "yuki" : "yuki.la 4chan archive", -} - -SUBCATEGORY_MAP = { - "artwork": "Artwork Listings", - "artists": "", - "doujin" : "Doujin", - "gallery": "Galleries", - "image" : "individual Images", - "issue" : "Comic-Issues", - "manga" : "Manga", - "me" : "pixiv.me Links", - "media" : "Media Timelines", - "path" : "Images from Users and Folders", - "pinit" : "pin.it Links", - "popular": "Popular Images", - "recent" : "Recent Images", - "search" : "Search Results", - "stash" : "Sta.sh", - "status" : "Images from Statuses", - "tag" : "Tag-Searches", - "user" : "User Profiles", - "work" : "Individual Images", - "related-pin" : "related Pins", - "related-board": "", -} - -AUTH_MAP = { - "danbooru" : "Optional", - "deviantart" : "Optional (OAuth)", - "e-hentai" : "Optional", - "exhentai" : "Optional", - "flickr" : "Optional (OAuth)", - "idolcomplex": "Optional", - "imgbb" : "Optional", - "instagram" : "Optional", - "luscious" : "Optional", - "mangoxo" : "Optional", - "nijie" : "Required", - "pixiv" : "Required", - "reddit" : "Optional (OAuth)", - "sankaku" : "Optional", - "seiga" : "Required", - "smugmug" : "Optional (OAuth)", - "tsumino" : "Optional", - "tumblr" : "Optional (OAuth)", - "twitter" : "Optional", - "wallhaven" : ("Optional (`API Key " - "<configuration.rst#extractorwallhavenapi-key>`__)"), -} - -IGNORE_LIST = ( - "directlink", - "oauth", - "recursive", - "test", -) - - -def domain(cls): - """Return the web-domain related to an extractor class""" - url = sys.modules[cls.__module__].__doc__.split()[-1] - if url.startswith("http"): - return url - - if hasattr(cls, "root") and cls.root: - return cls.root + "/" - - if hasattr(cls, "https"): - scheme = "https" if cls.https else "http" - netloc = cls.__doc__.split()[-1] - return "{}://{}/".format(scheme, netloc) - - test = next(cls._get_tests(), None) - if test: - url = test[0] - return url[:url.find("/", 8)+1] - - return "" - - -def category_text(cls): - """Return a human-readable representation of a category""" - c = cls.category - return CATEGORY_MAP.get(c) or c.capitalize() - - -def subcategory_text(cls): - """Return a human-readable representation of a subcategory""" - sc = cls.subcategory - if sc in SUBCATEGORY_MAP: - return SUBCATEGORY_MAP[sc] - sc = sc.capitalize() - return sc if sc.endswith("s") else sc + "s" - - -def category_key(cls): - """Generate sorting keys by category""" - key = category_text(cls).lower() - if cls.__module__.endswith(".imagehosts"): - key = "zz" + key - return key - - -def subcategory_key(cls): - """Generate sorting keys by subcategory""" - if cls.subcategory == "issue": - return "A" - return cls.subcategory - - -def build_extractor_list(): - """Generate a sorted list of lists of 
extractor classes""" - extractors = collections.defaultdict(list) - - # get lists of extractor classes grouped by category - for extr in extractor.extractors(): - if not extr.category or extr.category in IGNORE_LIST: - continue - extractors[extr.category].append(extr) - - # sort extractor lists with the same category - for extrlist in extractors.values(): - extrlist.sort(key=subcategory_key) - - # ugly hack to add e-hentai.org - eh = [] - for extr in extractors["exhentai"]: - class eh_extr(extr): - category = "e-hentai" - root = "https://e-hentai.org" - eh.append(eh_extr) - extractors["e-hentai"] = eh - - # sort lists by category - return sorted( - extractors.values(), - key=lambda lst: category_key(lst[0]), - ) - - -# define table columns -COLUMNS = ( - ("Site", 20, - lambda x: category_text(x[0])), - ("URL" , 35, - lambda x: domain(x[0])), - ("Capabilities", 50, - lambda x: ", ".join(subcategory_text(extr) for extr in x - if subcategory_text(extr))), - ("Authentication", 16, - lambda x: AUTH_MAP.get(x[0].category, "")), -) - - -def write_output(fobj, columns, extractors): - - def pad(output, col, category=None): - size = col[1] - output = output if isinstance(output, str) else col[2](output) - - if len(output) > size: - sub = "|{}-{}|".format(category, col[0][0]) - subs.append((sub, output)) - output = sub - - return output + " " * (size - len(output)) - - w = fobj.write - subs = [] - - # caption - w("Supported Sites\n") - w("===============\n") - w("Unless otherwise known, assume all sites to be NSFW\n\n") - - # table head - sep = " ".join("=" * c[1] for c in columns) + "\n" - w(sep) - w(" ".join(pad(c[0], c) for c in columns).strip() + "\n") - w(sep) - - # table body - for lst in extractors: - w(" ".join( - pad(col[2](lst), col, lst[0].category) - for col in columns - ).strip()) - w("\n") - - # table bottom - w(sep) - w("\n") - - # substitutions - for sub, value in subs: - w(".. {} replace:: {}\n".format(sub, value)) - - -outfile = sys.argv[1] if len(sys.argv) > 1 else "supportedsites.rst" -with open(util.path("docs", outfile), "w") as file: - write_output(file, COLUMNS, build_extractor_list()) diff --git a/scripts/util.py b/scripts/util.py deleted file mode 100644 index bfbd6cb..0000000 --- a/scripts/util.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- - -import sys -import os.path - -ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, os.path.realpath(ROOTDIR)) - - -def path(*segments, join=os.path.join): - return join(ROOTDIR, *segments) @@ -1,3 +1,8 @@ [flake8] exclude = gallery_dl/__init__.py,gallery_dl/__main__.py,setup.py,build,scripts,archive ignore = E203,E226,W504 + +[egg_info] +tag_build = +tag_date = 0 + @@ -1,22 +1,15 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- -from __future__ import unicode_literals, print_function - +import re import sys import os.path import warnings +from setuptools import setup if sys.hexversion < 0x3040000: sys.exit("Python 3.4+ required") -try: - from setuptools import setup - has_setuptools = True -except ImportError: - from distutils.core import setup - has_setuptools = False - def read(fname): path = os.path.join(os.path.dirname(__file__), fname) @@ -24,7 +17,8 @@ def read(fname): return file.read() def check_file(fname): - if os.path.exists(fname): + path = os.path.join(os.path.dirname(__file__), fname) + if os.path.exists(path): return True warnings.warn( "Not including file '{}' since it is not present. 
" @@ -34,65 +28,27 @@ def check_file(fname): # get version without importing the package -exec(read("gallery_dl/version.py")) - -DESCRIPTION = ("Command-line program to download image-galleries and " - "-collections from several image hosting sites") -LONG_DESCRIPTION = read("README.rst") - -if "py2exe" in sys.argv: - try: - import py2exe - except ImportError: - sys.exit("Error importing 'py2exe'") - params = { - "console": [{ - "script": "./gallery_dl/__main__.py", - "dest_base": "gallery-dl", - "version": __version__, - "description": DESCRIPTION, - "comments": LONG_DESCRIPTION, - "product_name": "gallery-dl", - "product_version": __version__, - }], - "options": {"py2exe": { - "bundle_files": 0, - "compressed": 1, - "optimize": 1, - "dist_dir": ".", - "packages": ["gallery_dl"], - "dll_excludes": ["w9xpopen.exe"], - }}, - "zipfile": None, - } -elif has_setuptools: - params = { - "entry_points": { - "console_scripts": [ - "gallery-dl = gallery_dl:main" - ] - } - } -else: - params = { - "scripts": ["bin/gallery-dl"] - } +VERSION = re.search( + r'__version__\s*=\s*"([^"]+)"', + read("gallery_dl/version.py"), +).group(1) -data_files = [ +FILES = [ (path, [f for f in files if check_file(f)]) for (path, files) in [ - ('etc/bash_completion.d', ['gallery-dl.bash_completion']), - ('share/man/man1' , ['gallery-dl.1']), - ('share/man/man5' , ['gallery-dl.conf.5']), + ("share/bash-completion/completions", ["data/completion/gallery-dl"]), + ("share/man/man1" , ["data/man/gallery-dl.1"]), + ("share/man/man5" , ["data/man/gallery-dl.conf.5"]), ] ] setup( name="gallery_dl", - version=__version__, - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, + version=VERSION, + description=("Command-line program to download image-galleries and " + "-collections from several image hosting sites"), + long_description=read("README.rst"), url="https://github.com/mikf/gallery-dl", download_url="https://github.com/mikf/gallery-dl/releases/latest", author="Mike Fährmann", @@ -104,13 +60,27 @@ setup( install_requires=[ "requests>=2.11.0", ], + extras_require={ + "cloudflare": [ + "pyOpenSSL>=19.0.0", + "cryptography>=2.8.0", + ], + "video": [ + "youtube-dl", + ], + }, packages=[ "gallery_dl", "gallery_dl.extractor", "gallery_dl.downloader", "gallery_dl.postprocessor", ], - data_files=data_files, + entry_points={ + "console_scripts": [ + "gallery-dl = gallery_dl:main", + ], + }, + data_files=FILES, keywords="image gallery downloader crawler scraper", classifiers=[ "Development Status :: 5 - Production/Stable", @@ -124,11 +94,11 @@ setup( "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3 :: Only", "Topic :: Internet :: WWW/HTTP", "Topic :: Multimedia :: Graphics", "Topic :: Utilities", ], test_suite="test", - **params ) diff --git a/snap/local/launchers/gallery-dl-launch b/snap/local/launchers/gallery-dl-launch deleted file mode 100755 index 908f303..0000000 --- a/snap/local/launchers/gallery-dl-launch +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# This is the maintainence launcher for the snap, make necessary runtime environment changes to make the snap work here. You may also insert security confinement/deprecation/obsoletion notice of the snap here. 
- -set \ - -o errexit \ - -o errtrace \ - -o nounset \ - -o pipefail - -# Use user's real home directory for canonical configuration path access -declare REALHOME="$( - getent passwd "${USER}" \ - | cut --delimiter=: --fields=6 -)" -HOME="${REALHOME}" - -if ! test -f "${SNAP_USER_COMMON}"/marker_disable_interface_warning; then - # Warn if the `removable-media` interface isn't connected - if ! ls /media &>/dev/null; then - printf -- \ - "It seems that this snap isn't connected to the \`removable-media\` security confinement interface. If you want to save the files under \`/media\`, \`/run/media\`, or \`/mnt\` directories you need to connect this snap to the \`removable-media\` interface by running the following command in a terminal:\\n\\n sudo snap connect %s:removable-media\\n\\n" \ - "${SNAP_NAME}" \ - >&2 - printf -- \ - "To disable this warning create an empty file at the following path:\\n\\n %s/marker_disable_interface_warning\\n\\n" \ - "${SNAP_USER_COMMON}" \ - >&2 - fi -fi - -# Finally run the next part of the command chain -exec "${@}" diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml deleted file mode 100644 index f8e26fa..0000000 --- a/snap/snapcraft.yaml +++ /dev/null @@ -1,110 +0,0 @@ -%YAML 1.1 ---- -# Snapcraft Recipe for gallery-dl -# ------------------------------ -# This file is in the YAML data serialization format: -# http://yaml.org -# For the spec. of writing this file refer the following documentation: -# * The snapcraft format -# https://docs.snapcraft.io/the-snapcraft-format/8337 -# * Snap Documentation -# https://docs.snapcraft.io -# * Topics under the doc category in the Snapcraft Forum -# https://forum.snapcraft.io/c/doc -# For support refer to the snapcraft section in the Snapcraft Forum: -# https://forum.snapcraft.io/c/snapcraft -name: gallery-dl -license: GPL-2.0 -base: core -summary: Download image-galleries and -collections from several image hosting sites -description: | - `gallery-dl` is a command-line program to download image-galleries and -collections from several image hosting sites (see [Supported Sites][1]). It is a cross-platform tool with many configuration options and powerful filenaming capabilities. - - [1]: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - -adopt-info: gallery-dl -confinement: strict -grade: stable - -plugs: - # For `xdg-open` command access for opening OAuth authentication webpages - desktop: - - # Storage access - home: - removable-media: # Non-A/C - - # Network access - network: - - # For network service for recieving OAuth callback tokens - network-bind: - - # Configuration access - config-gallery-dl: - interface: personal-files - read: - - $HOME/.config/gallery-dl - - $HOME/.gallery-dl.conf - etc-gallery-dl: - interface: system-files - read: - - /etc/gallery-dl.conf - -parts: - # Launcher programs to fix problems at runtime - launchers: - source: snap/local/launchers - plugin: dump - organize: - '*': bin/ - - # Check out the tagged release revision if it isn’t promoted to the stable channel - # https://forum.snapcraft.io/t/selective-checkout-check-out-the-tagged-release-revision-if-it-isnt-promoted-to-the-stable-channel/10617 - selective-checkout: - plugin: nil - build-packages: - - git - stage-snaps: - - selective-checkout - prime: - - -* - - gallery-dl: - after: - - selective-checkout - - source: . 
- override-pull: | - snapcraftctl pull - $SNAPCRAFT_STAGE/scriptlets/selective-checkout - - plugin: python - build-packages: - - make - python-packages: - - youtube_dl - override-build: | - # build manpages and bash completion - make man completion - - snapcraftctl build - - ffmpeg: - plugin: nil - stage-packages: - - ffmpeg - -apps: - gallery-dl: - adapter: full - command-chain: - - bin/gallery-dl-launch - command: bin/gallery-dl - completer: etc/bash_completion.d/gallery-dl.bash_completion - environment: - LANG: C.UTF-8 - LC_ALL: C.UTF-8 - - # Satisfy FFmpeg's libpulsecommon dependency - LD_LIBRARY_PATH: $LD_LIBRARY_PATH:$SNAP/usr/lib/$SNAPCRAFT_ARCH_TRIPLET/pulseaudio diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/test/__init__.py +++ /dev/null diff --git a/test/test_extractor.py b/test/test_extractor.py index fa0709b..2555b58 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -157,24 +157,12 @@ class TestExtractor(unittest.TestCase): def capitalize(c): if "-" in c: return string.capwords(c.replace("-", " ")).replace(" ", "") - if "." in c: - c = c.replace(".", "") return c.capitalize() - mapping = { - "2chan" : "futaba", - "3dbooru": "threedeebooru", - "4chan" : "fourchan", - "4plebs" : "fourplebs", - "8chan" : "infinitychan", - "oauth" : None, - } - for extr in extractor.extractors(): - category = mapping.get(extr.category, extr.category) - if category: + if extr.category not in ("", "oauth"): expected = "{}{}Extractor".format( - capitalize(category), + capitalize(extr.category), capitalize(extr.subcategory), ) if expected[0].isdigit(): diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 78b9838..0ab89db 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -327,7 +327,7 @@ class ZipTest(BasePostprocessorTest): self.assertEqual(len(pp.zfile.NameToInfo), 3) # close file - pp.finalize() + pp.run_final(self.pathfmt, 0) # reopen to check persistence with zipfile.ZipFile(pp.zfile.filename) as file: @@ -360,7 +360,7 @@ class ZipTest(BasePostprocessorTest): pp.prepare(self.pathfmt) pp.run(self.pathfmt) - pp.finalize() + pp.run_final(self.pathfmt, 0) self.assertEqual(pp.zfile.write.call_count, 3) for call in pp.zfile.write.call_args_list: diff --git a/test/test_results.py b/test/test_results.py index bde3af5..6d628c3 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -26,8 +26,10 @@ TRAVIS_SKIP = { # temporary issues, etc. 
BROKEN = { - "8chan", + "hentaifox", + "livedoor", "mangapark", + "yaplog", } @@ -84,47 +86,47 @@ class TestExtractorResults(unittest.TestCase): raise # test archive-id uniqueness - self.assertEqual(len(set(tjob.list_archive)), len(tjob.list_archive)) + self.assertEqual(len(set(tjob.archive_list)), len(tjob.archive_list)) if tjob.queue: # test '_extractor' entries - for url, kwdict in zip(tjob.list_url, tjob.list_keyword): + for url, kwdict in zip(tjob.url_list, tjob.kwdict_list): if "_extractor" in kwdict: extr = kwdict["_extractor"].from_url(url) self.assertIsInstance(extr, kwdict["_extractor"]) self.assertEqual(extr.url, url) else: # test 'extension' entries - for kwdict in tjob.list_keyword: + for kwdict in tjob.kwdict_list: self.assertIn("extension", kwdict) # test extraction results if "url" in result: - self.assertEqual(result["url"], tjob.hash_url.hexdigest()) + self.assertEqual(result["url"], tjob.url_hash.hexdigest()) if "content" in result: - self.assertEqual(result["content"], tjob.hash_content.hexdigest()) + self.assertEqual(result["content"], tjob.content_hash.hexdigest()) if "keyword" in result: - keyword = result["keyword"] - if isinstance(keyword, dict): - for kwdict in tjob.list_keyword: - self._test_kwdict(kwdict, keyword) + expected = result["keyword"] + if isinstance(expected, dict): + for kwdict in tjob.kwdict_list: + self._test_kwdict(kwdict, expected) else: # assume SHA1 hash - self.assertEqual(keyword, tjob.hash_keyword.hexdigest()) + self.assertEqual(expected, tjob.kwdict_hash.hexdigest()) if "count" in result: count = result["count"] if isinstance(count, str): self.assertRegex(count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$") - expr = "{} {}".format(len(tjob.list_url), count) + expr = "{} {}".format(len(tjob.url_list), count) self.assertTrue(eval(expr), msg=expr) else: # assume integer - self.assertEqual(len(tjob.list_url), count) + self.assertEqual(len(tjob.url_list), count) if "pattern" in result: - self.assertGreater(len(tjob.list_url), 0) - for url in tjob.list_url: + self.assertGreater(len(tjob.url_list), 0) + for url in tjob.url_list: self.assertRegex(url, result["pattern"]) def _test_kwdict(self, kwdict, tests): @@ -158,58 +160,60 @@ class ResultJob(job.DownloadJob): job.DownloadJob.__init__(self, url, parent) self.queue = False self.content = content - self.list_url = [] - self.list_keyword = [] - self.list_archive = [] - self.hash_url = hashlib.sha1() - self.hash_keyword = hashlib.sha1() - self.hash_archive = hashlib.sha1() - self.hash_content = hashlib.sha1() + + self.url_list = [] + self.url_hash = hashlib.sha1() + self.kwdict_list = [] + self.kwdict_hash = hashlib.sha1() + self.archive_list = [] + self.archive_hash = hashlib.sha1() + self.content_hash = hashlib.sha1() if content: - self.fileobj = TestPathfmt(self.hash_content) + self.fileobj = TestPathfmt(self.content_hash) self.format_directory = TestFormatter( - "".join(self.extractor.directory_fmt)) - self.format_filename = TestFormatter(self.extractor.filename_fmt) + "".join(self.extractor.directory_fmt)).format_map + self.format_filename = TestFormatter( + self.extractor.filename_fmt).format_map def run(self): for msg in self.extractor: self.dispatch(msg) - def handle_url(self, url, keywords, fallback=None): - self.update_url(url) - self.update_keyword(keywords) - self.update_archive(keywords) - self.update_content(url) - self.format_filename.format_map(keywords) + def handle_url(self, url, kwdict, fallback=None): + self._update_url(url) + self._update_kwdict(kwdict) + self._update_archive(kwdict) + 
self._update_content(url) + self.format_filename(kwdict) - def handle_directory(self, keywords): - self.update_keyword(keywords, False) - self.format_directory.format_map(keywords) + def handle_directory(self, kwdict): + self._update_kwdict(kwdict, False) + self.format_directory(kwdict) - def handle_queue(self, url, keywords): + def handle_queue(self, url, kwdict): self.queue = True - self.update_url(url) - self.update_keyword(keywords) + self._update_url(url) + self._update_kwdict(kwdict) - def update_url(self, url): - self.list_url.append(url) - self.hash_url.update(url.encode()) + def _update_url(self, url): + self.url_list.append(url) + self.url_hash.update(url.encode()) - def update_keyword(self, kwdict, to_list=True): + def _update_kwdict(self, kwdict, to_list=True): if to_list: - self.list_keyword.append(kwdict) + self.kwdict_list.append(kwdict.copy()) kwdict = self._filter(kwdict) - self.hash_keyword.update( + self.kwdict_hash.update( json.dumps(kwdict, sort_keys=True, default=str).encode()) - def update_archive(self, kwdict): + def _update_archive(self, kwdict): archive_id = self.extractor.archive_fmt.format_map(kwdict) - self.list_archive.append(archive_id) - self.hash_archive.update(archive_id.encode()) + self.archive_list.append(archive_id) + self.archive_hash.update(archive_id.encode()) - def update_content(self, url): + def _update_content(self, url): if self.content: scheme = url.partition(":")[0] self.get_downloader(scheme).download(url, self.fileobj) @@ -285,8 +289,10 @@ def setup_test_config(): config.set(("extractor", "password"), name) config.set(("extractor", "nijie" , "username"), email) config.set(("extractor", "seiga" , "username"), email) + config.set(("extractor", "danbooru" , "username"), None) config.set(("extractor", "instagram", "username"), None) + config.set(("extractor", "imgur" , "username"), None) config.set(("extractor", "twitter" , "username"), None) config.set(("extractor", "mangoxo" , "username"), "LiQiang3") diff --git a/test/test_util.py b/test/test_util.py index 815b2d8..9b252a3 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -313,17 +313,17 @@ class TestOther(unittest.TestCase): util.advance(util.advance(items, 1), 2), range(3, 5)) def test_raises(self): - func = util.raises(Exception()) + func = util.raises(Exception) with self.assertRaises(Exception): func() - func = util.raises(ValueError(1)) + func = util.raises(ValueError) with self.assertRaises(ValueError): - func() + func(1) with self.assertRaises(ValueError): - func() + func(2) with self.assertRaises(ValueError): - func() + func(3) def test_combine_dict(self): self.assertEqual( |
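
The last hunk above changes test_raises so that util.raises is now called with an exception class rather than an instance, and the returned callable forwards its arguments to the raised exception. The diff does not show util.raises itself; a minimal sketch consistent with the updated test (an assumption about the upstream helper, not a verbatim copy of it) would be:

    def raises(cls):
        """Return a callable that raises 'cls' constructed from its arguments."""
        def wrap(*args):
            raise cls(*args)
        return wrap

    # Usage matching the updated test:
    func = raises(ValueError)
    try:
        func(1)              # raises ValueError(1)
    except ValueError as exc:
        print(exc)           # -> 1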