diff options
| author | 2020-06-01 23:11:38 -0400 | |
|---|---|---|
| committer | 2020-06-01 23:11:38 -0400 | |
| commit | e71c0d2e99f894c50b0d514729a8b44a90ce03a4 (patch) | |
| tree | fc2ab19492570cf481e6efca5c42736a3c85f875 | |
| parent | 40449733839d8ac9d6f44cc5942fb7623817d186 (diff) | |
| parent | a70a3246927b72f1ded37acd55ee719515441b5b (diff) | |
Update upstream source from tag 'upstream/1.14.0'
Update to upstream version '1.14.0'
with Debian dir e10ad4f35c0e6250ed14f70c95b81b307b7b2acf
64 files changed, 3058 insertions, 410 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..df67569 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,1054 @@ +# Changelog + +## Unreleased +### Additions +- [imagechest] add new extractor for imgchest.com (#750) +- [instagram] add `post_url`, `tags`, `location`, `tagged_users` metadata (#743) +- [redgifs] add image extractor (#724) +- [webtoons] add new extractor for webtoons.com (#761) +- implement `--write-pages` option (#736) +- extend `path-restrict` option (#662) +- implement `path-replace` option (#662, #755) +- make `path` and `keywords` available in logging messages (#574, #575) +### Changes +- [danbooru] change default value of `ugoira` to `false` +- [downloader:ytdl] change default value of `forward-cookies` to `false` +- [downloader:ytdl] fix file extensions when merging into `.mkv` (#720) +- write OAuth tokens to cache (#616) +- use `%APPDATA%\gallery-dl` for config files and cache on Windows +- use `util.Formatter` for formatting logging messages +- reuse HTTP connections from parent extractors +### Fixes +- [deviantart] use private access tokens for Journals (#738) +- [gelbooru] simplify and fix pool extraction +- [imgur] fix extraction of animated images without `mp4` entry +- [imgur] treat `/t/unmuted/` URLs as galleries +- [instagram] fix login with username & password (#756, #771, #797, #803) +- [reddit] don't send OAuth headers for file downloads (#729) +- fix/improve Cloudflare bypass code (#728, #757) +- reset filenames on empty file extensions (#733) + +## 1.13.6 - 2020-05-02 +### Additions +- [patreon] respect filters and sort order in query parameters ([#711](https://github.com/mikf/gallery-dl/issues/711)) +- [speakerdeck] add a new extractor for speakerdeck.com ([#726](https://github.com/mikf/gallery-dl/issues/726)) +- [twitter] add `replies` option ([#705](https://github.com/mikf/gallery-dl/issues/705)) +- [weibo] add `videos` option +- [downloader:http] add MIME types for `.psd` files 
([#714](https://github.com/mikf/gallery-dl/issues/714)) +### Fixes +- [artstation] improve embed extraction ([#720](https://github.com/mikf/gallery-dl/issues/720)) +- [deviantart] limit API wait times ([#721](https://github.com/mikf/gallery-dl/issues/721)) +- [newgrounds] fix URLs produced by the `following` extractor ([#684](https://github.com/mikf/gallery-dl/issues/684)) +- [patreon] improve file hash extraction ([#713](https://github.com/mikf/gallery-dl/issues/713)) +- [vsco] fix user gallery extraction +- fix/improve Cloudflare bypass code ([#728](https://github.com/mikf/gallery-dl/issues/728)) + +## 1.13.5 - 2020-04-27 +### Additions +- [500px] recognize `web.500px.com` URLs +- [aryion] support downloading from folders ([#694](https://github.com/mikf/gallery-dl/issues/694)) +- [furaffinity] add extractor for followed users ([#515](https://github.com/mikf/gallery-dl/issues/515)) +- [hitomi] add extractor for tag searches ([#697](https://github.com/mikf/gallery-dl/issues/697)) +- [instagram] add `post_id` and `num` metadata fields ([#698](https://github.com/mikf/gallery-dl/issues/698)) +- [newgrounds] add extractor for followed users ([#684](https://github.com/mikf/gallery-dl/issues/684)) +- [patreon] recognize URLs with creator IDs ([#711](https://github.com/mikf/gallery-dl/issues/711)) +- [twitter] add `reply` metadata field ([#705](https://github.com/mikf/gallery-dl/issues/705)) +- [xhamster] recognize `xhamster.porncache.net` URLs ([#700](https://github.com/mikf/gallery-dl/issues/700)) +### Fixes +- [gelbooru] improve post ID extraction in pool listings +- [hitomi] fix extraction of galleries without tags +- [jaiminisbox] update metadata decoding procedure ([#702](https://github.com/mikf/gallery-dl/issues/702)) +- [mastodon] fix pagination ([#701](https://github.com/mikf/gallery-dl/issues/701)) +- [mastodon] improve account searches ([#704](https://github.com/mikf/gallery-dl/issues/704)) +- [patreon] fix hash extraction from download URLs 
([#693](https://github.com/mikf/gallery-dl/issues/693)) +- improve parameter extraction when solving Cloudflare challenges + +## 1.13.4 - 2020-04-12 +### Additions +- [aryion] add `gallery` and `post` extractors ([#390](https://github.com/mikf/gallery-dl/issues/390), [#673](https://github.com/mikf/gallery-dl/issues/673)) +- [deviantart] detect and handle folders in sta.sh listings ([#659](https://github.com/mikf/gallery-dl/issues/659)) +- [hentainexus] add `circle`, `event`, and `title_conventional` metadata fields ([#661](https://github.com/mikf/gallery-dl/issues/661)) +- [hiperdex] add `artist` extractor ([#606](https://github.com/mikf/gallery-dl/issues/606)) +- [mastodon] add access tokens for `mastodon.social` and `baraag.net` ([#665](https://github.com/mikf/gallery-dl/issues/665)) +### Changes +- [deviantart] retrieve *all* download URLs through the OAuth API +- automatically read config files in PyInstaller executable directories ([#682](https://github.com/mikf/gallery-dl/issues/682)) +### Fixes +- [deviantart] handle "Request blocked" errors ([#655](https://github.com/mikf/gallery-dl/issues/655)) +- [deviantart] improve JPEG quality replacement pattern +- [hiperdex] fix extraction +- [mastodon] handle API rate limits ([#665](https://github.com/mikf/gallery-dl/issues/665)) +- [mastodon] update OAuth credentials for pawoo.net ([#665](https://github.com/mikf/gallery-dl/issues/665)) +- [myportfolio] fix extraction of galleries without title +- [piczel] fix extraction of single images +- [vsco] fix collection extraction +- [weibo] accept status URLs with non-numeric IDs ([#664](https://github.com/mikf/gallery-dl/issues/664)) + +## 1.13.3 - 2020-03-28 +### Additions +- [instagram] Add support for user's saved medias ([#644](https://github.com/mikf/gallery-dl/issues/644)) +- [nozomi] support multiple images per post ([#646](https://github.com/mikf/gallery-dl/issues/646)) +- [35photo] add `tag` extractor +### Changes +- [mangadex] transform timestamps from `date` 
fields to datetime objects +### Fixes +- [deviantart] handle decode errors for `extended_fetch` results ([#655](https://github.com/mikf/gallery-dl/issues/655)) +- [e621] fix bug in API rate limiting and improve pagination ([#651](https://github.com/mikf/gallery-dl/issues/651)) +- [instagram] update pattern for user profile URLs +- [mangapark] fix metadata extraction +- [nozomi] sort search results ([#646](https://github.com/mikf/gallery-dl/issues/646)) +- [piczel] fix extraction +- [twitter] fix typo in `x-twitter-auth-type` header ([#625](https://github.com/mikf/gallery-dl/issues/625)) +- remove trailing dots from Windows directory names ([#647](https://github.com/mikf/gallery-dl/issues/647)) +- fix crash with missing `stdout`/`stderr`/`stdin` handles ([#653](https://github.com/mikf/gallery-dl/issues/653)) + +## 1.13.2 - 2020-03-14 +### Additions +- [furaffinity] extract more metadata +- [instagram] add `post_shortcode` metadata field ([#525](https://github.com/mikf/gallery-dl/issues/525)) +- [kabeuchi] add extractor ([#561](https://github.com/mikf/gallery-dl/issues/561)) +- [newgrounds] add extractor for favorited posts ([#394](https://github.com/mikf/gallery-dl/issues/394)) +- [pixiv] implement `avatar` option ([#595](https://github.com/mikf/gallery-dl/issues/595), [#623](https://github.com/mikf/gallery-dl/issues/623)) +- [twitter] add extractor for bookmarked Tweets ([#625](https://github.com/mikf/gallery-dl/issues/625)) +### Fixes +- [bcy] reduce number of HTTP requests during data extraction +- [e621] update to new interface ([#635](https://github.com/mikf/gallery-dl/issues/635)) +- [exhentai] handle incomplete MIME types ([#632](https://github.com/mikf/gallery-dl/issues/632)) +- [hitomi] improve metadata extraction +- [mangoxo] fix login +- [newgrounds] improve error handling when extracting post data + +## 1.13.1 - 2020-03-01 +### Additions +- [hentaihand] add extractors ([#605](https://github.com/mikf/gallery-dl/issues/605)) +- [hiperdex] add chapter and 
manga extractors ([#606](https://github.com/mikf/gallery-dl/issues/606)) +- [oauth] implement option to write DeviantArt refresh-tokens to cache ([#616](https://github.com/mikf/gallery-dl/issues/616)) +- [downloader:http] add more MIME types for `.bmp` and `.rar` files ([#621](https://github.com/mikf/gallery-dl/issues/621), [#628](https://github.com/mikf/gallery-dl/issues/628)) +- warn about expired cookies +### Fixes +- [bcy] fix partial image URLs ([#613](https://github.com/mikf/gallery-dl/issues/613)) +- [danbooru] fix Ugoira downloads and metadata +- [deviantart] check availability of `/intermediary/` URLs ([#609](https://github.com/mikf/gallery-dl/issues/609)) +- [hitomi] follow multiple redirects & fix image URLs +- [piczel] improve and update +- [tumblr] replace `-` with ` ` in tag searches ([#611](https://github.com/mikf/gallery-dl/issues/611)) +- [vsco] update gallery URL pattern +- fix `--verbose` and `--quiet` command-line options + +## 1.13.0 - 2020-02-16 +### Additions +- Support for + - `furaffinity` - https://www.furaffinity.net/ ([#284](https://github.com/mikf/gallery-dl/issues/284)) + - `8kun` - https://8kun.top/ ([#582](https://github.com/mikf/gallery-dl/issues/582)) + - `bcy` - https://bcy.net/ ([#592](https://github.com/mikf/gallery-dl/issues/592)) +- [blogger] implement video extraction ([#587](https://github.com/mikf/gallery-dl/issues/587)) +- [oauth] add option to specify port number used by local server ([#604](https://github.com/mikf/gallery-dl/issues/604)) +- [pixiv] add `rating` metadata field ([#595](https://github.com/mikf/gallery-dl/issues/595)) +- [pixiv] recognize tags at the end of new bookmark URLs +- [reddit] add `videos` option +- [weibo] use youtube-dl to download from m3u8 manifests +- implement `parent-directory` option ([#551](https://github.com/mikf/gallery-dl/issues/551)) +- extend filename formatting capabilities: + - implement field name alternatives ([#525](https://github.com/mikf/gallery-dl/issues/525)) + - allow 
multiple "special" format specifiers per replacement field ([#595](https://github.com/mikf/gallery-dl/issues/595)) + - allow for numeric list and string indices +### Changes +- [reddit] handle reddit-hosted images and videos natively ([#551](https://github.com/mikf/gallery-dl/issues/551)) +- [twitter] change default value for `videos` to `true` +### Fixes +- [cloudflare] unescape challenge URLs +- [deviantart] fix video extraction from `extended_fetch` results +- [hitomi] implement workaround for "broken" redirects +- [khinsider] fix and improve metadata extraction +- [patreon] filter duplicate files per post ([#590](https://github.com/mikf/gallery-dl/issues/590)) +- [piczel] fix extraction +- [pixiv] fix user IDs for bookmarks API calls ([#596](https://github.com/mikf/gallery-dl/issues/596)) +- [sexcom] fix image URLs +- [twitter] force old login page layout ([#584](https://github.com/mikf/gallery-dl/issues/584), [#598](https://github.com/mikf/gallery-dl/issues/598)) +- [vsco] skip "invalid" entities +- improve functions to load/save cookies.txt files ([#586](https://github.com/mikf/gallery-dl/issues/586)) +### Removals +- [yaplog] remove module + +## 1.12.3 - 2020-01-19 +### Additions +- [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565)) +- [twitter] add option to extract TwitPic embeds ([#579](https://github.com/mikf/gallery-dl/issues/579)) +- implement a post-processor module to compare file versions ([#530](https://github.com/mikf/gallery-dl/issues/530)) +### Fixes +- [hitomi] update image URL generation +- [mangadex] revert domain to `mangadex.org` +- [pinterest] improve detection of invalid pin.it links +- [pixiv] update URL patterns for user profiles and bookmarks ([#568](https://github.com/mikf/gallery-dl/issues/568)) +- [twitter] Fix stop before real end ([#573](https://github.com/mikf/gallery-dl/issues/573)) +- remove temp files before downloading from fallback URLs +### Removals +- [erolord] remove extractor + 
+## 1.12.2 - 2020-01-05 +### Additions +- [deviantart] match new search/popular URLs ([#538](https://github.com/mikf/gallery-dl/issues/538)) +- [deviantart] match `/favourites/all` URLs ([#555](https://github.com/mikf/gallery-dl/issues/555)) +- [deviantart] add extractor for followed users ([#515](https://github.com/mikf/gallery-dl/issues/515)) +- [pixiv] support listing followed users ([#515](https://github.com/mikf/gallery-dl/issues/515)) +- [imagefap] handle beta.imagefap.com URLs ([#552](https://github.com/mikf/gallery-dl/issues/552)) +- [postprocessor:metadata] add `directory` option ([#520](https://github.com/mikf/gallery-dl/issues/520)) +### Fixes +- [artstation] fix search result pagination ([#537](https://github.com/mikf/gallery-dl/issues/537)) +- [directlink] send Referer headers ([#536](https://github.com/mikf/gallery-dl/issues/536)) +- [exhentai] restrict default directory name length ([#545](https://github.com/mikf/gallery-dl/issues/545)) +- [mangadex] change domain to mangadex.cc ([#559](https://github.com/mikf/gallery-dl/issues/559)) +- [mangahere] send `isAdult` cookies ([#556](https://github.com/mikf/gallery-dl/issues/556)) +- [newgrounds] fix tags metadata extraction +- [pixiv] retry after rate limit errors ([#535](https://github.com/mikf/gallery-dl/issues/535)) +- [twitter] handle quoted tweets ([#526](https://github.com/mikf/gallery-dl/issues/526)) +- [twitter] handle API rate limits ([#526](https://github.com/mikf/gallery-dl/issues/526)) +- [twitter] fix URLs forwarded to youtube-dl ([#540](https://github.com/mikf/gallery-dl/issues/540)) +- prevent infinite recursion when spawning new extractors ([#489](https://github.com/mikf/gallery-dl/issues/489)) +- improve output of `--list-keywords` for "parent" extractors ([#548](https://github.com/mikf/gallery-dl/issues/548)) +- provide fallback for SQLite versions with missing `WITHOUT ROWID` support ([#553](https://github.com/mikf/gallery-dl/issues/553)) + +## 1.12.1 - 2019-12-22 +### Additions +- 
[4chan] add extractor for entire boards ([#510](https://github.com/mikf/gallery-dl/issues/510)) +- [realbooru] add extractors for pools, posts, and tag searches ([#514](https://github.com/mikf/gallery-dl/issues/514)) +- [instagram] implement a `videos` option ([#521](https://github.com/mikf/gallery-dl/issues/521)) +- [vsco] implement a `videos` option +- [postprocessor:metadata] implement a `bypost` option for downloading the metadata of an entire post ([#511](https://github.com/mikf/gallery-dl/issues/511)) +### Changes +- [reddit] change the default value for `comments` to `0` +- [vsco] improve image resolutions +- make filesystem-related errors during file downloads non-fatal ([#512](https://github.com/mikf/gallery-dl/issues/512)) +### Fixes +- [foolslide] add fallback for chapter data extraction +- [instagram] ignore errors during post-page extraction +- [patreon] avoid errors when fetching user info ([#508](https://github.com/mikf/gallery-dl/issues/508)) +- [patreon] improve URL pattern for single posts +- [reddit] fix errors with `t1` submissions +- [vsco] fix user profile extraction … again +- [weibo] handle unavailable/deleted statuses +- [downloader:http] improve rate limit handling +- retain trailing zeroes in Cloudflare challenge answers + +## 1.12.0 - 2019-12-08 +### Additions +- [flickr] support 3k, 4k, 5k, and 6k photo sizes ([#472](https://github.com/mikf/gallery-dl/issues/472)) +- [imgur] add extractor for subreddit links ([#500](https://github.com/mikf/gallery-dl/issues/500)) +- [newgrounds] add extractors for `audio` listings and general `media` files ([#394](https://github.com/mikf/gallery-dl/issues/394)) +- [newgrounds] implement login support ([#394](https://github.com/mikf/gallery-dl/issues/394)) +- [postprocessor:metadata] implement a `extension-format` option ([#477](https://github.com/mikf/gallery-dl/issues/477)) +- `--exec-after` +### Changes +- [deviantart] ensure consistent username capitalization 
([#455](https://github.com/mikf/gallery-dl/issues/455)) +- [directlink] split `{path}` into `{path}/{filename}.{extension}` +- [twitter] update metadata fields with user/author information +- [postprocessor:metadata] filter private entries & rename `format` to `content-format` +- Enable `cookies-update` by default +### Fixes +- [2chan] fix metadata extraction +- [behance] get images from 'media_collection' modules +- [bobx] fix image downloads by randomly generating session cookies ([#482](https://github.com/mikf/gallery-dl/issues/482)) +- [deviantart] revert to getting download URLs from OAuth API calls ([#488](https://github.com/mikf/gallery-dl/issues/488)) +- [deviantart] fix URL generation from '/extended_fetch' results ([#505](https://github.com/mikf/gallery-dl/issues/505)) +- [flickr] adjust OAuth redirect URI ([#503](https://github.com/mikf/gallery-dl/issues/503)) +- [hentaifox] fix extraction +- [imagefap] adapt to new image URL format +- [imgbb] fix error in galleries without user info ([#471](https://github.com/mikf/gallery-dl/issues/471)) +- [instagram] prevent errors with missing 'video_url' fields ([#479](https://github.com/mikf/gallery-dl/issues/479)) +- [nijie] fix `date` parsing +- [pixiv] match new search URLs ([#507](https://github.com/mikf/gallery-dl/issues/507)) +- [plurk] fix comment pagination +- [sexcom] send specific Referer headers when downloading videos +- [twitter] fix infinite loops ([#499](https://github.com/mikf/gallery-dl/issues/499)) +- [vsco] fix user profile and collection extraction ([#480](https://github.com/mikf/gallery-dl/issues/480)) +- Fix Cloudflare DDoS protection bypass +### Removals +- `--abort-on-skip` + +## 1.11.1 - 2019-11-09 +### Fixes +- Fix inclusion of bash completion and man pages in source distributions + +## 1.11.0 - 2019-11-08 +### Additions +- Support for + - `blogger` - https://www.blogger.com/ ([#364](https://github.com/mikf/gallery-dl/issues/364)) + - `nozomi` - https://nozomi.la/ 
([#388](https://github.com/mikf/gallery-dl/issues/388)) + - `issuu` - https://issuu.com/ ([#413](https://github.com/mikf/gallery-dl/issues/413)) + - `naver` - https://blog.naver.com/ ([#447](https://github.com/mikf/gallery-dl/issues/447)) +- Extractor for `twitter` search results ([#448](https://github.com/mikf/gallery-dl/issues/448)) +- Extractor for `deviantart` user profiles with configurable targets ([#377](https://github.com/mikf/gallery-dl/issues/377), [#419](https://github.com/mikf/gallery-dl/issues/419)) +- `--ugoira-conv-lossless` ([#432](https://github.com/mikf/gallery-dl/issues/432)) +- `cookies-update` option to allow updating cookies.txt files ([#445](https://github.com/mikf/gallery-dl/issues/445)) +- Optional `cloudflare` and `video` installation targets ([#460](https://github.com/mikf/gallery-dl/issues/460)) +- Allow executing commands with the `exec` post-processor after all files are downloaded ([#413](https://github.com/mikf/gallery-dl/issues/413), [#421](https://github.com/mikf/gallery-dl/issues/421)) +### Changes +- Rewrite `imgur` using its public API ([#446](https://github.com/mikf/gallery-dl/issues/446)) +- Rewrite `luscious` using GraphQL queries ([#457](https://github.com/mikf/gallery-dl/issues/457)) +- Adjust default `nijie` filenames to match `pixiv` +- Change enumeration index for gallery extractors from `page` to `num` +- Return non-zero exit status when errors occurred +- Forward proxy settings to youtube-dl downloader +- Install bash completion script into `share/bash-completion/completions` +### Fixes +- Adapt to new `instagram` page layout when logged in ([#391](https://github.com/mikf/gallery-dl/issues/391)) +- Support protected `twitter` videos ([#452](https://github.com/mikf/gallery-dl/issues/452)) +- Extend `hitomi` URL pattern and fix gallery extraction +- Restore OAuth2 authentication error messages +- Miscellaneous fixes for `patreon` ([#444](https://github.com/mikf/gallery-dl/issues/444)), `deviantart` 
([#455](https://github.com/mikf/gallery-dl/issues/455)), `sexcom` ([#464](https://github.com/mikf/gallery-dl/issues/464)), `imgur` ([#467](https://github.com/mikf/gallery-dl/issues/467)), `simplyhentai` + +## 1.10.6 - 2019-10-11 +### Additions +- `--exec` command-line option to specify a command to run after each file download ([#421](https://github.com/mikf/gallery-dl/issues/421)) +### Changes +- Include titles in `gfycat` default filenames ([#434](https://github.com/mikf/gallery-dl/issues/434)) +### Fixes +- Fetch working download URLs for `deviantart` ([#436](https://github.com/mikf/gallery-dl/issues/436)) +- Various fixes and improvements for `yaplog` blogs ([#443](https://github.com/mikf/gallery-dl/issues/443)) +- Fix image URL generation for `hitomi` galleries +- Miscellaneous fixes for `behance` and `xvideos` + +## 1.10.5 - 2019-09-28 +### Additions +- `instagram.highlights` option to include highlighted stories when downloading user profiles ([#329](https://github.com/mikf/gallery-dl/issues/329)) +- Support for `/user/` URLs on `reddit` ([#350](https://github.com/mikf/gallery-dl/issues/350)) +- Support for `imgur` user profiles and favorites ([#420](https://github.com/mikf/gallery-dl/issues/420)) +- Additional metadata fields on `nijie` ([#423](https://github.com/mikf/gallery-dl/issues/423)) +### Fixes +- Improve handling of private `deviantart` artworks ([#414](https://github.com/mikf/gallery-dl/issues/414)) and 429 status codes ([#424](https://github.com/mikf/gallery-dl/issues/424)) +- Prevent fatal errors when trying to open download-archive files ([#417](https://github.com/mikf/gallery-dl/issues/417)) +- Detect and ignore unavailable videos on `weibo` ([#427](https://github.com/mikf/gallery-dl/issues/427)) +- Update the `scope` of new `reddit` refresh-tokens ([#428](https://github.com/mikf/gallery-dl/issues/428)) +- Fix inconsistencies with the `reddit.comments` option ([#429](https://github.com/mikf/gallery-dl/issues/429)) +- Extend URL patterns for 
`hentaicafe` manga and `pixiv` artworks +- Improve detection of unavailable albums on `luscious` and `imgbb` +- Miscellaneous fixes for `tsumino` + +## 1.10.4 - 2019-09-08 +### Additions +- Support for + - `lineblog` - https://www.lineblog.me/ ([#404](https://github.com/mikf/gallery-dl/issues/404)) + - `fuskator` - https://fuskator.com/ ([#407](https://github.com/mikf/gallery-dl/issues/407)) +- `ugoira` option for `danbooru` to download pre-rendered ugoira animations ([#406](https://github.com/mikf/gallery-dl/issues/406)) +### Fixes +- Download the correct files from `twitter` replies ([#403](https://github.com/mikf/gallery-dl/issues/403)) +- Prevent crash when trying to use unavailable downloader modules ([#405](https://github.com/mikf/gallery-dl/issues/405)) +- Fix `pixiv` authentication ([#411](https://github.com/mikf/gallery-dl/issues/411)) +- Improve `exhentai` image limit checks +- Miscellaneous fixes for `hentaicafe`, `simplyhentai`, `tumblr` + +## 1.10.3 - 2019-08-30 +### Additions +- Provide `filename` metadata for all `deviantart` files ([#392](https://github.com/mikf/gallery-dl/issues/392), [#400](https://github.com/mikf/gallery-dl/issues/400)) +- Implement a `ytdl.outtmpl` option to let youtube-dl handle filenames by itself ([#395](https://github.com/mikf/gallery-dl/issues/395)) +- Support `seiga` mobile URLs ([#401](https://github.com/mikf/gallery-dl/issues/401)) +### Fixes +- Extract more than the first 32 posts from `piczel` galleries ([#396](https://github.com/mikf/gallery-dl/issues/396)) +- Fix filenames of archives created with `--zip` ([#397](https://github.com/mikf/gallery-dl/issues/397)) +- Skip unavailable images and videos on `flickr` ([#398](https://github.com/mikf/gallery-dl/issues/398)) +- Fix filesystem paths on Windows with Python 3.6 and lower ([#402](https://github.com/mikf/gallery-dl/issues/402)) + +## 1.10.2 - 2019-08-23 +### Additions +- Support for `instagram` stories and IGTV ([#371](https://github.com/mikf/gallery-dl/issues/371), 
[#373](https://github.com/mikf/gallery-dl/issues/373)) +- Support for individual `imgbb` images ([#363](https://github.com/mikf/gallery-dl/issues/363)) +- `deviantart.quality` option to set the JPEG compression quality for newer images ([#369](https://github.com/mikf/gallery-dl/issues/369)) +- `enumerate` option for `extractor.skip` ([#306](https://github.com/mikf/gallery-dl/issues/306)) +- `adjust-extensions` option to control filename extension adjustments +- `path-remove` option to remove control characters etc. from filesystem paths +### Changes +- Rename `restrict-filenames` to `path-restrict` +- Adjust `pixiv` metadata and default filename format ([#366](https://github.com/mikf/gallery-dl/issues/366)) + - Set `filename` to `"{category}_{user[id]}_{id}{suffix}.{extension}"` to restore the old default +- Improve and optimize directory and filename generation +### Fixes +- Allow the `classify` post-processor to handle files with unknown filename extension ([#138](https://github.com/mikf/gallery-dl/issues/138)) +- Fix rate limit handling for OAuth APIs ([#368](https://github.com/mikf/gallery-dl/issues/368)) +- Fix artwork and scraps extraction on `deviantart` ([#376](https://github.com/mikf/gallery-dl/issues/376), [#392](https://github.com/mikf/gallery-dl/issues/392)) +- Distinguish between `imgur` album and gallery URLs ([#380](https://github.com/mikf/gallery-dl/issues/380)) +- Prevent crash when using `--ugoira-conv` ([#382](https://github.com/mikf/gallery-dl/issues/382)) +- Handle multi-image posts on `patreon` ([#383](https://github.com/mikf/gallery-dl/issues/383)) +- Miscellaneous fixes for `*reactor`, `simplyhentai` + +## 1.10.1 - 2019-08-02 +### Fixes +- Use the correct domain for exhentai.org input URLs + +## 1.10.0 - 2019-08-01 +### Warning +- Prior to version 1.10.0 all cache files were created world readable (mode `644`) + leading to possible sensitive information disclosure on multi-user systems +- It is recommended to restrict access permissions of 
already existing files + (`/tmp/.gallery-dl.cache`) with `chmod 600` +- Windows users should not be affected +### Additions +- Support for + - `vsco` - https://vsco.co/ ([#331](https://github.com/mikf/gallery-dl/issues/331)) + - `imgbb` - https://imgbb.com/ ([#361](https://github.com/mikf/gallery-dl/issues/361)) + - `adultempire` - https://www.adultempire.com/ ([#340](https://github.com/mikf/gallery-dl/issues/340)) +- `restrict-filenames` option to create Windows-compatible filenames on any platform ([#348](https://github.com/mikf/gallery-dl/issues/348)) +- `forward-cookies` option to control cookie forwarding to youtube-dl ([#352](https://github.com/mikf/gallery-dl/issues/352)) +### Changes +- The default cache file location on non-Windows systems is now + - `$XDG_CACHE_HOME/gallery-dl/cache.sqlite3` or + - `~/.cache/gallery-dl/cache.sqlite3` +- New cache files are created with mode `600` +- `exhentai` extractors will always use `e-hentai.org` as domain +### Fixes +- Better handling of `exhentai` image limits and errors ([#356](https://github.com/mikf/gallery-dl/issues/356), [#360](https://github.com/mikf/gallery-dl/issues/360)) +- Try to prevent ZIP file corruption ([#355](https://github.com/mikf/gallery-dl/issues/355)) +- Miscellaneous fixes for `behance`, `ngomik` + +## 1.9.0 - 2019-07-19 +### Additions +- Support for + - `erolord` - http://erolord.com/ ([#326](https://github.com/mikf/gallery-dl/issues/326)) +- Add login support for `instagram` ([#195](https://github.com/mikf/gallery-dl/issues/195)) +- Add `--no-download` and `extractor.*.download` to disable file downloads ([#220](https://github.com/mikf/gallery-dl/issues/220)) +- Add `-A/--abort` to specify the number of consecutive download skips before aborting +- Interpret `-1` as infinite retries ([#300](https://github.com/mikf/gallery-dl/issues/300)) +- Implement custom log message formats per log-level ([#304](https://github.com/mikf/gallery-dl/issues/304)) +- Implement an `mtime` post-processor that sets 
file modification times according to metadata fields ([#332](https://github.com/mikf/gallery-dl/issues/332)) +- Implement a `twitter.content` option to enable tweet text extraction ([#333](https://github.com/mikf/gallery-dl/issues/333), [#338](https://github.com/mikf/gallery-dl/issues/338)) +- Enable `date-min/-max/-format` options for `tumblr` ([#337](https://github.com/mikf/gallery-dl/issues/337)) +### Changes +- Set file modification times according to their `Last-Modified` header when downloading ([#236](https://github.com/mikf/gallery-dl/issues/236), [#277](https://github.com/mikf/gallery-dl/issues/277)) + - Use `--no-mtime` or `downloader.*.mtime` to disable this behavior +- Duplicate download URLs are no longer silently ignored (controllable with `extractor.*.image-unique`) +- Deprecate `--abort-on-skip` +### Fixes +- Retry downloads on OpenSSL exceptions ([#324](https://github.com/mikf/gallery-dl/issues/324)) +- Ignore unavailable pins on `sexcom` instead of raising an exception ([#325](https://github.com/mikf/gallery-dl/issues/325)) +- Use Firefox's SSL/TLS ciphers to prevent Cloudflare CAPTCHAs ([#342](https://github.com/mikf/gallery-dl/issues/342)) +- Improve folder name matching on `deviantart` ([#343](https://github.com/mikf/gallery-dl/issues/343)) +- Forward cookies to `youtube-dl` to allow downloading private videos +- Miscellaneous fixes for `35photo`, `500px`, `newgrounds`, `simplyhentai` + +## 1.8.7 - 2019-06-28 +### Additions +- Support for + - `vanillarock` - https://vanilla-rock.com/ ([#254](https://github.com/mikf/gallery-dl/issues/254)) + - `nsfwalbum` - https://nsfwalbum.com/ ([#287](https://github.com/mikf/gallery-dl/issues/287)) +- `artist` and `tags` metadata for `hentaicafe` ([#238](https://github.com/mikf/gallery-dl/issues/238)) +- `description` metadata for `instagram` ([#310](https://github.com/mikf/gallery-dl/issues/310)) +- Format string option to replace a substring with another - `R<old>/<new>/` 
([#318](https://github.com/mikf/gallery-dl/issues/318)) +### Changes +- Delete empty archives created by the `zip` post-processor ([#316](https://github.com/mikf/gallery-dl/issues/316)) +### Fixes +- Handle `hitomi` Game CG galleries correctly ([#321](https://github.com/mikf/gallery-dl/issues/321)) +- Miscellaneous fixes for `deviantart`, `hitomi`, `pururin`, `kissmanga`, `keenspot`, `mangoxo`, `imagefap` + +## 1.8.6 - 2019-06-14 +### Additions +- Support for + - `slickpic` - https://www.slickpic.com/ ([#249](https://github.com/mikf/gallery-dl/issues/249)) + - `xhamster` - https://xhamster.com/ ([#281](https://github.com/mikf/gallery-dl/issues/281)) + - `pornhub` - https://www.pornhub.com/ ([#282](https://github.com/mikf/gallery-dl/issues/282)) + - `8muses` - https://www.8muses.com/ ([#305](https://github.com/mikf/gallery-dl/issues/305)) +- `extra` option for `deviantart` to download Sta.sh content linked in description texts ([#302](https://github.com/mikf/gallery-dl/issues/302)) +### Changes +- Detect `directlink` URLs with upper case filename extensions ([#296](https://github.com/mikf/gallery-dl/issues/296)) +### Fixes +- Improved error handling for `tumblr` API calls ([#297](https://github.com/mikf/gallery-dl/issues/297)) +- Fixed extraction of `livedoor` blogs ([#301](https://github.com/mikf/gallery-dl/issues/301)) +- Fixed extraction of special `deviantart` Sta.sh items ([#307](https://github.com/mikf/gallery-dl/issues/307)) +- Fixed pagination for specific `keenspot` comics + +## 1.8.5 - 2019-06-01 +### Additions +- Support for + - `keenspot` - http://keenspot.com/ ([#223](https://github.com/mikf/gallery-dl/issues/223)) + - `sankakucomplex` - https://www.sankakucomplex.com ([#258](https://github.com/mikf/gallery-dl/issues/258)) +- `folders` option for `deviantart` to add a list of containing folders to each file ([#276](https://github.com/mikf/gallery-dl/issues/276)) +- `captcha` option for `kissmanga` and `readcomiconline` to control CAPTCHA handling 
([#279](https://github.com/mikf/gallery-dl/issues/279)) +- `filename` metadata for files downloaded with youtube-dl ([#291](https://github.com/mikf/gallery-dl/issues/291)) +### Changes +- Adjust `wallhaven` extractors to new page layout: + - use API and add `api-key` option + - removed traditional login support +- Provide original filenames for `patreon` downloads ([#268](https://github.com/mikf/gallery-dl/issues/268)) +- Use e-hentai.org or exhentai.org depending on input URL ([#278](https://github.com/mikf/gallery-dl/issues/278)) +### Fixes +- Fix pagination over `sankaku` popular listings ([#265](https://github.com/mikf/gallery-dl/issues/265)) +- Fix folder and collection extraction on `deviantart` ([#271](https://github.com/mikf/gallery-dl/issues/271)) +- Detect "AreYouHuman" redirects on `readcomiconline` ([#279](https://github.com/mikf/gallery-dl/issues/279)) +- Miscellaneous fixes for `hentainexus`, `livedoor`, `ngomik` + +## 1.8.4 - 2019-05-17 +### Additions +- Support for + - `patreon` - https://www.patreon.com/ ([#226](https://github.com/mikf/gallery-dl/issues/226)) + - `hentainexus` - https://hentainexus.com/ ([#256](https://github.com/mikf/gallery-dl/issues/256)) +- `date` metadata fields for `pixiv` ([#248](https://github.com/mikf/gallery-dl/issues/248)), `instagram` ([#250](https://github.com/mikf/gallery-dl/issues/250)), `exhentai`, and `newgrounds` +### Changes +- Improved `flickr` metadata and video extraction ([#246](https://github.com/mikf/gallery-dl/issues/246)) +### Fixes +- Download original GIF animations from `deviantart` ([#242](https://github.com/mikf/gallery-dl/issues/242)) +- Ignore missing `edge_media_to_comment` fields on `instagram` ([#250](https://github.com/mikf/gallery-dl/issues/250)) +- Fix serialization of `datetime` objects for `--write-metadata` ([#251](https://github.com/mikf/gallery-dl/issues/251), [#252](https://github.com/mikf/gallery-dl/issues/252)) +- Allow multiple post-processor command-line options at once 
([#253](https://github.com/mikf/gallery-dl/issues/253)) +- Prevent crash on `booru` sites when no tags are available ([#259](https://github.com/mikf/gallery-dl/issues/259)) +- Fix extraction on `instagram` after `rhx_gis` field removal ([#266](https://github.com/mikf/gallery-dl/issues/266)) +- Avoid Cloudflare CAPTCHAs for Python interpreters built against OpenSSL < 1.1.1 +- Miscellaneous fixes for `luscious` + +## 1.8.3 - 2019-05-04 +### Additions +- Support for + - `plurk` - https://www.plurk.com/ ([#212](https://github.com/mikf/gallery-dl/issues/212)) + - `sexcom` - https://www.sex.com/ ([#147](https://github.com/mikf/gallery-dl/issues/147)) +- `--clear-cache` +- `date` metadata fields for `deviantart`, `twitter`, and `tumblr` ([#224](https://github.com/mikf/gallery-dl/issues/224), [#232](https://github.com/mikf/gallery-dl/issues/232)) +### Changes +- Standalone executables are now built using PyInstaller: + - uses the latest CPython interpreter (Python 3.7.3) + - available on several platforms (Windows, Linux, macOS) + - includes the `certifi` CA bundle, `youtube-dl`, and `pyOpenSSL` on Windows +### Fixes +- Patch `urllib3`'s default list of SSL/TLS ciphers to prevent Cloudflare CAPTCHAs ([#227](https://github.com/mikf/gallery-dl/issues/227)) + (Windows users need to install `pyOpenSSL` for this to take effect) +- Provide fallback URLs for `twitter` images ([#237](https://github.com/mikf/gallery-dl/issues/237)) +- Send `Referer` headers when downloading from `hitomi` ([#239](https://github.com/mikf/gallery-dl/issues/239)) +- Updated login procedure on `mangoxo` + +## 1.8.2 - 2019-04-12 +### Additions +- Support for + - `pixnet` - https://www.pixnet.net/ ([#177](https://github.com/mikf/gallery-dl/issues/177)) + - `wikiart` - https://www.wikiart.org/ ([#179](https://github.com/mikf/gallery-dl/issues/179)) + - `mangoxo` - https://www.mangoxo.com/ ([#184](https://github.com/mikf/gallery-dl/issues/184)) + - `yaplog` - https://yaplog.jp/ 
([#190](https://github.com/mikf/gallery-dl/issues/190)) + - `livedoor` - http://blog.livedoor.jp/ ([#190](https://github.com/mikf/gallery-dl/issues/190)) +- Login support for `mangoxo` ([#184](https://github.com/mikf/gallery-dl/issues/184)) and `twitter` ([#214](https://github.com/mikf/gallery-dl/issues/214)) +### Changes +- Increased required `Requests` version to 2.11.0 +### Fixes +- Improved image quality on `reactor` sites ([#210](https://github.com/mikf/gallery-dl/issues/210)) +- Support `imagebam` galleries with more than 100 images ([#219](https://github.com/mikf/gallery-dl/issues/219)) +- Updated Cloudflare bypass code + +## 1.8.1 - 2019-03-29 +### Additions +- Support for: + - `35photo` - https://35photo.pro/ ([#162](https://github.com/mikf/gallery-dl/issues/162)) + - `500px` - https://500px.com/ ([#185](https://github.com/mikf/gallery-dl/issues/185)) +- `instagram` extractor for hashtags ([#202](https://github.com/mikf/gallery-dl/issues/202)) +- Option to get more metadata on `deviantart` ([#189](https://github.com/mikf/gallery-dl/issues/189)) +- Man pages and bash completion ([#150](https://github.com/mikf/gallery-dl/issues/150)) +- Snap improvements ([#197](https://github.com/mikf/gallery-dl/issues/197), [#199](https://github.com/mikf/gallery-dl/issues/199), [#207](https://github.com/mikf/gallery-dl/issues/207)) +### Changes +- Better FFmpeg arguments for `--ugoira-conv` +- Adjusted metadata for `luscious` albums +### Fixes +- Proper handling of `instagram` multi-image posts ([#178](https://github.com/mikf/gallery-dl/issues/178), [#201](https://github.com/mikf/gallery-dl/issues/201)) +- Fixed `tumblr` avatar URLs when not using OAuth1.0 ([#193](https://github.com/mikf/gallery-dl/issues/193)) +- Miscellaneous fixes for `exhentai`, `komikcast` + +## 1.8.0 - 2019-03-15 +### Additions +- Support for: + - `weibo` - https://www.weibo.com/ + - `pururin` - https://pururin.io/ ([#174](https://github.com/mikf/gallery-dl/issues/174)) + - `fashionnova` - 
https://www.fashionnova.com/ ([#175](https://github.com/mikf/gallery-dl/issues/175)) + - `shopify` sites in general ([#175](https://github.com/mikf/gallery-dl/issues/175)) +- Snap packaging ([#169](https://github.com/mikf/gallery-dl/issues/169), [#170](https://github.com/mikf/gallery-dl/issues/170), [#187](https://github.com/mikf/gallery-dl/issues/187), [#188](https://github.com/mikf/gallery-dl/issues/188)) +- Automatic Cloudflare DDoS protection bypass +- Extractor and Job information for logging format strings +- `dynastyscans` image and search extractors ([#163](https://github.com/mikf/gallery-dl/issues/163)) +- `deviantart` scraps extractor ([#168](https://github.com/mikf/gallery-dl/issues/168)) +- `artstation` extractor for artwork listings ([#172](https://github.com/mikf/gallery-dl/issues/172)) +- `smugmug` video support and improved image format selection ([#183](https://github.com/mikf/gallery-dl/issues/183)) +### Changes +- More metadata for `nhentai` galleries +- Combined `myportfolio` extractors into one +- Renamed `name` metadata field to `filename` and removed the original `filename` field +- Simplified and improved internal data structures +- Optimized creation of child extractors +### Fixes +- Filter empty `tumblr` URLs ([#165](https://github.com/mikf/gallery-dl/issues/165)) +- Filter ads and improve connection speed on `hentaifoundry` +- Show proper error messages if `luscious` galleries are unavailable +- Miscellaneous fixes for `mangahere`, `ngomik`, `simplyhentai`, `imgspice` +### Removals +- `seaotterscans` + +## 1.7.0 - 2019-02-05 +- Added support for: + - `photobucket` - http://photobucket.com/ ([#117](https://github.com/mikf/gallery-dl/issues/117)) + - `hentaifox` - https://hentaifox.com/ ([#160](https://github.com/mikf/gallery-dl/issues/160)) + - `tsumino` - https://www.tsumino.com/ ([#161](https://github.com/mikf/gallery-dl/issues/161)) +- Added the ability to dynamically generate extractors based on a user's config file for + - 
[`mastodon`](https://github.com/tootsuite/mastodon) instances ([#144](https://github.com/mikf/gallery-dl/issues/144)) + - [`foolslide`](https://github.com/FoolCode/FoOlSlide) based sites + - [`foolfuuka`](https://github.com/FoolCode/FoolFuuka) based archives +- Added an extractor for `behance` collections ([#157](https://github.com/mikf/gallery-dl/issues/157)) +- Added login support for `luscious` ([#159](https://github.com/mikf/gallery-dl/issues/159)) and `tsumino` ([#161](https://github.com/mikf/gallery-dl/issues/161)) +- Added an option to stop downloading if the `exhentai` image limit is exceeded ([#141](https://github.com/mikf/gallery-dl/issues/141)) +- Fixed extraction issues for `behance` and `mangapark` + +## 1.6.3 - 2019-01-18 +- Added `metadata` post-processor to write image metadata to an external file ([#135](https://github.com/mikf/gallery-dl/issues/135)) +- Added option to reverse chapter order of manga extractors ([#149](https://github.com/mikf/gallery-dl/issues/149)) +- Added authentication support for `danbooru` ([#151](https://github.com/mikf/gallery-dl/issues/151)) +- Added tag metadata for `exhentai` and `hbrowse` galleries +- Improved `*reactor` extractors ([#148](https://github.com/mikf/gallery-dl/issues/148)) +- Fixed extraction issues for `nhentai` ([#156](https://github.com/mikf/gallery-dl/issues/156)), `pinterest`, `mangapark` + +## 1.6.2 - 2019-01-01 +- Added support for: + - `instagram` - https://www.instagram.com/ ([#134](https://github.com/mikf/gallery-dl/issues/134)) +- Added support for multiple items on sta.sh pages ([#113](https://github.com/mikf/gallery-dl/issues/113)) +- Added option to download `tumblr` avatars ([#137](https://github.com/mikf/gallery-dl/issues/137)) +- Changed defaults for visited post types and inline media on `tumblr` +- Improved inline extraction of `tumblr` posts ([#133](https://github.com/mikf/gallery-dl/issues/133), [#137](https://github.com/mikf/gallery-dl/issues/137)) +- Improved error handling and retry 
behavior of all API calls +- Improved handling of missing fields in format strings ([#136](https://github.com/mikf/gallery-dl/issues/136)) +- Fixed hash extraction for unusual `tumblr` URLs ([#129](https://github.com/mikf/gallery-dl/issues/129)) +- Fixed image subdomains for `hitomi` galleries ([#142](https://github.com/mikf/gallery-dl/issues/142)) +- Fixed and improved miscellaneous issues for `kissmanga` ([#20](https://github.com/mikf/gallery-dl/issues/20)), `luscious`, `mangapark`, `readcomiconline` + +## 1.6.1 - 2018-11-28 +- Added support for: + - `joyreactor` - http://joyreactor.cc/ ([#114](https://github.com/mikf/gallery-dl/issues/114)) + - `pornreactor` - http://pornreactor.cc/ ([#114](https://github.com/mikf/gallery-dl/issues/114)) + - `newgrounds` - https://www.newgrounds.com/ ([#119](https://github.com/mikf/gallery-dl/issues/119)) +- Added extractor for search results on `luscious` ([#127](https://github.com/mikf/gallery-dl/issues/127)) +- Fixed filenames of ZIP archives ([#126](https://github.com/mikf/gallery-dl/issues/126)) +- Fixed extraction issues for `gfycat`, `hentaifoundry` ([#125](https://github.com/mikf/gallery-dl/issues/125)), `mangafox` + +## 1.6.0 - 2018-11-17 +- Added support for: + - `wallhaven` - https://alpha.wallhaven.cc/ + - `yuki` - https://yuki.la/ +- Added youtube-dl integration and video downloads for `twitter` ([#99](https://github.com/mikf/gallery-dl/issues/99)), `behance`, `artstation` +- Added per-extractor options for network connections (`retries`, `timeout`, `verify`) +- Added a `--no-check-certificate` command-line option +- Added ability to specify the number of skipped downloads before aborting/exiting ([#115](https://github.com/mikf/gallery-dl/issues/115)) +- Added extractors for scraps, favorites, popular and recent images on `hentaifoundry` ([#110](https://github.com/mikf/gallery-dl/issues/110)) +- Improved login procedure for `pixiv` to avoid unwanted emails on each new login +- Improved album metadata and error 
handling for `flickr` ([#109](https://github.com/mikf/gallery-dl/issues/109)) +- Updated default User-Agent string to Firefox 62 ([#122](https://github.com/mikf/gallery-dl/issues/122)) +- Fixed `twitter` API response handling when logged in ([#123](https://github.com/mikf/gallery-dl/issues/123)) +- Fixed issue when converting Ugoira using H.264 +- Fixed miscellaneous issues for `2chan`, `deviantart`, `fallenangels`, `flickr`, `imagefap`, `pinterest`, `turboimagehost`, `warosu`, `yuki` ([#112](https://github.com/mikf/gallery-dl/issues/112)) + +## 1.5.3 - 2018-09-14 +- Added support for: + - `hentaicafe` - https://hentai.cafe/ ([#101](https://github.com/mikf/gallery-dl/issues/101)) + - `bobx` - http://www.bobx.com/dark/ +- Added black-/whitelist options for post-processor modules +- Added support for `tumblr` inline videos ([#102](https://github.com/mikf/gallery-dl/issues/102)) +- Fixed extraction of `smugmug` albums without owner ([#100](https://github.com/mikf/gallery-dl/issues/100)) +- Fixed issues when using default config values with `reddit` extractors ([#104](https://github.com/mikf/gallery-dl/issues/104)) +- Fixed pagination for user favorites on `sankaku` ([#106](https://github.com/mikf/gallery-dl/issues/106)) +- Fixed a crash when processing `deviantart` journals ([#108](https://github.com/mikf/gallery-dl/issues/108)) + +## 1.5.2 - 2018-08-31 +- Added support for `twitter` timelines ([#96](https://github.com/mikf/gallery-dl/issues/96)) +- Added option to suppress FFmpeg output during ugoira conversions +- Improved filename formatter performance +- Improved inline image quality on `tumblr` ([#98](https://github.com/mikf/gallery-dl/issues/98)) +- Fixed image URLs for newly released `mangadex` chapters +- Fixed a smaller issue with `deviantart` journals +- Replaced `subapics` with `ngomik` + +## 1.5.1 - 2018-08-17 +- Added support for: + - `piczel` - https://piczel.tv/ +- Added support for related pins on `pinterest` +- Fixed accessing "offensive" galleries on 
`exhentai` ([#97](https://github.com/mikf/gallery-dl/issues/97)) +- Fixed extraction issues for `mangadex`, `komikcast` and `behance` +- Removed original-image functionality from `tumblr`, since "raw" images are no longer accessible + +## 1.5.0 - 2018-08-03 +- Added support for: + - `behance` - https://www.behance.net/ + - `myportfolio` - https://www.myportfolio.com/ ([#95](https://github.com/mikf/gallery-dl/issues/95)) +- Added custom format string options to handle long strings ([#92](https://github.com/mikf/gallery-dl/issues/92), [#94](https://github.com/mikf/gallery-dl/issues/94)) + - Slicing: `"{field[10:40]}"` + - Replacement: `"{field:L40/too long/}"` +- Improved frame rate handling for ugoira conversions +- Improved private access token usage on `deviantart` +- Fixed metadata extraction for some images on `nijie` +- Fixed chapter extraction on `mangahere` +- Removed `whatisthisimnotgoodwithcomputers` +- Removed support for Python 3.3 + +## 1.4.2 - 2018-07-06 +- Added image-pool extractors for `safebooru` and `rule34` +- Added option for extended tag information on `booru` sites ([#92](https://github.com/mikf/gallery-dl/issues/92)) +- Added support for DeviantArt's new URL format +- Added support for `mangapark` mirrors +- Changed `imagefap` extractors to use HTTPS +- Fixed crash when skipping downloads for files without known extension + +## 1.4.1 - 2018-06-22 +- Added an `ugoira` post-processor to convert `pixiv` animations to WebM +- Added `--zip` and `--ugoira-conv` command-line options +- Changed how ugoira frame information is handled + - instead of being written to a separate file, it is now made available as a metadata field of the ZIP archive +- Fixed manga and chapter titles for `mangadex` +- Fixed file deletion by post-processors + +## 1.4.0 - 2018-06-08 +- Added support for: + - `simplyhentai` - https://www.simply-hentai.com/ ([#89](https://github.com/mikf/gallery-dl/issues/89)) +- Added extractors for + - `pixiv` search results and followed users 
+ - `deviantart` search results and popular listings +- Added post-processors to perform actions on downloaded files +- Added options to configure logging behavior +- Added OAuth support for `smugmug` +- Changed `pixiv` extractors to use the AppAPI + - this breaks `favorite` archive IDs and changes some metadata fields +- Changed the default filename format for `tumblr` and renamed `offset` to `num` +- Fixed a possible UnicodeDecodeError during installation ([#86](https://github.com/mikf/gallery-dl/issues/86)) +- Fixed extraction of `mangadex` manga with more than 100 chapters ([#84](https://github.com/mikf/gallery-dl/issues/84)) +- Fixed miscellaneous issues for `imgur`, `reddit`, `komikcast`, `mangafox` and `imagebam` + +## 1.3.5 - 2018-05-04 +- Added support for: + - `smugmug` - https://www.smugmug.com/ +- Added title information for `mangadex` chapters +- Improved the `pinterest` API implementation ([#83](https://github.com/mikf/gallery-dl/issues/83)) +- Improved error handling for `deviantart` and `tumblr` +- Removed `gomanga` and `puremashiro` + +## 1.3.4 - 2018-04-20 +- Added support for custom OAuth2 credentials for `pinterest` +- Improved rate limit handling for `tumblr` extractors +- Improved `hentaifoundry` extractors +- Improved `imgur` URL patterns +- Fixed miscellaneous extraction issues for `luscious` and `komikcast` +- Removed `loveisover` and `spectrumnexus` + +## 1.3.3 - 2018-04-06 +- Added extractors for + - `nhentai` search results + - `exhentai` search results and favorites + - `nijie` doujins and favorites +- Improved metadata extraction for `exhentai` and `nijie` +- Improved `tumblr` extractors by avoiding unnecessary API calls +- Fixed Cloudflare DDoS protection bypass +- Fixed errors when trying to print unencodable characters + +## 1.3.2 - 2018-03-23 +- Added extractors for `artstation` albums, challenges and search results +- Improved URL and metadata extraction for `hitomi` and `nhentai` +- Fixed page transitions for `danbooru` API 
results ([#82](https://github.com/mikf/gallery-dl/issues/82)) + +## 1.3.1 - 2018-03-16 +- Added support for: + - `mangadex` - https://mangadex.org/ + - `artstation` - https://www.artstation.com/ +- Added Cloudflare DDoS protection bypass to `komikcast` extractors +- Changed archive ID formats for `deviantart` folders and collections +- Improved error handling for `deviantart` API calls +- Removed `imgchili` and various smaller image hosts + +## 1.3.0 - 2018-03-02 +- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76)) +- Added options to customize [archive ID formats](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorarchive-format) and [undefined replacement fields](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorkeywords-default) +- Changed various archive ID formats to improve their behavior for favorites / bookmarks / etc. + - Affected modules are `deviantart`, `flickr`, `tumblr`, `pixiv` and all …boorus +- Improved `sankaku` and `idolcomplex` support by + - respecting `page` and `next` URL parameters ([#79](https://github.com/mikf/gallery-dl/issues/79)) + - bypassing the page-limit for unauthenticated users +- Improved `directlink` metadata by properly unquoting it +- Fixed `pixiv` ugoira extraction ([#78](https://github.com/mikf/gallery-dl/issues/78)) +- Fixed miscellaneous extraction issues for `mangastream` and `tumblr` +- Removed `yeet`, `chronos`, `coreimg`, `hosturimage`, `imageontime`, `img4ever`, `imgmaid`, `imgupload` + +## 1.2.0 - 2018-02-16 +- Added support for: + - `paheal` - https://rule34.paheal.net/ ([#69](https://github.com/mikf/gallery-dl/issues/69)) + - `komikcast` - https://komikcast.com/ ([#70](https://github.com/mikf/gallery-dl/issues/70)) + - `subapics` - http://subapics.com/ ([#70](https://github.com/mikf/gallery-dl/issues/70)) +- Added `--download-archive` to record downloaded files in an archive file +- Added `--write-log` 
to write logging output to a file +- Added a filetype check on download completion to fix incorrectly assigned filename extensions ([#63](https://github.com/mikf/gallery-dl/issues/63)) +- Added the `tumblr:...` pseudo URI scheme to support custom domains for Tumblr blogs ([#71](https://github.com/mikf/gallery-dl/issues/71)) +- Added fallback URLs for `tumblr` images ([#64](https://github.com/mikf/gallery-dl/issues/64)) +- Added support for `reddit`-hosted images ([#68](https://github.com/mikf/gallery-dl/issues/68)) +- Improved the input file format by allowing comments and per-URL options +- Fixed OAuth 1.0 signature generation for Python 3.3 and 3.4 ([#75](https://github.com/mikf/gallery-dl/issues/75)) +- Fixed smaller issues for `luscious`, `hentai2read`, `hentaihere` and `imgur` +- Removed the `batoto` module + +## 1.1.2 - 2018-01-12 +- Added support for: + - `puremashiro` - http://reader.puremashiro.moe/ ([#66](https://github.com/mikf/gallery-dl/issues/66)) + - `idolcomplex` - https://idol.sankakucomplex.com/ +- Added an option to filter reblogs on `tumblr` ([#61](https://github.com/mikf/gallery-dl/issues/61)) +- Added OAuth user authentication for `tumblr` ([#65](https://github.com/mikf/gallery-dl/issues/65)) +- Added support for `slideshare` mobile URLs ([#67](https://github.com/mikf/gallery-dl/issues/67)) +- Improved pagination for various …booru sites to work around page limits +- Fixed chapter information parsing for certain manga on `kissmanga` ([#58](https://github.com/mikf/gallery-dl/issues/58)) and `batoto` ([#60](https://github.com/mikf/gallery-dl/issues/60)) + +## 1.1.1 - 2017-12-22 +- Added support for: + - `slideshare` - https://www.slideshare.net/ ([#54](https://github.com/mikf/gallery-dl/issues/54)) +- Added pool- and post-extractors for `sankaku` +- Added OAuth user authentication for `deviantart` +- Updated `luscious` to support `members.luscious.net` URLs ([#55](https://github.com/mikf/gallery-dl/issues/55)) +- Updated `mangahere` to use their 
new domain name (mangahere.cc) and support mobile URLs +- Updated `gelbooru` to not be restricted to the first 20,000 images ([#56](https://github.com/mikf/gallery-dl/issues/56)) +- Fixed extraction issues for `nhentai` and `khinsider` + +## 1.1.0 - 2017-12-08 +- Added the ``-r/--limit-rate`` command-line option to set a maximum download rate +- Added the ``--sleep`` command-line option to specify the number of seconds to sleep before each download +- Updated `gelbooru` to no longer use their now disabled API +- Fixed SWF extraction for `sankaku` ([#52](https://github.com/mikf/gallery-dl/issues/52)) +- Fixed extraction issues for `hentai2read` and `khinsider` +- Removed the deprecated `--images` and `--chapters` options +- Removed the ``mangazuki`` module + +## 1.0.2 - 2017-11-24 +- Added an option to set a [custom user-agent string](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractoruser-agent) +- Improved retry behavior for failed HTTP requests +- Improved `seiga` by providing better metadata and getting more than the latest 200 images +- Improved `tumblr` by adding support for [all post types](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractortumblrposts), scanning for [inline images](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractortumblrinline) and following [external links](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractortumblrexternal) ([#48](https://github.com/mikf/gallery-dl/issues/48)) +- Fixed extraction issues for `hbrowse`, `khinsider` and `senmanga` + +## 1.0.1 - 2017-11-10 +- Added support for: + - `xvideos` - https://www.xvideos.com/ ([#45](https://github.com/mikf/gallery-dl/issues/45)) +- Fixed exception handling during file downloads which could lead to a premature exit +- Fixed an issue with `tumblr` where not all images would be downloaded when using tags ([#48](https://github.com/mikf/gallery-dl/issues/48)) +- Fixed extraction 
issues for `imgbox` ([#47](https://github.com/mikf/gallery-dl/issues/47)), `mangastream` ([#49](https://github.com/mikf/gallery-dl/issues/49)) and `mangahere` + +## 1.0.0 - 2017-10-27 +- Added support for: + - `warosu` - https://warosu.org/ + - `b4k` - https://arch.b4k.co/ +- Added support for `pixiv` ranking lists +- Added support for `booru` popular lists (`danbooru`, `e621`, `konachan`, `yandere`, `3dbooru`) +- Added the `--cookies` command-line and [`cookies`](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies) config option to load additional cookies +- Added the `--filter` and `--chapter-filter` command-line options to select individual images or manga-chapters by their metadata using simple Python expressions ([#43](https://github.com/mikf/gallery-dl/issues/43)) +- Added the [`verify`](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#downloaderhttpverify) config option to control certificate verification during file downloads +- Added config options to overwrite internally used API credentials ([API Tokens & IDs](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#api-tokens-ids)) +- Added `-K` as a shortcut for `--list-keywords` +- Changed the `--images` and `--chapters` command-line options to `--range` and `--chapter-range` +- Changed keyword names for various modules to make them accessible by `--filter`. In general minus signs have been replaced with underscores (e.g. `gallery-id` -> `gallery_id`). 
+- Changed default filename formats for manga extractors to optionally use volume and title information +- Improved the downloader modules to use [`.part` files](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#downloaderpart) and support resuming incomplete downloads ([#29](https://github.com/mikf/gallery-dl/issues/29)) +- Improved `deviantart` by distinguishing between users and groups ([#26](https://github.com/mikf/gallery-dl/issues/26)), always using HTTPS, and always downloading full-sized original images +- Improved `sankaku` by adding authentication support and fixing various other issues ([#44](https://github.com/mikf/gallery-dl/issues/44)) +- Improved URL pattern for direct image links ([#30](https://github.com/mikf/gallery-dl/issues/30)) +- Fixed an issue with `luscious` not getting original image URLs ([#33](https://github.com/mikf/gallery-dl/issues/33)) +- Fixed various smaller issues for `batoto`, `hentai2read` ([#38](https://github.com/mikf/gallery-dl/issues/38)), `jaiminisbox`, `khinsider`, `kissmanga` ([#28](https://github.com/mikf/gallery-dl/issues/28), [#46](https://github.com/mikf/gallery-dl/issues/46)), `mangahere`, `pawoo`, `twitter` +- Removed `kisscomic` and `yonkouprod` modules + +## 0.9.1 - 2017-07-24 +- Added support for: + - `2chan` - https://www.2chan.net/ + - `4plebs` - https://archive.4plebs.org/ + - `archivedmoe` - https://archived.moe/ + - `archiveofsins` - https://archiveofsins.com/ + - `desuarchive` - https://desuarchive.org/ + - `fireden` - https://boards.fireden.net/ + - `loveisover` - https://archive.loveisover.me/ + - `nyafuu` - https://archive.nyafuu.org/ + - `rbt` - https://rbt.asia/ + - `thebarchive` - https://thebarchive.com/ + - `mangazuki` - https://mangazuki.co/ +- Improved `reddit` to allow submission filtering by ID and human-readable dates +- Improved `deviantart` to support group galleries and gallery folders ([#26](https://github.com/mikf/gallery-dl/issues/26)) +- Changed `deviantart` to use 
better default path formats +- Fixed extraction of larger `imgur` albums +- Fixed some smaller issues for `pixiv`, `batoto` and `fallenangels` + +## 0.9.0 - 2017-06-28 +- Added support for: + - `reddit` - https://www.reddit.com/ ([#15](https://github.com/mikf/gallery-dl/issues/15)) + - `flickr` - https://www.flickr.com/ ([#16](https://github.com/mikf/gallery-dl/issues/16)) + - `gfycat` - https://gfycat.com/ +- Added support for direct image links +- Added user authentication via [OAuth](https://github.com/mikf/gallery-dl#52oauth) for `reddit` and `flickr` +- Added support for user authentication data from [`.netrc`](https://stackoverflow.com/tags/.netrc/info) files ([#22](https://github.com/mikf/gallery-dl/issues/22)) +- Added a simple progress indicator for multiple URLs ([#19](https://github.com/mikf/gallery-dl/issues/19)) +- Added the `--write-unsupported` command-line option to write unsupported URLs to a file +- Added documentation for all available config options ([configuration.rst](https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst)) +- Improved `pixiv` to support tags for user downloads ([#17](https://github.com/mikf/gallery-dl/issues/17)) +- Improved `pixiv` to support shortened and http://pixiv.me/... 
URLs ([#23](https://github.com/mikf/gallery-dl/issues/23)) +- Improved `imgur` to properly handle `.gifv` images and provide better metadata +- Fixed an issue with `kissmanga` where metadata parsing for some series failed ([#20](https://github.com/mikf/gallery-dl/issues/20)) +- Fixed an issue with getting filename extensions from `Content-Type` response headers + +## 0.8.4 - 2017-05-21 +- Added the `--abort-on-skip` option to stop extraction if a download would be skipped +- Improved the output format of the `--list-keywords` option +- Updated `deviantart` to support all media types and journals +- Updated `fallenangels` to support their [Vietnamese version](https://truyen.fascans.com/) +- Fixed an issue with multiple tags on ...booru sites +- Removed the `yomanga` module + +## 0.8.3 - 2017-05-01 +- Added support for https://pawoo.net/ +- Added manga extractors for all [FoOlSlide](https://foolcode.github.io/FoOlSlide/)-based modules +- Added the `-q/--quiet` and `-v/--verbose` options to control output verbosity +- Added the `-j/--dump-json` option to dump extractor results in JSON format +- Added the `--ignore-config` option +- Updated the `exhentai` extractor to fall back to using the e-hentai version if no username is given +- Updated `deviantart` to support sta.sh URLs +- Fixed an issue with `kissmanga` which prevented image URLs from being decrypted properly (again) +- Fixed an issue with `pixhost` where for an image inside an album it would always download the first image of that album ([#13](https://github.com/mikf/gallery-dl/issues/13)) +- Removed the `mangashare` and `readcomics` modules + +## 0.8.2 - 2017-04-10 +- Fixed an issue in `kissmanga` which prevented image URLs from being decrypted properly + +## 0.8.1 - 2017-04-09 +- Added new extractors: + - `kireicake` - https://reader.kireicake.com/ + - `seaotterscans` - https://reader.seaotterscans.com/ +- Added a favourites extractor for `deviantart` +- Re-enabled the `kissmanga` module +- Updated `nijie` 
to support multi-page image listings +- Updated `mangastream` to support readms.net URLs +- Updated `exhentai` to support e-hentai.org URLs +- Updated `fallenangels` to support their new domain and site layout + +## 0.8.0 - 2017-03-28 +- Added logging support +- Added the `-R/--retries` option to specify how often a download should be retried before giving up +- Added the `--http-timeout` option to set a timeout for HTTP connections +- Improved error handling/tolerance during HTTP file downloads ([#10](https://github.com/mikf/gallery-dl/issues/10)) +- Improved option parsing and the help message from `-h/--help` +- Changed the way configuration values are used by prioritizing top-level values + - This allows for cmdline options like `-u/--username` to overwrite values set in configuration files +- Fixed an issue with `imagefap.com` where incorrectly reported gallery sizes would cause the extractor to fail ([#9](https://github.com/mikf/gallery-dl/issues/9)) +- Fixed an issue with `seiga.nicovideo.jp` where invalid characters in an API response caused the XML parser to fail +- Fixed an issue with `seiga.nicovideo.jp` where the filename extension for the first image would be used for all others +- Removed support for old configuration paths on Windows +- Removed several modules: + - `mangamint`: site is down + - `whentai`: now requires account with VIP status for original images + - `kissmanga`: encrypted image URLs (will be re-added later) + +## 0.7.0 - 2017-03-06 +- Added `--images` and `--chapters` options + - Specifies which images (or chapters) to download through a comma-separated list of indices or index-ranges + - Example: `--images -2,4,6-8,10-` will select images with index 1, 2, 4, 6, 7, 8 and 10 up to the last one +- Changed the `-g`/`--get-urls` option + - The number of times the `-g` option is given now determines up to which level URLs are resolved. 
+ - See 3bca86618505c21628cd9c7179ce933a78d00ca2 +- Changed several option keys: + - `directory_fmt` -> `directory` + - `filename_fmt` -> `filename` + - `download-original` -> `original` +- Improved [FoOlSlide](https://foolcode.github.io/FoOlSlide/)-based extractors +- Fixed URL extraction for hentai2read +- Fixed an issue with deviantart, where the API access token wouldn't get refreshed + +## 0.6.4 - 2017-02-13 +- Added new extractors: + - fallenangels (famatg.com) +- Fixed url- and data-extraction for: + - nhentai + - mangamint + - twitter + - imagetwist +- Disabled InsecureConnectionWarning when no certificates are available + +## 0.6.3 - 2017-01-25 +- Added new extractors: + - gomanga + - yomanga + - mangafox +- Fixed deviantart extractor failing - switched to using their API +- Fixed an issue with SQLite on Python 3.6 +- Automated test builds via Travis CI +- Standalone executables for Windows + +## 0.6.2 - 2017-01-05 +- Added new extractors: + - kisscomic + - readcomics + - yonkouprod + - jaiminisbox +- Added manga extractor to batoto-module +- Added user extractor to seiga-module +- Added `-i`/`--input-file` argument to allow local files and stdin as input (like wget) +- Added basic support for `file://` URLs + - this allows for the recursive extractor to be applied to local files: + - `$ gallery-dl r:file://[path to file]` +- Added a utility extractor to run unit test URLs +- Updated luscious to deal with API changes +- Fixed twitter to provide the original image URL +- Minor fixes to hentaifoundry +- Removed imgclick extractor + +## 0.6.1 - 2016-11-30 +- Added new extractors: + - whentai + - readcomiconline + - sensescans, worldthree + - imgmaid, imagevenue, img4ever, imgspot, imgtrial, pixhost +- Added base class for extractors of [FoOlSlide](https://foolcode.github.io/FoOlSlide/)-based sites +- Changed default paths for configuration files on Windows + - old paths are still supported, but that will change in future versions +- Fixed aborting downloads 
if a single one failed ([#5](https://github.com/mikf/gallery-dl/issues/5)) +- Fixed cloudflare-bypass cache containing outdated cookies +- Fixed image URLs for hitomi and 8chan +- Updated deviantart to always provide the highest quality image +- Updated README.rst +- Removed doujinmode extractor + +## 0.6.0 - 2016-10-08 +- Added new extractors: + - hentaihere + - dokireader + - twitter + - rapidimg, picmaniac +- Added support to find filename extensions by Content-Type response header +- Fixed filename/path issues on Windows ([#4](https://github.com/mikf/gallery-dl/issues/4)): + - Enable path names with more than 260 characters + - Remove trailing spaces in path segments +- Updated Job class to automatically set category/subcategory keywords + +## 0.5.2 - 2016-09-23 +- Added new extractors: + - pinterest + - rule34 + - dynastyscans + - imagebam, coreimg, imgcandy, imgtrex +- Added login capabilities for batoto +- Added `--version` cmdline argument to print the current program version and exit +- Added `--list-extractors` cmdline argument to print names of all extractor classes together with descriptions and example URLs +- Added proper error messages if an image/user does not exist +- Added unittests for every extractor + +## 0.5.1 - 2016-08-22 +- Added new extractors: + - luscious + - doujinmode + - hentaibox + - seiga + - imagefap +- Changed error output to use stderr instead of stdout +- Fixed broken pipes causing an exception-dump by catching BrokenPipeErrors + +## 0.5.0 - 2016-07-25 + +## 0.4.1 - 2015-12-03 +- New modules (imagetwist, turboimagehost) +- Manga-extractors: Download entire manga and not just single chapters +- Generic extractor (provisional) +- Better and configurable console output +- Windows support + +## 0.4.0 - 2015-11-26 + +## 0.3.3 - 2015-11-10 + +## 0.3.2 - 2015-11-04 + +## 0.3.1 - 2015-10-30 + +## 0.3.0 - 2015-10-05 + +## 0.2.0 - 2015-06-28 + +## 0.1.0 - 2015-05-27 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + 
Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..71172df --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include README.rst CHANGELOG.md LICENSE +recursive-include docs *.conf @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.13.6 +Version: 1.14.0 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). 
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.0/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.0/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -110,6 +110,15 @@ Description: ========== $ snap install gallery-dl + Chocolatey + ---------- + + Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: + + .. code:: powershell + + $ choco install gallery-dl + Usage ===== @@ -173,7 +182,7 @@ Description: ========== +--------------------------------------------+------------------------------------------+ | Linux | Windows | +--------------------------------------------+------------------------------------------+ - |* ``/etc/gallery-dl.conf`` |* | + |* ``/etc/gallery-dl.conf`` |* ``%APPDATA%\gallery-dl\config.json`` | |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | +--------------------------------------------+------------------------------------------+ @@ -183,6 +192,11 @@ Description: ========== Values in later configuration files will override previous ones. + Command line options will override all related settings in the configuration file(s), + e.g. using ``--write-metadata`` will enable writing metadata using the default values + for all ``postprocessors.metadata.*`` settings, overriding any specific settings in + configuration files. 
+ Authentication ============== @@ -190,7 +204,7 @@ Description: ========== Username & Password ------------------- - Some extractors require you to provide valid login-credentials in the form of + Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for ``pixiv``, ``nijie``, and ``seiga`` and optional for @@ -200,16 +214,14 @@ Description: ========== You can set the necessary information in your configuration file (cf. gallery-dl.conf_) - .. code:: + .. code:: json { "extractor": { - ... "pixiv": { "username": "<username>", "password": "<password>" } - ... } } @@ -222,6 +234,49 @@ Description: ========== $ gallery-dl -u <username> -p <password> URL $ gallery-dl -o username=<username> -o password=<password> URL + Cookies + ------- + + For sites where login with username & password is not possible due to + CAPTCHA or similar, or has not been implemented yet, you can use the + cookies from a browser login session and input them into *gallery-dl*. + + This can be done via the + `cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__ + option in your configuration file by specifying + + - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon + | (e.g. `cookies.txt <https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg>`__ for Chrome, + `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/?src=search>`__ for Firefox) + + - | a list of name-value pairs gathered from your browser's web developer tools + | (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__, + in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__) + + For example: + + .. 
code:: json + + { + "extractor": { + "instagram": { + "cookies": "$HOME/path/to/cookies.txt" + }, + "patreon": { + "cookies": { + "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" + } + } + } + } + + You can also specify a cookies.txt file with + the :code:`--cookies` command-line option: + + .. code:: bash + + $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + OAuth ----- @@ -232,7 +287,7 @@ Description: ========== which would otherwise be unavailable to a public user. To link your account to *gallery-dl*, start by invoking it with - ``oauth:<site-name>`` as an argument. For example: + ``oauth:<sitename>`` as an argument. For example: .. code:: bash @@ -247,7 +302,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.6.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -259,6 +314,7 @@ Description: ========== .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth + .. _Chocolatey: https://chocolatey.org/install .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.0/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.0/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -99,6 +99,15 @@ Linux users that are using a distro that is supported by Snapd_ can install *gal $ snap install gallery-dl +Chocolatey +---------- + +Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: + +.. code:: powershell + + $ choco install gallery-dl + Usage ===== @@ -162,7 +171,7 @@ Configuration files for *gallery-dl* use a JSON-based file format. +--------------------------------------------+------------------------------------------+ | Linux | Windows | +--------------------------------------------+------------------------------------------+ -|* ``/etc/gallery-dl.conf`` |* | +|* ``/etc/gallery-dl.conf`` |* ``%APPDATA%\gallery-dl\config.json`` | |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | +--------------------------------------------+------------------------------------------+ @@ -172,6 +181,11 @@ i.e. ``C:\Users\<username>\``) Values in later configuration files will override previous ones. +Command line options will override all related settings in the configuration file(s), +e.g. using ``--write-metadata`` will enable writing metadata using the default values +for all ``postprocessors.metadata.*`` settings, overriding any specific settings in +configuration files. 
+ Authentication ============== @@ -179,7 +193,7 @@ Authentication Username & Password ------------------- -Some extractors require you to provide valid login-credentials in the form of +Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for ``pixiv``, ``nijie``, and ``seiga`` and optional for @@ -189,16 +203,14 @@ and optional for You can set the necessary information in your configuration file (cf. gallery-dl.conf_) -.. code:: +.. code:: json { "extractor": { - ... "pixiv": { "username": "<username>", "password": "<password>" } - ... } } @@ -211,6 +223,49 @@ or you can provide them directly via the $ gallery-dl -u <username> -p <password> URL $ gallery-dl -o username=<username> -o password=<password> URL +Cookies +------- + +For sites where login with username & password is not possible due to +CAPTCHA or similar, or has not been implemented yet, you can use the +cookies from a browser login session and input them into *gallery-dl*. + +This can be done via the +`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__ +option in your configuration file by specifying + +- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon + | (e.g. `cookies.txt <https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg>`__ for Chrome, + `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/?src=search>`__ for Firefox) + +- | a list of name-value pairs gathered from your browser's web developer tools + | (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__, + in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__) + +For example: + +.. 
code:: json + + { + "extractor": { + "instagram": { + "cookies": "$HOME/path/to/cookies.txt" + }, + "patreon": { + "cookies": { + "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" + } + } + } + } + +You can also specify a cookies.txt file with +the :code:`--cookies` command-line option: + +.. code:: bash + + $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + OAuth ----- @@ -221,7 +276,7 @@ to issue requests on your account's behalf and enables it to access resources which would otherwise be unavailable to a public user. To link your account to *gallery-dl*, start by invoking it with -``oauth:<site-name>`` as an argument. For example: +``oauth:<sitename>`` as an argument. For example: .. code:: bash @@ -236,7 +291,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.6.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -248,6 +303,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth +.. _Chocolatey: https://chocolatey.org/install .. 
|pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 11a796a..921d601 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --dump-json --simulate --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --no-part --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index c115752..fe9a684 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-05-02" "1.13.6" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-05-31" "1.14.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -68,6 +68,9 @@ Write logging output 
to FILE .B "\-\-write\-unsupported" \f[I]FILE\f[] Write URLs, which get emitted by other extractors but cannot be handled, to FILE .TP +.B "\-\-write\-pages" +Write downloaded intermediary pages to files in the current directory to debug problems +.TP .B "\-r, \-\-limit\-rate" \f[I]RATE\f[] Maximum download rate (e.g. 500k or 2.5M) .TP @@ -78,7 +81,7 @@ Maximum number of retries for failed HTTP requests or -1 for infinite retries (d Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist .TP .B "\-\-http\-timeout" \f[I]SECONDS\f[] -Timeout for HTTP connections (defaut: 30.0) +Timeout for HTTP connections (default: 30.0) .TP .B "\-\-sleep" \f[I]SECONDS\f[] Number of seconds to sleep before each download diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index a944167..5a37463 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-05-02" "1.13.6" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-05-31" "1.14.0" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -153,17 +153,25 @@ for any spawned child extractors. .SS extractor.*.path-restrict .IP "Type:" 6 -\f[I]string\f[] +\f[I]string\f[] or \f[I]object\f[] .IP "Default:" 9 \f[I]"auto"\f[] .IP "Example:" 4 "/!? (){}" +.br +{" ": "_", "/": "-", "|": "-", ":": "-", "*": "+"} +.br .IP "Description:" 4 -Set of characters to replace with underscores (\f[I]_\f[]) -in generated path segment names. +A string of characters to be replaced with the value of +.br +\f[I]path-replace <extractor.*.path-replace_>\f[] +or an object mapping invalid/unwanted characters to their replacements +.br +for generated path segment names. 
+.br Special values: @@ -175,9 +183,20 @@ depending on the local operating system .br * \f[I]"windows"\f[]: \f[I]"\\\\\\\\|/<>:\\"?*"\f[] -Note: In a set with 2 or more characters, \f[I][]^-\\\f[] need to be +Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] +.SS extractor.*.path-replace +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"_"\f[] + +.IP "Description:" 4 +The replacement character(s) for +\f[I]path-restrict <extractor.*.path-restrict_>\f[] + .SS extractor.*.path-remove .IP "Type:" 6 \f[I]string\f[] @@ -188,7 +207,7 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] .IP "Description:" 4 Set of characters to remove from generated path names. -Note: In a set with 2 or more characters, \f[I][]^-\\\f[] need to be +Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] .SS extractor.*.skip @@ -627,7 +646,7 @@ Download embedded videos hosted on https://www.blogger.com/ \f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 Controls the download target for Ugoira posts. @@ -1023,6 +1042,17 @@ open the URL in the user's browser. .br * \f[I]false\f[]: Ask the user to copy & paste an URL from the terminal. +.SS extractor.oauth.cache +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Store tokens received during OAuth authorizations +in \f[I]cache <cache.file_>\f[]. + .SS extractor.oauth.port .IP "Type:" 6 \f[I]integer\f[] @@ -1588,7 +1618,7 @@ directly passed to youtube-dl. \f[I]bool\f[] .IP "Default:" 9 -\f[I]true\f[] +\f[I]false\f[] .IP "Description:" 4 Forward cookies to youtube-dl. 
@@ -2094,7 +2124,7 @@ case the Python interpreter gets shut down unexpectedly .IP "Default:" 9 .br -* \f[I]tempfile.gettempdir()\f[] + \f[I]".gallery-dl.cache"\f[] on Windows +* (\f[I]%APPDATA%\f[] or \f[I]"~"\f[]) + \f[I]"/gallery-dl/cache.sqlite3"\f[] on Windows .br * (\f[I]$XDG_CACHE_HOME\f[] or \f[I]"~/.cache"\f[]) + \f[I]"/gallery-dl/cache.sqlite3"\f[] on all other platforms diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf new file mode 100644 index 0000000..d84febd --- /dev/null +++ b/docs/gallery-dl-example.conf @@ -0,0 +1,206 @@ +{ + "extractor": + { + "base-directory": "~/gallery-dl/", + "archive": "~/gallery-dl/archive.sqlite3", + "proxy": "http://10.10.1.10:3128", + + "postprocessors": [ + { + "name": "ugoira", + "whitelist": ["pixiv", "danbooru"], + "ffmpeg-twopass": true, + "ffmpeg-args": ["-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"] + }, + { + "name": "metadata", + "whitelist": ["danbooru", "yandere", "sankaku"], + "mode": "tags" + } + ], + + "pixiv": + { + "archive": "~/gallery-dl/archive-pixiv.sqlite3", + + "filename": "{id}{num}.{extension}", + "directory": ["Pixiv", "Works", "{user[id]}"], + + "username": "foo", + "password": "bar", + + "favorite": + { + "directory": ["Pixiv", "Favorites", "{user[id]}"] + }, + + "bookmark": + { + "directory": ["Pixiv", "My Bookmarks"], + + "username": "foo123", + "password": "bar123" + } + }, + + "exhentai": + { + "cookies": + { + "ipb_member_id": "12345", + "ipb_pass_hash": "1234567890abcdef", + "igneous" : "123456789", + "hath_perks" : "m1.m2.m3.a-123456789a" + }, + + "proxy": + { + "http": "http://10.10.1.10:8080", + "https": "https://10.10.1.10:443" + }, + + "filename": "{num:>04}_{name}.{extension}", + "directory": ["{category!c}", "{title}"], + + "wait-min": 1.0, + "wait-max": 5.0 + }, + + "mangadex": + { + "chapter-filter": "lang == 'en'", + "postprocessors": [{ + "name": "zip", + "keep-files": false, + "compression": "zip" + }] + }, + + "deviantart": + { + "include": 
"gallery,scraps", + "metadata": true, + "postprocessors": [{ + "name": "metadata", + "mode": "custom", + "directory" : "Descriptions", + "content-format" : "{description}\n", + "extension-format": "descr.txt" + }] + }, + + "flickr": + { + "access-token": "1234567890-abcdef", + "access-token-secret": "1234567890abcdef", + "size-max": 1920 + }, + + "reddit": + { + "morecomments": true, + "date-min": "2017-01", + "date-format": "%Y-%m", + "recursion": 1 + }, + + "sankaku": + { + "sleep": 2, + "wait-min": 5.0, + "wait-max": 5.0, + "cookies": "~/gallery-dl/cookies-sankaku.txt" + }, + + "tumblr": + { + "posts": "all", + "external": false, + "reblogs": false, + "inline": true, + + "likes": + { + "posts": "video,photo,link", + "external": true, + "reblogs": true + } + }, + + "mastodon": + { + "mastodon.xyz": + { + "access-token": "cab65529..." + }, + "tabletop.social": { + "access-token": "513a36c6..." + }, + + "directory": ["mastodon", "{instance}", "{account[username]!l}"], + "filename": "{id}_{media[id]}.{extension}" + }, + + "foolslide": { + "otscans": {"root": "https://otscans.com/foolslide"}, + "helvetica": {"root": "https://helveticascans.com/r" } + }, + + "foolfuuka": { + "fireden-onion": {"root": "http://ydt6jy2ng3s3xg2e.onion"}, + "scalearchive": {"root": "https://archive.scaled.team" } + }, + + "replace invalid path characters with unicode alternatives": null, + "path-restrict": { + "\\": "⧹", + "/" : "⧸", + "|" : "│", + ":" : "꞉", + "*" : "∗", + "?" 
: "?", + "\"": "″", + "<" : "﹤", + ">" : "﹥" + } + }, + + "downloader": + { + "part-directory": "/tmp/.download/", + "rate": "1M", + "retries": 3, + "timeout": 8.5 + }, + + "output": + { + "mode": "terminal", + "log": { + "level": "info", + "format": { + "debug" : "\u001b[0;37m{name}: {message}\u001b[0m", + "info" : "\u001b[1;37m{name}: {message}\u001b[0m", + "warning": "\u001b[1;33m{name}: {message}\u001b[0m", + "error" : "\u001b[1;31m{name}: {message}\u001b[0m" + } + }, + "logfile": { + "path": "~/gallery-dl/log.txt", + "mode": "w", + "level": "debug" + }, + "unsupportedfile": { + "path": "~/gallery-dl/unsupported.txt", + "mode": "a", + "format": "{asctime} {message}", + "format-date": "%Y-%m-%d-%H-%M-%S" + } + }, + + "cache": { + "file": "~/gallery-dl/cache.sqlite3" + }, + + "netrc": true +} diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf new file mode 100644 index 0000000..c120d25 --- /dev/null +++ b/docs/gallery-dl.conf @@ -0,0 +1,218 @@ +{ + "extractor": + { + "base-directory": "./gallery-dl/", + "postprocessors": null, + "archive": null, + "cookies": null, + "cookies-update": false, + "proxy": null, + "skip": true, + "sleep": 0, + "path-restrict": "auto", + "path-replace": "_", + "path-remove": "\\u0000-\\u001f\\u007f", + "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0", + + "artstation": + { + "external": false + }, + "blogger": + { + "videos": true + }, + "danbooru": + { + "username": null, + "password": null, + "ugoira": false + }, + "deviantart": + { + "extra": false, + "flat": true, + "folders": false, + "journals": "html", + "mature": true, + "metadata": false, + "original": true, + "quality": 100, + "wait-min": 0 + }, + "exhentai": + { + "username": null, + "password": null, + "original": true, + "wait-min": 3.0, + "wait-max": 6.0 + }, + "flickr": + { + "videos": true, + "size-max": null + }, + "gelbooru": + { + "api": true + }, + "gfycat": + { + "format": "mp4" + }, + "hitomi": + { + "metadata": true + }, 
+ "idolcomplex": + { + "username": null, + "password": null, + "wait-min": 3.0, + "wait-max": 6.0 + }, + "imgur": + { + "mp4": true + }, + "instagram": + { + "highlights": false, + "videos": true + }, + "kissmanga": + { + "captcha": "stop" + }, + "nijie": + { + "username": null, + "password": null + }, + "oauth": + { + "browser": true, + "cache": true, + "port": 6414 + }, + "pixiv": + { + "username": null, + "password": null, + "avatar": false, + "ugoira": true + }, + "reactor": + { + "wait-min": 3.0, + "wait-max": 6.0 + }, + "readcomiconline": + { + "captcha": "stop" + }, + "recursive": + { + "blacklist": ["directlink", "oauth", "recursive", "test"] + }, + "reddit": + { + "comments": 0, + "morecomments": false, + "date-min": 0, + "date-max": 253402210800, + "date-format": "%Y-%m-%dT%H:%M:%S", + "id-min": "0", + "id-max": "zik0zj", + "recursion": 0, + "videos": true, + "user-agent": "Python:gallery-dl:0.8.4 (by /u/mikf1)" + }, + "sankaku": + { + "username": null, + "password": null, + "wait-min": 3.0, + "wait-max": 6.0 + }, + "seiga": + { + "username": null, + "password": null + }, + "tumblr": + { + "avatar": false, + "external": false, + "inline": true, + "posts": "all", + "reblogs": true + }, + "twitter": + { + "content": false, + "replies": true, + "retweets": true, + "twitpic": false, + "videos": true + }, + "vsco": + { + "videos": true + }, + "wallhaven": + { + "api-key": null + }, + "weibo": + { + "retweets": true, + "videos": true + }, + "booru": + { + "tags": false + } + }, + + "downloader": + { + "part": true, + "part-directory": null, + + "http": + { + "adjust-extensions": true, + "mtime": true, + "rate": null, + "retries": 4, + "timeout": 30.0, + "verify": true + }, + + "ytdl": + { + "format": null, + "forward-cookies": false, + "mtime": true, + "outtmpl": null, + "rate": null, + "retries": 4, + "timeout": 30.0, + "verify": true + } + }, + + "output": + { + "mode": "auto", + "progress": true, + "shorten": true, + "log": "[{name}][{levelname}] {message}", 
+ "logfile": null, + "unsupportedfile": null + }, + + "netrc": false +} diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index a79e69f..c7189b2 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.13.6 +Version: 1.14.0 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.0/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.0/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -110,6 +110,15 @@ Description: ========== $ snap install gallery-dl + Chocolatey + ---------- + + Windows users that have Chocolatey_ installed can install *gallery-dl* from the Chocolatey Community Packages repository: + + .. 
code:: powershell + + $ choco install gallery-dl + Usage ===== @@ -173,7 +182,7 @@ Description: ========== +--------------------------------------------+------------------------------------------+ | Linux | Windows | +--------------------------------------------+------------------------------------------+ - |* ``/etc/gallery-dl.conf`` |* | + |* ``/etc/gallery-dl.conf`` |* ``%APPDATA%\gallery-dl\config.json`` | |* ``${HOME}/.config/gallery-dl/config.json``|* ``%USERPROFILE%\gallery-dl\config.json``| |* ``${HOME}/.gallery-dl.conf`` |* ``%USERPROFILE%\gallery-dl.conf`` | +--------------------------------------------+------------------------------------------+ @@ -183,6 +192,11 @@ Description: ========== Values in later configuration files will override previous ones. + Command line options will override all related settings in the configuration file(s), + e.g. using ``--write-metadata`` will enable writing metadata using the default values + for all ``postprocessors.metadata.*`` settings, overriding any specific settings in + configuration files. + Authentication ============== @@ -190,7 +204,7 @@ Description: ========== Username & Password ------------------- - Some extractors require you to provide valid login-credentials in the form of + Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for ``pixiv``, ``nijie``, and ``seiga`` and optional for @@ -200,16 +214,14 @@ Description: ========== You can set the necessary information in your configuration file (cf. gallery-dl.conf_) - .. code:: + .. code:: json { "extractor": { - ... "pixiv": { "username": "<username>", "password": "<password>" } - ... 
} } @@ -222,6 +234,49 @@ Description: ========== $ gallery-dl -u <username> -p <password> URL $ gallery-dl -o username=<username> -o password=<password> URL + Cookies + ------- + + For sites where login with username & password is not possible due to + CAPTCHA or similar, or has not been implemented yet, you can use the + cookies from a browser login session and input them into *gallery-dl*. + + This can be done via the + `cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__ + option in your configuration file by specifying + + - | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon + | (e.g. `cookies.txt <https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg>`__ for Chrome, + `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/?src=search>`__ for Firefox) + + - | a list of name-value pairs gathered from your browser's web developer tools + | (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__, + in `Firefox <https://developer.mozilla.org/en-US/docs/Tools/Storage_Inspector>`__) + + For example: + + .. code:: json + + { + "extractor": { + "instagram": { + "cookies": "$HOME/path/to/cookies.txt" + }, + "patreon": { + "cookies": { + "session_id": "K1T57EKu19TR49C51CDjOJoXNQLF7VbdVOiBrC9ye0a" + } + } + } + } + + You can also specify a cookies.txt file with + the :code:`--cookies` command-line option: + + .. code:: bash + + $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL + OAuth ----- @@ -232,7 +287,7 @@ Description: ========== which would otherwise be unavailable to a public user. To link your account to *gallery-dl*, start by invoking it with - ``oauth:<site-name>`` as an argument. For example: + ``oauth:<sitename>`` as an argument. For example: .. code:: bash @@ -247,7 +302,7 @@ Description: ========== .. 
_gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.6.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.0.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -259,6 +314,7 @@ Description: ========== .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth + .. _Chocolatey: https://chocolatey.org/install .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 5d3f07b..e094fe1 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -1,9 +1,14 @@ +CHANGELOG.md +LICENSE +MANIFEST.in README.rst setup.cfg setup.py data/completion/gallery-dl data/man/gallery-dl.1 data/man/gallery-dl.conf.5 +docs/gallery-dl-example.conf +docs/gallery-dl.conf gallery_dl/__init__.py gallery_dl/__main__.py gallery_dl/aes.py @@ -73,6 +78,7 @@ gallery_dl/extractor/hitomi.py gallery_dl/extractor/hypnohub.py gallery_dl/extractor/idolcomplex.py gallery_dl/extractor/imagebam.py +gallery_dl/extractor/imagechest.py gallery_dl/extractor/imagefap.py gallery_dl/extractor/imagehosts.py gallery_dl/extractor/imgbb.py @@ -124,6 +130,7 @@ gallery_dl/extractor/readcomiconline.py gallery_dl/extractor/realbooru.py gallery_dl/extractor/recursive.py gallery_dl/extractor/reddit.py +gallery_dl/extractor/redgifs.py gallery_dl/extractor/rule34.py gallery_dl/extractor/safebooru.py gallery_dl/extractor/sankaku.py @@ -145,6 +152,7 @@ gallery_dl/extractor/vanillarock.py 
gallery_dl/extractor/vsco.py gallery_dl/extractor/wallhaven.py gallery_dl/extractor/warosu.py +gallery_dl/extractor/webtoons.py gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py gallery_dl/extractor/xhamster.py diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py index 6cde65d..3886091 100644 --- a/gallery_dl/cache.py +++ b/gallery_dl/cache.py @@ -57,7 +57,7 @@ class MemoryCacheDecorator(CacheDecorator): value, expires = self.cache[key] except KeyError: expires = 0 - if expires < timestamp: + if expires <= timestamp: value = self.func(*args, **kwargs) expires = timestamp + self.maxage self.cache[key] = value, expires @@ -189,25 +189,26 @@ def clear(): def _path(): - path = config.get(("cache",), "file", -1) - if path != -1: + path = config.get(("cache",), "file", util.SENTINEL) + if path is not util.SENTINEL: return util.expand_path(path) - if os.name == "nt": - import tempfile - return os.path.join(tempfile.gettempdir(), ".gallery-dl.cache") + if util.WINDOWS: + cachedir = os.environ.get("APPDATA", "~") + else: + cachedir = os.environ.get("XDG_CACHE_HOME", "~/.cache") - cachedir = util.expand_path(os.path.join( - os.environ.get("XDG_CACHE_HOME", "~/.cache"), "gallery-dl")) + cachedir = util.expand_path(os.path.join(cachedir, "gallery-dl")) os.makedirs(cachedir, exist_ok=True) return os.path.join(cachedir, "cache.sqlite3") try: dbfile = _path() - if os.name != "nt": - # restrict access permissions for new db files - os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600)) + + # restrict access permissions for new db files + os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600)) + DatabaseCacheDecorator.db = sqlite3.connect( dbfile, timeout=30, check_same_thread=False) except (OSError, TypeError, sqlite3.OperationalError): diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index 43ccdeb..0cf5a57 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -8,11 +8,11 @@ """Methods to access sites behind Cloudflare 
protection""" -import re import time import operator import collections import urllib.parse +from xml.etree import ElementTree from . import text from .cache import memcache @@ -41,12 +41,16 @@ def solve_challenge(session, response, kwargs): url = root + text.unescape(text.extract(page, 'action="', '"')[0]) headers["Referer"] = response.url - for inpt in text.extract_iter(page, "<input ", ">"): - name = text.extract(inpt, 'name="', '"')[0] + form = text.extract(page, 'id="challenge-form"', '</form>')[0] + for element in ElementTree.fromstring( + "<f>" + form + "</f>").findall("input"): + name = element.attrib.get("name") + if not name: + continue if name == "jschl_answer": value = solve_js_challenge(page, parsed.netloc) else: - value = text.unescape(text.extract(inpt, 'value="', '"')[0]) + value = element.attrib.get("value") params[name] = value time.sleep(4) @@ -84,6 +88,8 @@ def solve_js_challenge(page, netloc): variable = "{}.{}".format(data["var"], data["key"]) vlength = len(variable) + k = text.extract(page, "k = '", "'")[0] + # evaluate the initial expression solution = evaluate_expression(data["expr"], page, netloc) @@ -97,7 +103,7 @@ def solve_js_challenge(page, netloc): # select arithmetc function based on operator (+/-/*) func = OPERATORS[expr[vlength]] # evaluate the rest of the expression - value = evaluate_expression(expr[vlength+2:], page, netloc) + value = evaluate_expression(expr[vlength+2:], page, netloc, k) # combine expression value with our current solution solution = func(solution, value) @@ -110,17 +116,18 @@ def solve_js_challenge(page, netloc): solution = "{:.10f}".format(solution) return solution + elif expr.startswith("k+="): + k += str(evaluate_expression(expr[3:], page, netloc)) + -def evaluate_expression(expr, page, netloc, *, - split_re=re.compile(r"[(+]+([^)]*)\)")): +def evaluate_expression(expr, page, netloc, k=""): """Evaluate a single Javascript expression for the challenge""" if expr.startswith("function(p)"): # get HTML element 
with ID k and evaluate the expression inside # 'eval(eval("document.getElementById(k).innerHTML"))' - k, pos = text.extract(page, "k = '", "'") - e, pos = text.extract(page, 'id="'+k+'"', '<') - return evaluate_expression(e.partition(">")[2], page, netloc) + expr = text.extract(page, 'id="'+k+'"', '<')[0] + return evaluate_expression(expr.partition(">")[2], page, netloc) if "/" in expr: # split the expression in numerator and denominator subexpressions, diff --git a/gallery_dl/config.py b/gallery_dl/config.py index c2787ad..5303616 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -22,8 +22,9 @@ log = logging.getLogger("config") _config = {} -if os.name == "nt": +if util.WINDOWS: _default_configs = [ + r"%APPDATA%\gallery-dl\config.json", r"%USERPROFILE%\gallery-dl\config.json", r"%USERPROFILE%\gallery-dl.conf", ] @@ -139,7 +140,6 @@ def unset(path, key, *, conf=_config): class apply(): """Context Manager: apply a collection of key-value pairs""" - _sentinel = object() def __init__(self, kvlist): self.original = [] @@ -147,12 +147,12 @@ class apply(): def __enter__(self): for path, key, value in self.kvlist: - self.original.append((path, key, get(path, key, self._sentinel))) + self.original.append((path, key, get(path, key, util.SENTINEL))) set(path, key, value) def __exit__(self, etype, value, traceback): for path, key, value in self.original: - if value is self._sentinel: + if value is util.SENTINEL: unset(path, key) else: set(path, key, value) diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py index eca1284..d858075 100644 --- a/gallery_dl/downloader/common.py +++ b/gallery_dl/downloader/common.py @@ -9,7 +9,6 @@ """Common classes and constants used by downloader modules.""" import os -import logging from .. 
import config, util @@ -17,15 +16,12 @@ class DownloaderBase(): """Base class for downloaders""" scheme = "" - def __init__(self, extractor, output): - self.session = extractor.session - self.out = output + def __init__(self, job): + self.out = job.out + self.session = job.extractor.session self.part = self.config("part", True) self.partdir = self.config("part-directory") - - self.log = logging.getLogger("downloader." + self.scheme) - self.log.job = extractor.log.job - self.log.extractor = extractor + self.log = job.get_logger("downloader." + self.scheme) if self.partdir: self.partdir = util.expand_path(self.partdir) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 021dc16..6644827 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -24,16 +24,18 @@ except ImportError: class HttpDownloader(DownloaderBase): scheme = "http" - def __init__(self, extractor, output): - DownloaderBase.__init__(self, extractor, output) + def __init__(self, job): + DownloaderBase.__init__(self, job) + extractor = job.extractor + self.chunk_size = 16384 + self.downloading = False + self.adjust_extension = self.config("adjust-extensions", True) self.retries = self.config("retries", extractor._retries) self.timeout = self.config("timeout", extractor._timeout) self.verify = self.config("verify", extractor._verify) self.mtime = self.config("mtime", True) self.rate = self.config("rate") - self.downloading = False - self.chunk_size = 16384 if self.retries < 0: self.retries = float("inf") diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index fe6c4bc..c3dd863 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,8 +17,9 @@ import os 
class YoutubeDLDownloader(DownloaderBase): scheme = "ytdl" - def __init__(self, extractor, output): - DownloaderBase.__init__(self, extractor, output) + def __init__(self, job): + DownloaderBase.__init__(self, job) + extractor = job.extractor retries = self.config("retries", extractor._retries) options = { @@ -35,7 +36,7 @@ class YoutubeDLDownloader(DownloaderBase): if self.config("logging", True): options["logger"] = self.log - self.forward_cookies = self.config("forward-cookies", True) + self.forward_cookies = self.config("forward-cookies", False) outtmpl = self.config("outtmpl") self.outtmpl = DEFAULT_OUTTMPL if outtmpl == "default" else outtmpl @@ -70,6 +71,10 @@ class YoutubeDLDownloader(DownloaderBase): if "url" in info_dict: text.nameext_from_url(info_dict["url"], pathfmt.kwdict) + formats = info_dict.get("requested_formats") + if formats and not compatible_formats(formats): + info_dict["ext"] = "mkv" + if self.outtmpl: self.ytdl.params["outtmpl"] = self.outtmpl pathfmt.filename = filename = self.ytdl.prepare_filename(info_dict) @@ -105,4 +110,15 @@ class YoutubeDLDownloader(DownloaderBase): return True +def compatible_formats(formats): + video_ext = formats[0].get("ext") + audio_ext = formats[1].get("ext") + + if video_ext == "webm" and audio_ext == "webm": + return True + + exts = ("mp3", "mp4", "m4a", "m4p", "m4b", "m4r", "m4v", "ismv", "isma") + return video_ext in exts and audio_ext in exts + + __downloader__ = YoutubeDLDownloader diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 85fbddb..561b484 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -48,6 +48,7 @@ modules = [ "hypnohub", "idolcomplex", "imagebam", + "imagechest", "imagefap", "imgbb", "imgbox", @@ -94,6 +95,7 @@ modules = [ "readcomiconline", "realbooru", "reddit", + "redgifs", "rule34", "safebooru", "sankaku", @@ -113,6 +115,7 @@ modules = [ "vsco", "wallhaven", "warosu", + "webtoons", "weibo", "wikiart", "xhamster", 
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 3a282c2..dd685df 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -40,6 +40,7 @@ class Extractor(): self._cookiefile = None self._cookiejar = self.session.cookies self._parentdir = "" + self._write_pages = self.config("write-pages", False) self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) @@ -91,6 +92,8 @@ class Extractor(): raise exception.HttpError(exc) else: code = response.status_code + if self._write_pages: + self._dump_response(response) if 200 <= code < 400 or fatal is None and \ (400 <= code < 500) or not fatal and \ (400 <= code < 429 or 431 <= code < 500): @@ -325,6 +328,33 @@ class Extractor(): test = (test, None) yield test + def _dump_response(self, response): + """Write the response content to a .dump file in the current directory. + + The file name is derived from the response url, + replacing special characters with "_" + """ + for resp in response.history: + self._dump_response(resp) + + if hasattr(Extractor, "_dump_index"): + Extractor._dump_index += 1 + else: + Extractor._dump_index = 1 + Extractor._dump_sanitize = re.compile(r"[\\\\|/<>:\"?*&=#]+").sub + + fname = "{:>02}_{}".format( + Extractor._dump_index, + Extractor._dump_sanitize('_', response.url) + )[:250] + + try: + with open(fname + ".dump", 'wb') as fp: + util.dump_response(response, fp) + except Exception as e: + self.log.warning("Failed to dump HTTP request (%s: %s)", + e.__class__.__name__, e) + class GalleryExtractor(Extractor): @@ -460,7 +490,7 @@ class SharedConfigMixin(): """Enable sharing of config settings based on 'basecategory'""" basecategory = "" - def config(self, key, default=None, *, sentinel=object()): + def config(self, key, default=None, *, sentinel=util.SENTINEL): value = Extractor.config(self, key, sentinel) return value if value is not sentinel else config.interpolate( 
("extractor", self.basecategory, self.subcategory), key, default) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 3a0d0ef..e0edf89 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -32,7 +32,7 @@ class DanbooruExtractor(SharedConfigMixin, Extractor): def __init__(self, match): super().__init__(match) self.root = "https://{}.donmai.us".format(match.group(1)) - self.ugoira = self.config("ugoira", True) + self.ugoira = self.config("ugoira", False) self.params = {} username, api_key = self._get_auth_info() @@ -156,8 +156,8 @@ class DanbooruPostExtractor(DanbooruExtractor): "content": "5e255713cbf0a8e0801dc423563c34d896bb9229", }), ("https://danbooru.donmai.us/posts/3613024", { - "pattern": r"https?://.+\.webm$", - "options": (("ugoira", False),) + "pattern": r"https?://.+\.zip$", + "options": (("ugoira", True),) }) ) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 2631052..cda357a 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -126,8 +126,9 @@ class DeviantartExtractor(Extractor): if self.extra: for match in DeviantartStashExtractor.pattern.finditer( deviation.get("description", "")): + url = text.ensure_http_scheme(match.group(0)) deviation["_extractor"] = DeviantartStashExtractor - yield Message.Queue, match.group(0), deviation + yield Message.Queue, url, deviation def deviations(self): """Return an iterable containing all relevant Deviation-objects""" @@ -849,9 +850,12 @@ class DeviantartOAuthAPI(): self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) - self.refresh_token = extractor.config("refresh-token") - if self.refresh_token == "cache": - self.refresh_token = "#" + str(self.client_id) + token = extractor.config("refresh-token") + if token is None or token == "cache": + token = "#" + str(self.client_id) + if not _refresh_token_cache(token): + token = None + 
self.refresh_token_key = token self.log.debug( "Using %s API credentials (client-id %s)", @@ -904,7 +908,7 @@ class DeviantartOAuthAPI(): """Get extended content of a single Deviation""" endpoint = "deviation/content" params = {"deviationid": deviation_id} - return self._call(endpoint, params) + return self._call(endpoint, params, public=False) def deviation_download(self, deviation_id): """Get the original file download (if allowed)""" @@ -951,18 +955,19 @@ class DeviantartOAuthAPI(): endpoint = "user/profile/" + username return self._call(endpoint, fatal=False) - def authenticate(self, refresh_token): + def authenticate(self, refresh_token_key): """Authenticate the application by requesting an access token""" - self.headers["Authorization"] = self._authenticate_impl(refresh_token) + self.headers["Authorization"] = \ + self._authenticate_impl(refresh_token_key) @cache(maxage=3600, keyarg=1) - def _authenticate_impl(self, refresh_token): + def _authenticate_impl(self, refresh_token_key): """Actual authenticate implementation""" url = "https://www.deviantart.com/oauth2/token" - if refresh_token: + if refresh_token_key: self.log.info("Refreshing private access token") data = {"grant_type": "refresh_token", - "refresh_token": _refresh_token_cache(refresh_token)} + "refresh_token": _refresh_token_cache(refresh_token_key)} else: self.log.info("Requesting public access token") data = {"grant_type": "client_credentials"} @@ -976,8 +981,9 @@ class DeviantartOAuthAPI(): self.log.debug("Server response: %s", data) raise exception.AuthenticationError('"{}" ({})'.format( data.get("error_description"), data.get("error"))) - if refresh_token: - _refresh_token_cache.update(refresh_token, data["refresh_token"]) + if refresh_token_key: + _refresh_token_cache.update( + refresh_token_key, data["refresh_token"]) return "Bearer " + data["access_token"] def _call(self, endpoint, params=None, fatal=True, public=True): @@ -987,7 +993,7 @@ class DeviantartOAuthAPI(): if self.delay >= 0: 
time.sleep(2 ** self.delay) - self.authenticate(None if public else self.refresh_token) + self.authenticate(None if public else self.refresh_token_key) response = self.extractor.request( url, headers=self.headers, params=params, fatal=None) data = response.json() @@ -1023,7 +1029,7 @@ class DeviantartOAuthAPI(): if extend: if public and len(data["results"]) < params["limit"]: - if self.refresh_token: + if self.refresh_token_key: self.log.debug("Switching to private access token") public = False continue @@ -1154,9 +1160,11 @@ class DeviantartEclipseAPI(): return text.rextract(page, '\\"id\\":', ',', pos)[0].strip('" ') -@cache(maxage=10*365*24*3600, keyarg=0) -def _refresh_token_cache(original_token, new_token=None): - return new_token or original_token +@cache(maxage=100*365*24*3600, keyarg=0) +def _refresh_token_cache(token): + if token and token[0] == "#": + return None + return token ############################################################################### diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 0c05a97..612c742 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2019 Mike Fährmann +# Copyright 2014-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,7 +10,7 @@ from . import booru from .common import Message -from .. import text, util +from .. 
import text class GelbooruExtractor(booru.XmlParserMixin, @@ -31,6 +31,7 @@ class GelbooruExtractor(booru.XmlParserMixin, else: self.items = self.items_noapi self.session.cookies["fringeBenefits"] = "yup" + self.per_page = 42 def items_noapi(self): yield Message.Version, 1 @@ -46,6 +47,19 @@ class GelbooruExtractor(booru.XmlParserMixin, def get_posts(self): """Return an iterable containing all relevant post objects""" + url = "https://gelbooru.com/index.php?page=post&s=list" + params = { + "tags": self.params["tags"], + "pid" : self.page_start * self.per_page + } + + while True: + page = self.request(url, params=params).text + ids = list(text.extract_iter(page, '<a id="p', '"')) + yield from ids + if len(ids) < self.per_page: + return + params["pid"] += self.per_page def get_post_data(self, post_id): """Extract metadata of a single post""" @@ -88,34 +102,20 @@ class GelbooruTagExtractor(booru.TagMixin, GelbooruExtractor): }), ) - def __init__(self, match): - super().__init__(match) - if not self.use_api: - self.per_page = 42 - - def get_posts(self): - url = "https://gelbooru.com/index.php?page=post&s=list" - params = {"tags": self.tags, "pid": self.page_start * self.per_page} - while True: - page = self.request(url, params=params).text - ids = list(text.extract_iter(page, '<a id="p', '"')) - yield from ids - if len(ids) < self.per_page: - return - params["pid"] += self.per_page - - -class GelbooruPoolExtractor(booru.GelbooruPoolMixin, GelbooruExtractor): +class GelbooruPoolExtractor(booru.PoolMixin, GelbooruExtractor): """Extractor for image-pools from gelbooru.com""" pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?" 
r"\?page=pool&s=show&id=(?P<pool>\d+)") - test = ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { - "count": 6, - }) - - def get_posts(self): - return util.advance(self.posts, self.page_start) + test = ( + ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { + "count": 6, + }), + ("https://gelbooru.com/index.php?page=pool&s=show&id=761", { + "options": (("api", False),), + "count": 6, + }), + ) class GelbooruPostExtractor(booru.PostMixin, GelbooruExtractor): diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py index ef64942..aa41836 100644 --- a/gallery_dl/extractor/hentainexus.py +++ b/gallery_dl/extractor/hentainexus.py @@ -51,20 +51,38 @@ class HentainexusGalleryExtractor(GalleryExtractor): "description": rmve(extr('viewcolumn">Description</td>', '</td>')), } data["lang"] = util.language_to_code(data["language"]) - data["type"] = "Doujinshi" if 'doujin' in data["tags"] else "Manga" - data["title_conventional"] = self.join_title( - data["event"], - data["circle"], - data["artist"], - data["title"], - data["parody"], - data["book"], - data["magazine"], - ) + if 'doujin' in data['tags']: + data['type'] = 'Doujinshi' + elif 'illustration' in data['tags']: + data['type'] = 'Illustration' + else: + data['type'] = 'Manga' + data["title_conventional"] = self._join_title(data) return data + def images(self, page): + url = "{}/read/{}".format(self.root, self.gallery_id) + extr = text.extract_from(self.request(url).text) + urls = extr("initReader(", "]") + "]" + return [(url, None) for url in json.loads(urls)] + @staticmethod - def join_title(event, circle, artist, title, parody, book, magazine): + def _join_title(data): + event = data['event'] + artist = data['artist'] + circle = data['circle'] + title = data['title'] + parody = data['parody'] + book = data['book'] + magazine = data['magazine'] + + # a few galleries have a large number of artists or parodies, + # which get replaced with "Various" in the title string + 
if artist.count(',') >= 3: + artist = 'Various' + if parody.count(',') >= 3: + parody = 'Various' + jt = '' if event: jt += '({}) '.format(event) @@ -81,12 +99,6 @@ class HentainexusGalleryExtractor(GalleryExtractor): jt += ' ({})'.format(magazine) return jt - def images(self, page): - url = "{}/read/{}".format(self.root, self.gallery_id) - extr = text.extract_from(self.request(url).text) - urls = extr("initReader(", "]") + "]" - return [(url, None) for url in json.loads(urls)] - class HentainexusSearchExtractor(Extractor): """Extractor for search results on hentainexus.com""" diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py index 3883445..1c53723 100644 --- a/gallery_dl/extractor/hiperdex.py +++ b/gallery_dl/extractor/hiperdex.py @@ -14,6 +14,9 @@ from ..cache import memcache import re +BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net|info)" + + class HiperdexBase(): """Base class for hiperdex extractors""" category = "hiperdex" @@ -61,11 +64,10 @@ class HiperdexBase(): class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): """Extractor for manga chapters from hiperdex.com""" - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" - r"(/manga/([^/?&#]+)/([^/?&#]+))") + pattern = BASE_PATTERN + r"(/manga/([^/?&#]+)/([^/?&#]+))" test = ( ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", { - "pattern": r"https://hiperdex.com/wp-content/uploads" + "pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads" r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp", "count": 9, "keyword": { @@ -82,6 +84,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): }, }), ("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"), + ("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"), ) def __init__(self, match): @@ -102,8 +105,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor): class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): """Extractor for manga from 
hiperdex.com""" chapterclass = HiperdexChapterExtractor - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" - r"(/manga/([^/?&#]+))/?$") + pattern = BASE_PATTERN + r"(/manga/([^/?&#]+))/?$" test = ( ("https://hiperdex.com/manga/youre-not-that-special/", { "count": 51, @@ -123,6 +125,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor): }, }), ("https://hiperdex.net/manga/youre-not-that-special/"), + ("https://hiperdex.info/manga/youre-not-that-special/"), ) def __init__(self, match): @@ -154,11 +157,11 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor): categorytransfer = False chapterclass = HiperdexMangaExtractor reverse = False - pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net)" - r"(/manga-a(?:rtist|uthor)/([^/?&#]+))") + pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?&#]+))" test = ( ("https://hiperdex.com/manga-artist/beck-ho-an/"), ("https://hiperdex.net/manga-artist/beck-ho-an/"), + ("https://hiperdex.info/manga-artist/beck-ho-an/"), ("https://hiperdex.com/manga-author/viagra/", { "pattern": HiperdexMangaExtractor.pattern, "count": ">= 6", diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py new file mode 100644 index 0000000..a1ba0c3 --- /dev/null +++ b/gallery_dl/extractor/imagechest.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Leonid "Bepis" Pavel +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from galleries at https://imgchest.com/""" + +from .common import GalleryExtractor +from .. 
import text, exception + + +class ImagechestGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from imgchest.com""" + category = "imagechest" + root = "https://imgchest.com" + pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})" + test = ( + ("https://imgchest.com/p/3na7kr3by8d", { + "url": "f095b4f78c051e5a94e7c663814d1e8d4c93c1f7", + "content": "076959e65be30249a2c651fbe6090dc30ba85193", + "count": 3 + }), + ) + + def __init__(self, match): + self.gallery_id = match.group(1) + url = self.root + "/p/" + self.gallery_id + GalleryExtractor.__init__(self, match, url) + + def metadata(self, page): + if "Sorry, but the page you requested could not be found." in page: + raise exception.NotFoundError("gallery") + + return { + "gallery_id": self.gallery_id, + "title": text.unescape(text.extract( + page, 'property="og:title" content="', '"')[0].strip()) + } + + def images(self, page): + return [ + (url, None) + for url in text.extract_iter( + page, 'property="og:image" content="', '"') + ] diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 0813ea9..44fa5f2 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -34,7 +34,11 @@ class ImgurExtractor(Extractor): except KeyError: pass - url = image["mp4"] if image["animated"] and self.mp4 else image["link"] + if image["animated"] and self.mp4 and "mp4" in image: + url = image["mp4"] + else: + url = image["link"] + image["date"] = text.parse_timestamp(image["datetime"]) text.nameext_from_url(url, image) @@ -100,6 +104,9 @@ class ImgurImageExtractor(ImgurExtractor): ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1' "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e", }), + ("https://imgur.com/1Nily2P", { # animated png + "pattern": "https://i.imgur.com/1Nily2P.png", + }), ("https://imgur.com/zzzzzzz", { # not found "exception": exception.HttpError, }), @@ -130,7 +137,7 @@ class ImgurAlbumExtractor(ImgurExtractor): directory_fmt = 
("{category}", "{album[id]}{album[title]:? - //}") filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}" archive_fmt = "{album[id]}_{id}" - pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" + pattern = BASE_PATTERN + r"/a/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", @@ -192,9 +199,6 @@ class ImgurAlbumExtractor(ImgurExtractor): ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash "url": "695ef0c950023362a0163ee5041796300db76674", }), - ("https://imgur.com/t/unmuted/YMqBcua", { # unmuted URL - "url": "86b4747f8147cec7602f0214e267309af73a8655", - }), ("https://imgur.com/a/TcBmQ", { "exception": exception.HttpError, }), @@ -225,7 +229,7 @@ class ImgurAlbumExtractor(ImgurExtractor): class ImgurGalleryExtractor(ImgurExtractor): """Extractor for imgur galleries""" subcategory = "gallery" - pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})" + pattern = BASE_PATTERN + r"/(?:gallery|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380) "pattern": "https://imgur.com/zf2fIms", @@ -233,6 +237,9 @@ class ImgurGalleryExtractor(ImgurExtractor): ("https://imgur.com/gallery/eD9CT", { "pattern": "https://imgur.com/a/eD9CT", }), + ("https://imgur.com/t/unmuted/26sEhNr", { # unmuted URL + "pattern": "https://imgur.com/26sEhNr", + }), ) def items(self): diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index ea39cab..3781711 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -14,6 +14,8 @@ from .. 
import text, exception from ..cache import cache import itertools import json +import time +import re class InstagramExtractor(Extractor): @@ -26,6 +28,10 @@ class InstagramExtractor(Extractor): cookiedomain = ".instagram.com" cookienames = ("sessionid",) + def __init__(self, match): + Extractor.__init__(self, match) + self._find_tags = re.compile(r'#\w+').findall + def get_metadata(self): return {} @@ -78,9 +84,10 @@ class InstagramExtractor(Extractor): url = self.root + "/accounts/login/ajax/" data = { "username" : username, - "password" : password, + "enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format( + int(time.time()), password), "queryParams" : "{}", - "optIntoOneTap": "true", + "optIntoOneTap": "false", } response = self.request(url, method="POST", headers=headers, data=data) @@ -133,12 +140,24 @@ class InstagramExtractor(Extractor): 'fullname': media['owner']['full_name'], 'post_id': media['id'], 'post_shortcode': media['shortcode'], + 'post_url': url, 'description': text.parse_unicode_escapes('\n'.join( edge['node']['text'] for edge in media['edge_media_to_caption']['edges'] )), } + tags = self._find_tags(common['description']) + if tags: + common['tags'] = sorted(set(tags)) + + location = media['location'] + if location: + common['location_id'] = location['id'] + common['location_slug'] = location['slug'] + common['location_url'] = "{}/explore/locations/{}/{}/".format( + self.root, location['id'], location['slug']) + medias = [] if media['__typename'] == 'GraphSidecar': for num, edge in enumerate( @@ -156,6 +175,7 @@ class InstagramExtractor(Extractor): 'sidecar_media_id': media['id'], 'sidecar_shortcode': media['shortcode'], } + self._extract_tagged_users(children, media_data) media_data.update(common) medias.append(media_data) @@ -169,6 +189,7 @@ class InstagramExtractor(Extractor): 'height': text.parse_int(media['dimensions']['height']), 'width': text.parse_int(media['dimensions']['width']), } + self._extract_tagged_users(media, media_data) 
media_data.update(common) medias.append(media_data) @@ -189,12 +210,12 @@ class InstagramExtractor(Extractor): user_id = '"{}"'.format( shared_data['entry_data']['StoriesPage'][0]['user']['id']) highlight_id = '' - query_hash = 'cda12de4f7fd3719c0569ce03589f4c4' + query_hash = '0a85e6ea60a4c99edc58ab2f3d17cfdf' variables = ( '{{' '"reel_ids":[{}],"tag_names":[],"location_ids":[],' - '"highlight_reel_ids":[{}],"precomposed_overlay":true,' + '"highlight_reel_ids":[{}],"precomposed_overlay":false,' '"show_story_viewer_list":true,' '"story_viewer_fetch_count":50,"story_viewer_cursor":"",' '"stories_video_dash_manifest":false' @@ -250,7 +271,7 @@ class InstagramExtractor(Extractor): data = self._request_graphql( variables, - 'aec5501414615eca36a9acf075655b1e', + 'ad99dd9d3646cc3c0dda65debcd266a7', shared_data['config']['csrf_token'], ) @@ -305,6 +326,18 @@ class InstagramExtractor(Extractor): variables, psdf['query_hash'], csrf, ) + def _extract_tagged_users(self, src_media, dest_dict): + edges = src_media['edge_media_to_tagged_user']['edges'] + if edges: + dest_dict['tagged_users'] = tagged_users = [] + for edge in edges: + user = edge['node']['user'] + tagged_users.append({ + 'id' : user['id'], + 'username' : user['username'], + 'full_name': user['full_name'], + }) + class InstagramImageExtractor(InstagramExtractor): """Extractor for PostPage""" @@ -321,10 +354,15 @@ class InstagramImageExtractor(InstagramExtractor): "description": str, "height": int, "likes": int, + "location_id": "214424288", + "location_slug": "hong-kong", + "location_url": "re:/explore/locations/214424288/hong-kong/", "media_id": "1922949326347663701", "shortcode": "BqvsDleB3lV", "post_id": "1922949326347663701", "post_shortcode": "BqvsDleB3lV", + "post_url": "https://www.instagram.com/p/BqvsDleB3lV/", + "tags": ["#WHPsquares"], "typename": "GraphImage", "username": "instagram", "width": int, @@ -339,6 +377,7 @@ class InstagramImageExtractor(InstagramExtractor): "sidecar_shortcode": "BoHk1haB5tM", 
"post_id": "1875629777499953996", "post_shortcode": "BoHk1haB5tM", + "post_url": "https://www.instagram.com/p/BoHk1haB5tM/", "num": int, "likes": int, "username": "instagram", @@ -354,7 +393,9 @@ class InstagramImageExtractor(InstagramExtractor): "height": int, "likes": int, "media_id": "1923502432034620000", + "post_url": "https://www.instagram.com/p/Bqxp0VSBgJg/", "shortcode": "Bqxp0VSBgJg", + "tags": ["#ASMR"], "typename": "GraphVideo", "username": "instagram", "width": int, @@ -370,6 +411,7 @@ class InstagramImageExtractor(InstagramExtractor): "height": int, "likes": int, "media_id": "1806097553666903266", + "post_url": "https://www.instagram.com/p/BkQjCfsBIzi/", "shortcode": "BkQjCfsBIzi", "typename": "GraphVideo", "username": "instagram", @@ -381,11 +423,23 @@ class InstagramImageExtractor(InstagramExtractor): ("https://www.instagram.com/p/BtOvDOfhvRr/", { "count": 2, "keyword": { + "post_url": "https://www.instagram.com/p/BtOvDOfhvRr/", "sidecar_media_id": "1967717017113261163", "sidecar_shortcode": "BtOvDOfhvRr", "video_url": str, } - }) + }), + + # GraphImage with tagged user + ("https://www.instagram.com/p/B_2lf3qAd3y/", { + "keyword": { + "tagged_users": [{ + "id": "1246468638", + "username": "kaaymbl", + "full_name": "Call Me Kay", + }] + } + }), ) def __init__(self, match): @@ -476,7 +530,7 @@ class InstagramUserExtractor(InstagramExtractor): 'node_id': 'id', 'variables_id': 'id', 'edge_to_medias': 'edge_owner_to_timeline_media', - 'query_hash': 'f2405b236d85e8296cf30347c9f08c2a', + 'query_hash': '44efc15d3c13342d02df0b5a9fa3d33f', }) if self.config('highlights'): @@ -545,5 +599,5 @@ class InstagramTagExtractor(InstagramExtractor): 'node_id': 'name', 'variables_id': 'tag_name', 'edge_to_medias': 'edge_hashtag_to_media', - 'query_hash': 'f12c9ec5e46a3173b2969c712ad84744', + 'query_hash': '7dabc71d3e758b1ec19ffb85639e427b', }) diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 38c90df..72465f7 100644 --- 
a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -51,7 +51,7 @@ class MangadexChapterExtractor(MangadexExtractor): test = ( ("https://mangadex.org/chapter/122094", { "keyword": "ef1084c2845825979e150512fed8fdc209baf05a", - "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", + "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot ("https://mangadex.cc/chapter/138086", { diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index 4f0e38d..002c8f7 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -27,11 +27,9 @@ class MastodonExtractor(Extractor): Extractor.__init__(self, match) self.api = MastodonAPI(self) - def config(self, key, default=None, *, sentinel=object()): + def config(self, key, default=None, *, sentinel=util.SENTINEL): value = Extractor.config(self, key, sentinel) - if value is not sentinel: - return value - return config.interpolate( + return value if value is not sentinel else config.interpolate( ("extractor", "mastodon", self.instance, self.subcategory), key, default, ) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 17fe935..84794ad 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -224,10 +224,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor): self.post_url = "https://www.newgrounds.com/art/view/{}/{}".format( self.user, match.group(3)) else: - url = match.group(0) - if not url.startswith("http"): - url = "https://" + url - self.post_url = url + self.post_url = text.ensure_http_scheme(match.group(0)) def posts(self): return (self.post_url,) @@ -414,6 +411,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): @staticmethod def _extract_favorites(page): return [ - "https://" + user.rpartition('"')[2].lstrip("/:") + text.ensure_http_scheme(user.rpartition('"')[2]) for user in text.extract_iter(page, 'class="item-user', '"><img') ] diff 
--git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index c06721c..c07c4b7 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -10,9 +10,8 @@ from .common import Extractor, Message from . import deviantart, flickr, reddit, smugmug, tumblr -from .. import text, oauth, config, exception +from .. import text, oauth, util, config, exception from ..cache import cache -import os import urllib.parse REDIRECT_URI_LOCALHOST = "http://localhost:6414/" @@ -27,6 +26,7 @@ class OAuthBase(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.client = None + self.cache = config.get(("extractor", self.category), "cache", True) def oauth_config(self, key, default=None): return config.interpolate( @@ -42,7 +42,7 @@ class OAuthBase(Extractor): server.listen(1) # workaround for ctrl+c not working during server.accept on Windows - if os.name == "nt": + if util.WINDOWS: server.settimeout(1.0) while True: try: @@ -87,12 +87,20 @@ class OAuthBase(Extractor): # exchange the request token for an access token data = self.session.get(access_token_url, params=data).text - data = text.parse_query(data) - self.send(OAUTH1_MSG_TEMPLATE.format( - category=self.subcategory, - token=data["oauth_token"], - token_secret=data["oauth_token_secret"], + token = data["oauth_token"] + token_secret = data["oauth_token_secret"] + + # write to cache + if self.cache: + key = (self.subcategory, self.session.auth.consumer_key) + oauth._token_cache.update(key, (token, token_secret)) + self.log.info("Writing tokens to cache") + + # display tokens + self.send(self._generate_message( + ("access-token", "access-token-secret"), + (token, token_secret), )) def _oauth2_authorization_code_grant( @@ -149,24 +157,66 @@ class OAuthBase(Extractor): self.send(data["error"]) return - # display token - part = key.partition("_")[0] - template = message_template or OAUTH2_MSG_TEMPLATE - self.send(template.format( - category=self.subcategory, - key=part, - 
Key=part.capitalize(), - token=data[key], - instance=getattr(self, "instance", ""), - client_id=client_id, - client_secret=client_secret, - )) - # write to cache - if cache and config.get(("extractor", self.category), "cache"): + if self.cache and cache: cache.update("#" + str(client_id), data[key]) self.log.info("Writing 'refresh-token' to cache") + # display token + if message_template: + msg = message_template.format( + category=self.subcategory, + key=key.partition("_")[0], + token=data[key], + instance=getattr(self, "instance", ""), + client_id=client_id, + client_secret=client_secret, + ) + else: + msg = self._generate_message( + ("refresh-token",), + (data[key],), + ) + self.send(msg) + + def _generate_message(self, names, values): + if len(names) == 1: + _vh = "This value has" + _is = "is" + _it = "it" + _va = "this value" + else: + _vh = "These values have" + _is = "are" + _it = "them" + _va = "these values" + + msg = "\nYour {} {}\n\n{}\n\n".format( + " and ".join("'" + n + "'" for n in names), + _is, + "\n".join(values), + ) + + if self.cache: + opt = self.oauth_config(names[0]) + if opt is None or opt == "cache": + msg += _vh + " been cached and will automatically be used." + else: + msg += ( + "Set 'extractor.{}.{}' to \"cache\" to use {}.".format( + self.subcategory, names[0], _it, + ) + ) + else: + msg += "Put " + _va + " into your configuration file as \n" + msg += " and\n".join( + "'extractor." + self.subcategory + "." + n + "'" + for n in names + ) + msg += "." 
+ + return msg + class OAuthDeviantart(OAuthBase): subcategory = "deviantart" @@ -224,6 +274,7 @@ class OAuthReddit(OAuthBase): "https://www.reddit.com/api/v1/authorize", "https://www.reddit.com/api/v1/access_token", scope="read history", + cache=reddit._refresh_token_cache, ) @@ -318,49 +369,8 @@ class OAuthMastodon(OAuthBase): return data -OAUTH1_MSG_TEMPLATE = """ -Your Access Token and Access Token Secret are - -{token} -{token_secret} - -Put these values into your configuration file as -'extractor.{category}.access-token' and -'extractor.{category}.access-token-secret'. - -Example: -{{ - "extractor": {{ - "{category}": {{ - "access-token": "{token}", - "access-token-secret": "{token_secret}" - }} - }} -}} -""" - - -OAUTH2_MSG_TEMPLATE = """ -Your {Key} Token is - -{token} - -Put this value into your configuration file as -'extractor.{category}.{key}-token'. - -Example: -{{ - "extractor": {{ - "{category}": {{ - "{key}-token": "{token}" - }} - }} -}} -""" - - MASTODON_MSG_TEMPLATE = """ -Your {Key} Token is +Your 'access-token' is {token} diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 570bd72..a14ec9c 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -98,8 +98,7 @@ class PatreonExtractor(Extractor): headers = {"Referer": self.root} while url: - if not url.startswith("http"): - url = "https://" + url.lstrip("/:") + url = text.ensure_http_scheme(url) posts = self.request(url, headers=headers).json() if "included" in posts: diff --git a/gallery_dl/extractor/recursive.py b/gallery_dl/extractor/recursive.py index 1a793a0..ead5c35 100644 --- a/gallery_dl/extractor/recursive.py +++ b/gallery_dl/extractor/recursive.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -28,6 +28,7 @@ class 
RecursiveExtractor(Extractor): self.session.mount("file://", FileAdapter()) page = self.request(self.url.partition(":")[2]).text + del self.session.adapters["file://"] yield Message.Version, 1 with extractor.blacklist(blist): diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index d0232cc..2e3864a 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -222,20 +222,25 @@ class RedditAPI(): self.extractor = extractor self.comments = text.parse_int(extractor.config("comments", 0)) self.morecomments = extractor.config("morecomments", False) - self.refresh_token = extractor.config("refresh-token") self.log = extractor.log client_id = extractor.config("client-id", self.CLIENT_ID) user_agent = extractor.config("user-agent", self.USER_AGENT) if (client_id == self.CLIENT_ID) ^ (user_agent == self.USER_AGENT): - self.client_id = None - self.log.warning( + raise exception.StopExtraction( "Conflicting values for 'client-id' and 'user-agent': " "overwrite either both or none of them.") + + self.client_id = client_id + self.headers = {"User-Agent": user_agent} + + token = extractor.config("refresh-token") + if token is None or token == "cache": + key = "#" + self.client_id + self.refresh_token = _refresh_token_cache(key) else: - self.client_id = client_id - extractor.session.headers["User-Agent"] = user_agent + self.refresh_token = token def submission(self, submission_id): """Fetch the (submission, comments)=-tuple for a submission id""" @@ -277,13 +282,15 @@ class RedditAPI(): def authenticate(self): """Authenticate the application by requesting an access token""" - access_token = self._authenticate_impl(self.refresh_token) - self.extractor.session.headers["Authorization"] = access_token + self.headers["Authorization"] = \ + self._authenticate_impl(self.refresh_token) @cache(maxage=3600, keyarg=1) def _authenticate_impl(self, refresh_token=None): """Actual authenticate implementation""" url = 
"https://www.reddit.com/api/v1/access_token" + self.headers["Authorization"] = None + if refresh_token: self.log.info("Refreshing private access token") data = {"grant_type": "refresh_token", @@ -294,9 +301,9 @@ class RedditAPI(): "grants/installed_client"), "device_id": "DO_NOT_TRACK_THIS_DEVICE"} - auth = (self.client_id, "") response = self.extractor.request( - url, method="POST", data=data, auth=auth, fatal=False) + url, method="POST", headers=self.headers, + data=data, auth=(self.client_id, ""), fatal=False) data = response.json() if response.status_code != 200: @@ -307,9 +314,10 @@ class RedditAPI(): def _call(self, endpoint, params): url = "https://oauth.reddit.com" + endpoint - params["raw_json"] = 1 + params["raw_json"] = "1" self.authenticate() - response = self.extractor.request(url, params=params, fatal=None) + response = self.extractor.request( + url, params=params, headers=self.headers, fatal=None) remaining = response.headers.get("x-ratelimit-remaining") if remaining and float(remaining) < 2: @@ -380,3 +388,10 @@ class RedditAPI(): @staticmethod def _decode(sid): return util.bdecode(sid, "0123456789abcdefghijklmnopqrstuvwxyz") + + +@cache(maxage=100*365*24*3600, keyarg=0) +def _refresh_token_cache(token): + if token and token[0] == "#": + return None + return token diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py new file mode 100644 index 0000000..7855eab --- /dev/null +++ b/gallery_dl/extractor/redgifs.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. 
+ +"""Extractors for https://redgifs.com/""" + +from .gfycat import GfycatImageExtractor +from ..cache import cache + + +class RedgifsImageExtractor(GfycatImageExtractor): + """Extractor for individual images from redgifs.com""" + category = "redgifs" + pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/watch/([A-Za-z]+)" + test = ("https://redgifs.com/watch/foolishforkedabyssiniancat", { + "pattern": r"https://\w+.redgifs.com/FoolishForkedAbyssiniancat.mp4", + "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533", + }) + + def _get_info(self, gfycat_id): + api = RedgifsAPI(self) + return api.gfycat(gfycat_id) + + +class RedgifsAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.headers = {} + + def gfycat(self, gfycat_id): + endpoint = "v1/gfycats/" + gfycat_id + return self._call(endpoint)["gfyItem"] + + @cache(maxage=3600) + def _authenticate_impl(self): + url = "https://weblogin.redgifs.com/oauth/webtoken" + headers = { + "Referer": "https://www.redgifs.com/", + "Origin" : "https://www.redgifs.com", + } + data = { + "access_key": "dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe" + "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9", + } + + response = self.extractor.request( + url, method="POST", headers=headers, json=data) + return "Bearer " + response.json()["access_token"] + + def _call(self, endpoint): + self.headers["Authorization"] = self._authenticate_impl() + url = "https://napi.redgifs.com/" + endpoint + return self.extractor.request(url, headers=self.headers).json() diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index b21ad32..2cef430 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -121,9 +121,9 @@ class SexcomPinExtractor(SexcomExtractor): }, }), # gif - ("https://www.sex.com/pin/11465040-big-titted-hentai-gif/", { - "pattern": "https://cdn.sex.com/images/.+/2014/01/26/4829951.gif", - "content": "af6726d74d11d819e1c885fe5303f711862eae96", + ("https://www.sex.com/pin/55435122-ecchi/", { + 
"pattern": "https://cdn.sex.com/images/.+/2017/12/07/18760842.gif", + "content": "176cc63fa05182cb0438c648230c0f324a5965fe", }), # video ("https://www.sex.com/pin/55748341/", { diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 7e99823..3e3a5a0 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -108,11 +108,11 @@ class TumblrExtractor(Extractor): del photo["alt_sizes"] yield self._prepare_image(photo["url"], post) - url = post.get("audio_url") # type: "audio" + url = post.get("audio_url") # type "audio" if url and url.startswith("https://a.tumblr.com/"): yield self._prepare(url, post) - url = post.get("video_url") # type: "video" + url = post.get("video_url") # type "video" if url: yield self._prepare(_original_video(url), post) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index c409f54..4c7b757 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -360,12 +360,13 @@ class TwitterTweetExtractor(TwitterExtractor): "pattern": r"ytdl:https://video.twimg.com/ext_tw_video/.*.m3u8", }), # content with emoji, newlines, hashtags (#338) - ("https://twitter.com/yumi_san0112/status/1151144618936823808", { + ("https://twitter.com/playpokemon/status/1263832915173048321", { "options": (("content", True),), "keyword": {"content": ( - "re:晴、お誕生日おめでとう🎉!\n実は下の名前が同じなので結構親近感ある" - "アイドルです✨\n今年の晴ちゃんめちゃくちゃ可愛い路線攻めてるから、そろ" - "そろまたかっこいい晴が見たいですねw\n#結城晴生誕祭2019\n#結城晴生誕祭" + r"re:Gear up for #PokemonSwordShieldEX with special Mystery " + "Gifts! \n\nYou’ll be able to receive four Galarian form " + "Pokémon with Hidden Abilities, plus some very useful items. " + "It’s our \\(Mystery\\) Gift to you, Trainers! 
\n\n❓🎁➡️ " )}, }), # Reply to another tweet (#403) diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py new file mode 100644 index 0000000..86ada49 --- /dev/null +++ b/gallery_dl/extractor/webtoons.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Leonardo Taccari +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://www.webtoons.com/""" + +from .common import Extractor, Message +from .. import exception, text, util + + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/((en|fr)" + + +class WebtoonsExtractor(Extractor): + category = "webtoons" + root = "https://www.webtoons.com" + cookiedomain = "www.webtoons.com" + + def __init__(self, match): + Extractor.__init__(self, match) + self.session.cookies.set("ageGatePass", "true", + domain=self.cookiedomain) + self.path, self.lang, self.genre , self.comic, self.query = \ + match.groups() + + +class WebtoonsEpisodeExtractor(WebtoonsExtractor): + """Extractor for an episode on webtoons.com""" + subcategory = "episode" + directory_fmt = ("{category}", "{comic}") + filename_fmt = "{episode}-{num:>02}.{extension}" + archive_fmt = "{title_no}_{episode}_{num}" + pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+)/(?:[^/?&#]+))" + r"/viewer(?:\?([^#]+))") + test = ( + (("https://www.webtoons.com/en/comedy/safely-endangered" + "/ep-572-earth/viewer?title_no=352&episode_no=572"), { + "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", + "content": "4f7701a750368e377d65900e6e8f64a5f9cb9c86", + "count": 5, + }), + ) + + def __init__(self, match): + WebtoonsExtractor.__init__(self, match) + query = text.parse_query(self.query) + self.title_no = query.get("title_no") + if not self.title_no: + raise exception.NotFoundError("title_no") + self.episode = query.get("episode_no") + if not self.episode: + raise 
exception.NotFoundError("episode_no") + + def items(self): + url = "{}/{}/viewer?{}".format(self.root, self.path, self.query) + self.session.headers["Referer"] = url + + page = self.request(url).text + data = self.get_job_metadata(page) + imgs = self.get_image_urls(page) + data["count"] = len(imgs) + + yield Message.Version, 1 + yield Message.Directory, data + for data["num"], url in enumerate(imgs, 1): + yield Message.Url, url, text.nameext_from_url(url, data) + + def get_job_metadata(self, page): + """Collect metadata for extractor-job""" + title, pos = text.extract( + page, '<meta property="og:title" content="', '"') + descr, pos = text.extract( + page, '<meta property="og:description" content="', '"', pos) + + return { + "genre": self.genre, + "comic": self.comic, + "title_no": self.title_no, + "episode": self.episode, + "title": text.unescape(title), + "description": text.unescape(descr), + "lang": self.lang, + "language": util.code_to_language(self.lang), + } + + @staticmethod + def get_image_urls(page): + """Extract and return a list of all image urls""" + return list(text.extract_iter(page, 'class="_images" data-url="', '"')) + + +class WebtoonsComicExtractor(WebtoonsExtractor): + """Extractor for an entire comic on webtoons.com""" + subcategory = "comic" + pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+))" + r"/list(?:\?([^#]+))") + test = ( + # english + (("https://www.webtoons.com/en/comedy/live-with-yourself/" + "list?title_no=919"), { + "pattern": WebtoonsEpisodeExtractor.pattern, + "range": "1-15", + "count": ">= 15", + }), + # french + (("https://www.webtoons.com/fr/romance/subzero/" + "list?title_no=1845&page=3"), { + "count": ">= 15", + }), + ) + + def __init__(self, match): + WebtoonsExtractor.__init__(self, match) + query = text.parse_query(self.query) + self.title_no = query.get("title_no") + if not self.title_no: + raise exception.NotFoundError("title_no") + self.page_no = int(query.get("page", 1)) + + def items(self): + page = None + data = 
{"_extractor": WebtoonsEpisodeExtractor} + + while True: + path = "/{}/list?title_no={}&page={}".format( + self.path, self.title_no, self.page_no) + + if page and path not in page: + return + + page = self.request(self.root + path).text + data["page"] = self.page_no + + for url in self.get_episode_urls(page): + yield Message.Queue, url, data + + self.page_no += 1 + + @staticmethod + def get_episode_urls(page): + """Extract and return all episode urls in 'page'""" + pos = page.find('id="_listUl"') + return text.extract_iter( + page, '<a href="', '" class="NPI=a:list', pos) diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index b614cab..0ada118 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -69,8 +69,8 @@ class WikiartArtistExtractor(WikiartExtractor): directory_fmt = ("{category}", "{artist[artistName]}") pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)" test = ("https://www.wikiart.org/en/thomas-cole", { - "url": "9049e52e897b9ae6586df4c2c4f827d0a19dafa3", - "keyword": "c3168b21a993707c41efb7674e8c90d53a79d483", + "url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98", + "keyword": "6d92913c55675e05553f000cfee5daff0b4107cf", }) def __init__(self, match): diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 6ba2572..130df58 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -24,20 +24,32 @@ class Job(): extr = extractor.find(extr) if not extr: raise exception.NoExtractorError() - self.extractor = extr - extr.log.extractor = extr - extr.log.job = self + self.pathfmt = None + + self._logger_extra = { + "job" : self, + "extractor": extr, + "path" : output.PathfmtProxy(self), + "keywords" : output.KwdictProxy(self), + } + extr.log = 
self._wrap_logger(extr.log) extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url) self.status = 0 self.pred_url = self._prepare_predicates("image", True) self.pred_queue = self._prepare_predicates("chapter", False) - if parent and parent.extractor.config( - "category-transfer", parent.extractor.categorytransfer): - self.extractor.category = parent.extractor.category - self.extractor.subcategory = parent.extractor.subcategory + if parent: + pextr = parent.extractor + + # transfer (sub)category + if pextr.config("category-transfer", pextr.categorytransfer): + extr.category = pextr.category + extr.subcategory = pextr.subcategory + + # reuse connection adapters + extr.session.adapters = pextr.session.adapters # user-supplied metadata self.userkwds = self.extractor.config("keywords") @@ -165,6 +177,12 @@ class Job(): return util.build_predicate(predicates) + def get_logger(self, name): + return self._wrap_logger(logging.getLogger(name)) + + def _wrap_logger(self, logger): + return output.LoggerAdapter(logger, self._logger_extra) + def _write_unsupported(self, url): if self.ulog: self.ulog.info(url) @@ -175,8 +193,7 @@ class DownloadJob(Job): def __init__(self, url, parent=None): Job.__init__(self, url, parent) - self.log = logging.getLogger("download") - self.pathfmt = None + self.log = self.get_logger("download") self.archive = None self.sleep = None self.downloaders = {} @@ -325,7 +342,7 @@ class DownloadJob(Job): cls = downloader.find(scheme) if cls and config.get(("downloader", cls.scheme), "enabled", True): - instance = cls(self.extractor, self.out) + instance = cls(self) else: instance = None self.log.error("'%s:' URLs are not supported/enabled", scheme) @@ -338,19 +355,20 @@ class DownloadJob(Job): def initialize(self, kwdict=None): """Delayed initialization of PathFormat, etc.""" - self.pathfmt = util.PathFormat(self.extractor) + config = self.extractor.config + pathfmt = self.pathfmt = util.PathFormat(self.extractor) if kwdict: - 
self.pathfmt.set_directory(kwdict) + pathfmt.set_directory(kwdict) - self.sleep = self.extractor.config("sleep") - if not self.extractor.config("download", True): - self.download = self.pathfmt.fix_extension + self.sleep = config("sleep") + if not config("download", True): + self.download = pathfmt.fix_extension - skip = self.extractor.config("skip", True) + skip = config("skip", True) if skip: self._skipexc = None if skip == "enumerate": - self.pathfmt.check_file = self.pathfmt._enum_file + pathfmt.check_file = pathfmt._enum_file elif isinstance(skip, str): skip, _, smax = skip.partition(":") if skip == "abort": @@ -360,9 +378,9 @@ class DownloadJob(Job): self._skipcnt = 0 self._skipmax = text.parse_int(smax) else: - self.pathfmt.exists = lambda x=None: False + pathfmt.exists = lambda x=None: False - archive = self.extractor.config("archive") + archive = config("archive") if archive: path = util.expand_path(archive) try: @@ -374,27 +392,28 @@ class DownloadJob(Job): else: self.extractor.log.debug("Using download archive '%s'", path) - postprocessors = self.extractor.config("postprocessors") + postprocessors = config("postprocessors") if postprocessors: + pp_log = self.get_logger("postprocessor") pp_list = [] + category = self.extractor.category for pp_dict in postprocessors: whitelist = pp_dict.get("whitelist") blacklist = pp_dict.get("blacklist") - if (whitelist and self.extractor.category not in whitelist or - blacklist and self.extractor.category in blacklist): + if (whitelist and category not in whitelist or + blacklist and category in blacklist): continue name = pp_dict.get("name") pp_cls = postprocessor.find(name) if not pp_cls: - postprocessor.log.warning("module '%s' not found", name) + pp_log.warning("module '%s' not found", name) continue try: - pp_obj = pp_cls(self.pathfmt, pp_dict) + pp_obj = pp_cls(self, pp_dict) except Exception as exc: - postprocessor.log.error( - "'%s' initialization failed: %s: %s", - name, exc.__class__.__name__, exc) + 
pp_log.error("'%s' initialization failed: %s: %s", + name, exc.__class__.__name__, exc) else: pp_list.append(pp_obj) diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py index 9ceefbf..e9dfff0 100644 --- a/gallery_dl/oauth.py +++ b/gallery_dl/oauth.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -20,6 +20,7 @@ import requests import requests.auth from . import text +from .cache import cache def nonce(size, alphabet=string.ascii_letters): @@ -117,6 +118,10 @@ class OAuth1API(): token_secret = extractor.config("access-token-secret") key_type = "default" if api_key == self.API_KEY else "custom" + if token is None or token == "cache": + key = (extractor.category, api_key) + token, token_secret = _token_cache(key) + if api_key and api_secret and token and token_secret: self.log.debug("Using %s OAuth1.0 authentication", key_type) self.session = OAuth1Session( @@ -131,3 +136,8 @@ class OAuth1API(): kwargs["fatal"] = None kwargs["session"] = self.session return self.extractor.request(url, **kwargs) + + +@cache(maxage=100*365*24*3600, keyarg=0) +def _token_cache(key): + return None, None diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 34222a2..5b99bee 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2019 Mike Fährmann +# Copyright 2017-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -173,6 +173,12 @@ def build_parser(): help=("Write URLs, which get emitted by other extractors but cannot " "be handled, to FILE"), ) + output.add_argument( + "--write-pages", + dest="write-pages", nargs=0, action=ConfigConstAction, const=True, + help=("Write downloaded intermediary pages 
to files " + "in the current directory to debug problems"), + ) downloader = parser.add_argument_group("Downloader Options") downloader.add_argument( @@ -196,7 +202,7 @@ def build_parser(): downloader.add_argument( "--http-timeout", dest="timeout", metavar="SECONDS", type=float, action=ConfigAction, - help="Timeout for HTTP connections (defaut: 30.0)", + help="Timeout for HTTP connections (default: 30.0)", ) downloader.add_argument( "--sleep", diff --git a/gallery_dl/output.py b/gallery_dl/output.py index 9e2f8a6..2d3dc17 100644 --- a/gallery_dl/output.py +++ b/gallery_dl/output.py @@ -22,34 +22,93 @@ LOG_LEVEL = logging.INFO class Logger(logging.Logger): - """Custom logger that includes extractor and job info in log records""" - extractor = util.NONE - job = util.NONE + """Custom logger that includes extra info in log records""" def makeRecord(self, name, level, fn, lno, msg, args, exc_info, func=None, extra=None, sinfo=None, factory=logging._logRecordFactory): rv = factory(name, level, fn, lno, msg, args, exc_info, func, sinfo) - rv.extractor = self.extractor - rv.job = self.job + if extra: + rv.__dict__.update(extra) return rv +class LoggerAdapter(): + """Trimmed-down version of logging.LoggingAdapter""" + __slots__ = ("logger", "extra") + + def __init__(self, logger, extra): + self.logger = logger + self.extra = extra + + def debug(self, msg, *args, **kwargs): + if self.logger.isEnabledFor(logging.DEBUG): + kwargs["extra"] = self.extra + self.logger._log(logging.DEBUG, msg, args, **kwargs) + + def info(self, msg, *args, **kwargs): + if self.logger.isEnabledFor(logging.INFO): + kwargs["extra"] = self.extra + self.logger._log(logging.INFO, msg, args, **kwargs) + + def warning(self, msg, *args, **kwargs): + if self.logger.isEnabledFor(logging.WARNING): + kwargs["extra"] = self.extra + self.logger._log(logging.WARNING, msg, args, **kwargs) + + def error(self, msg, *args, **kwargs): + if self.logger.isEnabledFor(logging.ERROR): + kwargs["extra"] = self.extra + 
self.logger._log(logging.ERROR, msg, args, **kwargs) + + +class PathfmtProxy(): + __slots__ = ("job",) + + def __init__(self, job): + self.job = job + + def __getattribute__(self, name): + pathfmt = object.__getattribute__(self, "job").pathfmt + return pathfmt.__dict__.get(name) if pathfmt else None + + +class KwdictProxy(): + __slots__ = ("job",) + + def __init__(self, job): + self.job = job + + def __getattribute__(self, name): + pathfmt = object.__getattribute__(self, "job").pathfmt + return pathfmt.kwdict.get(name) if pathfmt else None + + class Formatter(logging.Formatter): """Custom formatter that supports different formats per loglevel""" def __init__(self, fmt, datefmt): - if not isinstance(fmt, dict): + if isinstance(fmt, dict): + for key in ("debug", "info", "warning", "error"): + value = fmt[key] if key in fmt else LOG_FORMAT + fmt[key] = (util.Formatter(value).format_map, + "{asctime" in value) + else: + if fmt == LOG_FORMAT: + fmt = (fmt.format_map, False) + else: + fmt = (util.Formatter(fmt).format_map, "{asctime" in fmt) fmt = {"debug": fmt, "info": fmt, "warning": fmt, "error": fmt} + self.formats = fmt self.datefmt = datefmt def format(self, record): record.message = record.getMessage() - fmt = self.formats[record.levelname] - if "{asctime" in fmt: + fmt, asctime = self.formats[record.levelname] + if asctime: record.asctime = self.formatTime(record, self.datefmt) - msg = fmt.format_map(record.__dict__) + msg = fmt(record.__dict__) if record.exc_info and not record.exc_text: record.exc_text = self.formatException(record.exc_info) if record.exc_text: @@ -244,7 +303,7 @@ class ColorOutput(TerminalOutput): print("\r\033[1;32m", self.shorten(path), "\033[0m", sep="") -if os.name == "nt": +if util.WINDOWS: ANSI = os.environ.get("TERM") == "ANSI" OFFSET = 1 CHAR_SKIP = "# " diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py index 7a3bf23..faa4d6c 100644 --- a/gallery_dl/postprocessor/__init__.py +++ 
b/gallery_dl/postprocessor/__init__.py @@ -9,7 +9,6 @@ """Post-processing modules""" import importlib -import logging modules = [ "classify", @@ -21,8 +20,6 @@ modules = [ "zip", ] -log = logging.getLogger("postprocessor") - def find(name): """Return a postprocessor class with the given name""" diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py index 4a9bde9..0106903 100644 --- a/gallery_dl/postprocessor/classify.py +++ b/gallery_dl/postprocessor/classify.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -22,8 +22,8 @@ class ClassifyPP(PostProcessor): "Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"), } - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) mapping = options.get("mapping", self.DEFAULT_MAPPING) self.mapping = { diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index 70b0dfb..64f978e 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,15 +8,13 @@ """Common classes and constants used by postprocessor modules.""" -import logging - class PostProcessor(): """Base class for postprocessors""" - def __init__(self): + def __init__(self, job): name = self.__class__.__name__[:-2].lower() - self.log = logging.getLogger("postprocessor." + name) + self.log = job.get_logger("postprocessor." 
+ name) @staticmethod def prepare(pathfmt): diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py index ddbcef0..0d11844 100644 --- a/gallery_dl/postprocessor/compare.py +++ b/gallery_dl/postprocessor/compare.py @@ -14,8 +14,8 @@ import os class ComparePP(PostProcessor): - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) if options.get("action") == "enumerate": self.run = self._run_enumerate if options.get("shallow"): diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index 0a56281..cbe51ae 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,10 +11,9 @@ from .common import PostProcessor from .. 
import util import subprocess -import os -if os.name == "nt": +if util.WINDOWS: def quote(s): return '"' + s.replace('"', '\\"') + '"' else: @@ -23,8 +22,8 @@ else: class ExecPP(PostProcessor): - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) args = options["command"] final = options.get("final", False) diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index aa50dfd..a955ba3 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -15,8 +15,8 @@ import os class MetadataPP(PostProcessor): - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) mode = options.get("mode", "json") if mode == "custom": diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py index 7065428..b8a4988 100644 --- a/gallery_dl/postprocessor/mtime.py +++ b/gallery_dl/postprocessor/mtime.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -14,8 +14,8 @@ from ..text import parse_int class MtimePP(PostProcessor): - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) self.key = options.get("key", "date") def run(self, pathfmt): diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index 706e706..1afba86 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ 
b/gallery_dl/postprocessor/ugoira.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Convert pixiv ugoira to webm""" +"""Convert Pixiv Ugoira to WebM""" from .common import PostProcessor from .. import util @@ -19,8 +19,8 @@ import os class UgoiraPP(PostProcessor): - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) self.extension = options.get("extension") or "webm" self.args = options.get("ffmpeg-args") or () self.twopass = options.get("ffmpeg-twopass", False) diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index a43c43a..6970e95 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -22,8 +22,8 @@ class ZipPP(PostProcessor): "lzma" : zipfile.ZIP_LZMA, } - def __init__(self, pathfmt, options): - PostProcessor.__init__(self) + def __init__(self, job, options): + PostProcessor.__init__(self, job) self.delete = not options.get("keep-files", False) ext = "." 
+ options.get("extension", "zip") algorithm = options.get("compression", "store") @@ -33,7 +33,7 @@ class ZipPP(PostProcessor): algorithm) algorithm = "store" - self.path = pathfmt.realdirectory + self.path = job.pathfmt.realdirectory args = (self.path[:-1] + ext, "a", self.COMPRESSION_ALGORITHMS[algorithm], True) diff --git a/gallery_dl/text.py b/gallery_dl/text.py index 3bb6390..4dc0963 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -60,6 +60,13 @@ def split_html(txt, sep=None): return [] +def ensure_http_scheme(url, scheme="https://"): + """Prepend 'scheme' to 'url' if it doesn't have one""" + if url and not url.startswith(("https://", "http://")): + return scheme + url.lstrip("/:") + return url + + def filename_from_url(url): """Extract the last part of an URL to use as a filename""" try: diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 83cf84b..85b871b 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -113,6 +113,57 @@ def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4): fp.write("\n") +def dump_response(response, fp=sys.stdout, + headers=True, content=True, hide_auth=True): + """Write the contents of 'response' into a file-like object""" + + if headers: + request = response.request + req_headers = request.headers.copy() + outfmt = """\ +{request.method} {request.url} +Status: {response.status_code} {response.reason} + +Request Headers +--------------- +{request_headers} + +Response Headers +---------------- +{response_headers} +""" + if hide_auth: + authorization = req_headers.get("Authorization") + if authorization: + atype, sep, _ = authorization.partition(" ") + req_headers["Authorization"] = atype + " ***" if sep else "***" + + cookies = req_headers.get("Cookie") + if cookies: + req_headers["Cookie"] = ";".join( + cookie.partition("=")[0] + "=***" + for cookie in cookies.split(";") + ) + + fp.write(outfmt.format( + request=request, + response=response, + request_headers="\n".join( + name + ": " + value + for 
name, value in req_headers.items() + ), + response_headers="\n".join( + name + ": " + value + for name, value in response.headers.items() + ), + ).encode()) + + if content: + if headers: + fp.write(b"\nContent\n-------\n") + fp.write(response.content) + + def expand_path(path): """Expand environment variables and tildes (~)""" if not path: @@ -270,6 +321,8 @@ class UniversalNone(): NONE = UniversalNone() +WINDOWS = (os.name == "nt") +SENTINEL = object() def build_predicate(predicates): @@ -672,22 +725,26 @@ class PathFormat(): self.basedirectory = basedir restrict = extractor.config("path-restrict", "auto") + replace = extractor.config("path-replace", "_") + if restrict == "auto": - restrict = "\\\\|/<>:\"?*" if os.name == "nt" else "/" + restrict = "\\\\|/<>:\"?*" if WINDOWS else "/" elif restrict == "unix": restrict = "/" elif restrict == "windows": restrict = "\\\\|/<>:\"?*" + self.clean_segment = self._build_cleanfunc(restrict, replace) remove = extractor.config("path-remove", "\x00-\x1f\x7f") - - self.clean_segment = self._build_cleanfunc(restrict, "_") self.clean_path = self._build_cleanfunc(remove, "") @staticmethod def _build_cleanfunc(chars, repl): if not chars: return lambda x: x + elif isinstance(chars, dict): + def func(x, table=str.maketrans(chars)): + return x.translate(table) elif len(chars) == 1: def func(x, c=chars, r=repl): return x.replace(c, r) @@ -726,7 +783,7 @@ class PathFormat(): def set_directory(self, kwdict): """Build directory path and create it if necessary""" - windows = os.name == "nt" + self.kwdict = kwdict # Build path segments by applying 'kwdict' to directory format strings segments = [] @@ -734,7 +791,7 @@ class PathFormat(): try: for formatter in self.directory_formatters: segment = formatter(kwdict).strip() - if windows: + if WINDOWS: # remove trailing dots and spaces (#647) segment = segment.rstrip(". 
") if segment: @@ -751,7 +808,7 @@ class PathFormat(): directory += sep self.directory = directory - if windows: + if WINDOWS: # Enable longer-than-260-character paths on Windows directory = "\\\\?\\" + os.path.abspath(directory) @@ -772,6 +829,8 @@ class PathFormat(): if self.extension: self.build_path() + else: + self.filename = "" def set_extension(self, extension, real=True): """Set filename extension""" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 40b5c73..dd6f373 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.13.6" +__version__ = "1.14.0" diff --git a/test/test_cache.py b/test/test_cache.py index e19896e..ecf482c 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -10,8 +10,8 @@ import os import sys import unittest +from unittest.mock import patch -import time import tempfile sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -22,8 +22,8 @@ config.set(("cache",), "file", dbpath) from gallery_dl import cache # noqa E402 -def tearDownModule(): - util.remove_file(dbpath) +# def tearDownModule(): +# util.remove_file(dbpath) class TestCache(unittest.TestCase): @@ -86,32 +86,50 @@ class TestCache(unittest.TestCase): self.assertEqual(ka(9, 9, 2), 6) def test_expires_mem(self): - @cache.memcache(maxage=1) + @cache.memcache(maxage=2) def ex(a, b, c): return a+b+c - self.assertEqual(ex(1, 1, 1), 3) - self.assertEqual(ex(2, 2, 2), 3) - self.assertEqual(ex(3, 3, 3), 3) + with patch("time.time") as tmock: + tmock.return_value = 0.001 + self.assertEqual(ex(1, 1, 1), 3) + self.assertEqual(ex(2, 2, 2), 3) + self.assertEqual(ex(3, 3, 3), 3) - time.sleep(2) - self.assertEqual(ex(3, 3, 3), 9) - self.assertEqual(ex(2, 2, 2), 9) - self.assertEqual(ex(1, 1, 1), 9) + # value is still cached after 1 second + tmock.return_value += 1.0 + self.assertEqual(ex(3, 3, 
3), 3) + self.assertEqual(ex(2, 2, 2), 3) + self.assertEqual(ex(1, 1, 1), 3) + + # new value after 'maxage' seconds + tmock.return_value += 1.0 + self.assertEqual(ex(3, 3, 3), 9) + self.assertEqual(ex(2, 2, 2), 9) + self.assertEqual(ex(1, 1, 1), 9) def test_expires_db(self): - @cache.cache(maxage=1) + @cache.cache(maxage=2) def ex(a, b, c): return a+b+c - self.assertEqual(ex(1, 1, 1), 3) - self.assertEqual(ex(2, 2, 2), 3) - self.assertEqual(ex(3, 3, 3), 3) - - time.sleep(2) - self.assertEqual(ex(3, 3, 3), 9) - self.assertEqual(ex(2, 2, 2), 9) - self.assertEqual(ex(1, 1, 1), 9) + with patch("time.time") as tmock: + tmock.return_value = 0.999 + self.assertEqual(ex(1, 1, 1), 3) + self.assertEqual(ex(2, 2, 2), 3) + self.assertEqual(ex(3, 3, 3), 3) + + # value is still cached after 1 second + tmock.return_value += 1.0 + self.assertEqual(ex(3, 3, 3), 3) + self.assertEqual(ex(2, 2, 2), 3) + self.assertEqual(ex(1, 1, 1), 3) + + # new value after 'maxage' seconds + tmock.return_value += 1.0 + self.assertEqual(ex(3, 3, 3), 9) + self.assertEqual(ex(2, 2, 2), 9) + self.assertEqual(ex(1, 1, 1), 9) def test_update_mem_simple(self): @cache.memcache(keyarg=0) diff --git a/test/test_config.py b/test/test_config.py index cb202be..4171435 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -14,7 +14,8 @@ import unittest import json import tempfile -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, ROOTDIR) from gallery_dl import config # noqa E402 @@ -156,10 +157,12 @@ class TestConfigFiles(unittest.TestCase): @staticmethod def _load(name): - rootdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - path = os.path.join(rootdir, "docs", name) - with open(path) as fp: - return json.load(fp) + path = os.path.join(ROOTDIR, "docs", name) + try: + with open(path) as fp: + return json.load(fp) + except FileNotFoundError: + raise 
unittest.SkipTest(path + " not available") if __name__ == '__main__': diff --git a/test/test_downloader.py b/test/test_downloader.py index 9393040..5d73a4c 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -14,21 +14,30 @@ from unittest.mock import Mock, MagicMock, patch import re import base64 +import logging import os.path import tempfile import threading import http.server + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from gallery_dl import downloader, extractor, config, util # noqa E402 -from gallery_dl.downloader.common import DownloaderBase # noqa E402 -from gallery_dl.output import NullOutput # noqa E402 +from gallery_dl import downloader, extractor, output, config, util # noqa E402 class MockDownloaderModule(Mock): __downloader__ = "mock" +class FakeJob(): + + def __init__(self): + self.extractor = extractor.find("test:") + self.pathfmt = util.PathFormat(self.extractor) + self.out = output.NullOutput() + self.get_logger = logging.getLogger + + class TestDownloaderModule(unittest.TestCase): @classmethod @@ -96,11 +105,10 @@ class TestDownloaderBase(unittest.TestCase): @classmethod def setUpClass(cls): - cls.extractor = extractor.find("test:") - cls.extractor.log.job = None cls.dir = tempfile.TemporaryDirectory() cls.fnum = 0 config.set((), "base-directory", cls.dir.name) + cls.job = FakeJob() @classmethod def tearDownClass(cls): @@ -113,12 +121,13 @@ class TestDownloaderBase(unittest.TestCase): cls.fnum += 1 kwdict = { - "category": "test", + "category" : "test", "subcategory": "test", - "filename": name, - "extension": extension, + "filename" : name, + "extension" : extension, } - pathfmt = util.PathFormat(cls.extractor) + + pathfmt = cls.job.pathfmt pathfmt.set_directory(kwdict) pathfmt.set_filename(kwdict) @@ -159,7 +168,7 @@ class TestHTTPDownloader(TestDownloaderBase): @classmethod def setUpClass(cls): TestDownloaderBase.setUpClass() - cls.downloader = downloader.find("http")(cls.extractor, 
NullOutput()) + cls.downloader = downloader.find("http")(cls.job) port = 8088 cls.address = "http://127.0.0.1:{}".format(port) @@ -196,7 +205,7 @@ class TestTextDownloader(TestDownloaderBase): @classmethod def setUpClass(cls): TestDownloaderBase.setUpClass() - cls.downloader = downloader.find("text")(cls.extractor, NullOutput()) + cls.downloader = downloader.find("text")(cls.job) def test_text_download(self): self._run_test("text:foobar", None, "foobar", "txt", "txt") @@ -208,29 +217,6 @@ class TestTextDownloader(TestDownloaderBase): self._run_test("text:", None, "", "txt", "txt") -class FakeDownloader(DownloaderBase): - scheme = "fake" - - def __init__(self, extractor, output): - DownloaderBase.__init__(self, extractor, output) - - def connect(self, url, offset): - pass - - def receive(self, file): - pass - - def reset(self): - pass - - def get_extension(self): - pass - - @staticmethod - def _check_extension(file, pathfmt): - pass - - class HttpRequestHandler(http.server.BaseHTTPRequestHandler): def do_GET(self): diff --git a/test/test_oauth.py b/test/test_oauth.py index 58d4088..e4664e4 100644 --- a/test/test_oauth.py +++ b/test/test_oauth.py @@ -15,6 +15,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from gallery_dl import oauth, text # noqa E402 TESTSERVER = "http://term.ie/oauth/example" +TESTSERVER = "http://term.ie/oauth/example" CONSUMER_KEY = "key" CONSUMER_SECRET = "secret" REQUEST_TOKEN = "requestkey" @@ -99,8 +100,10 @@ class TestOAuthSession(unittest.TestCase): CONSUMER_KEY, CONSUMER_SECRET, oauth_token, oauth_token_secret, ) - url = TESTSERVER + endpoint - return session.get(url, params=params).text + try: + return session.get(TESTSERVER + endpoint, params=params).text + except OSError: + raise unittest.SkipTest() if __name__ == "__main__": diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 354f9ff..5da3131 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -12,12 
+12,14 @@ import sys import unittest from unittest.mock import Mock, mock_open, patch +import logging import zipfile import tempfile from datetime import datetime, timezone as tz sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from gallery_dl import postprocessor, extractor, util, config # noqa E402 +from gallery_dl import extractor, output, util # noqa E402 +from gallery_dl import postprocessor, util, config # noqa E402 from gallery_dl.postprocessor.common import PostProcessor # noqa E402 @@ -25,6 +27,15 @@ class MockPostprocessorModule(Mock): __postprocessor__ = "mock" +class FakeJob(): + + def __init__(self): + self.extractor = extractor.find("test:") + self.pathfmt = util.PathFormat(self.extractor) + self.out = output.NullOutput() + self.get_logger = logging.getLogger + + class TestPostprocessorModule(unittest.TestCase): def setUp(self): @@ -58,9 +69,9 @@ class BasePostprocessorTest(unittest.TestCase): @classmethod def setUpClass(cls): - cls.extractor = extractor.find("test:") cls.dir = tempfile.TemporaryDirectory() config.set((), "base-directory", cls.dir.name) + cls.job = FakeJob() @classmethod def tearDownClass(cls): @@ -74,12 +85,12 @@ class BasePostprocessorTest(unittest.TestCase): if data is not None: kwdict.update(data) - self.pathfmt = util.PathFormat(self.extractor) + self.pathfmt = self.job.pathfmt self.pathfmt.set_directory(kwdict) self.pathfmt.set_filename(kwdict) pp = postprocessor.find(self.__class__.__name__[:-4].lower()) - return pp(self.pathfmt, options) + return pp(self.job, options) class ClassifyTest(BasePostprocessorTest): diff --git a/test/test_results.py b/test/test_results.py index 046efc5..5bef1a4 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -26,10 +26,14 @@ TRAVIS_SKIP = { "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs", "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex", "sankakucomplex", "warosu", "fuskator", "patreon", "komikcast", + 
"instagram", } # temporary issues, etc. BROKEN = { + "e621", + "imagevenue", + "jaiminisbox", "photobucket", "worldthree", } diff --git a/test/test_text.py b/test/test_text.py index 4f31d81..aeb8096 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -94,6 +94,33 @@ class TestText(unittest.TestCase): for value in INVALID: self.assertEqual(f(value), empty) + def test_ensure_http_scheme(self, f=text.ensure_http_scheme): + result = "https://example.org/filename.ext" + + # standard usage + self.assertEqual(f(""), "") + self.assertEqual(f("example.org/filename.ext"), result) + self.assertEqual(f("/example.org/filename.ext"), result) + self.assertEqual(f("//example.org/filename.ext"), result) + self.assertEqual(f("://example.org/filename.ext"), result) + + # no change + self.assertEqual(f(result), result) + self.assertEqual( + f("http://example.org/filename.ext"), + "http://example.org/filename.ext", + ) + + # ... + self.assertEqual( + f("htp://example.org/filename.ext"), + "https://htp://example.org/filename.ext", + ) + + # invalid arguments + for value in INVALID_ALT: + self.assertEqual(f(value), value) + def test_filename_from_url(self, f=text.filename_from_url): result = "filename.ext" |
