diff options
author | Unit 193 <unit193@unit193.net> | 2021-11-01 05:03:49 -0400 |
---|---|---|
committer | Unit 193 <unit193@unit193.net> | 2021-11-01 05:03:49 -0400 |
commit | 4a965d875415907cc1a016b428ae305a964f9228 (patch) | |
tree | 7cece9948a7ba390348e00c669f9cb1f7a9ba39a | |
parent | 34ba2951b8c523713425c98addb9256ea05c946f (diff) | |
download | gallery-dl-4a965d875415907cc1a016b428ae305a964f9228.tar.bz2 gallery-dl-4a965d875415907cc1a016b428ae305a964f9228.tar.xz gallery-dl-4a965d875415907cc1a016b428ae305a964f9228.tar.zst |
New upstream version 1.19.1.
upstream/1.19.1
29 files changed, 1095 insertions, 365 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 87dd18f..f6b60f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +## 1.19.1 - 2021-10-24 +### Additions +- [inkbunny] add `following` extractor ([#515](https://github.com/mikf/gallery-dl/issues/515)) +- [inkbunny] add `pool` extractor ([#1937](https://github.com/mikf/gallery-dl/issues/1937)) +- [kemonoparty] add `discord` extractor ([#1827](https://github.com/mikf/gallery-dl/issues/1827), [#1940](https://github.com/mikf/gallery-dl/issues/1940)) +- [nhentai] add `tag` extractor ([#1950](https://github.com/mikf/gallery-dl/issues/1950), [#1955](https://github.com/mikf/gallery-dl/issues/1955)) +- [patreon] add `files` option ([#1935](https://github.com/mikf/gallery-dl/issues/1935)) +- [picarto] add `gallery` extractor ([#1931](https://github.com/mikf/gallery-dl/issues/1931)) +- [pixiv] add `sketch` extractor ([#1497](https://github.com/mikf/gallery-dl/issues/1497)) +- [seisoparty] add `favorite` extractor ([#1906](https://github.com/mikf/gallery-dl/issues/1906)) +- [twitter] add `size` option ([#1881](https://github.com/mikf/gallery-dl/issues/1881)) +- [vk] add `album` extractor ([#474](https://github.com/mikf/gallery-dl/issues/474), [#1952](https://github.com/mikf/gallery-dl/issues/1952)) +- [postprocessor:compare] add `equal` option ([#1592](https://github.com/mikf/gallery-dl/issues/1592)) +### Fixes +- [cyberdrop] extract direct download URLs ([#1943](https://github.com/mikf/gallery-dl/issues/1943)) +- [deviantart] update `search` argument handling ([#1911](https://github.com/mikf/gallery-dl/issues/1911)) +- [deviantart] full resolution for non-downloadable images ([#293](https://github.com/mikf/gallery-dl/issues/293)) +- [furaffinity] unquote search queries ([#1958](https://github.com/mikf/gallery-dl/issues/1958)) +- [inkbunny] match "long" URLs for pools and favorites ([#1937](https://github.com/mikf/gallery-dl/issues/1937)) +- [kemonoparty] improve inline extraction 
([#1899](https://github.com/mikf/gallery-dl/issues/1899)) +- [mangadex] update parameter handling for API requests ([#1908](https://github.com/mikf/gallery-dl/issues/1908)) +- [patreon] better filenames for `content` images ([#1954](https://github.com/mikf/gallery-dl/issues/1954)) +- [redgifs][gfycat] provide fallback URLs ([#1962](https://github.com/mikf/gallery-dl/issues/1962)) +- [downloader:ytdl] prevent crash in `_progress_hook()` + ## 1.19.0 - 2021-10-01 ### Additions - [aryion] add `tag` extractor ([#1849](https://github.com/mikf/gallery-dl/issues/1849)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.19.0 +Version: 1.19.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -75,8 +75,8 @@ Description: ========== Prebuilt executable files with a Python interpreter and required Python packages included are available for - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -64,8 +64,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__ 
| Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 29621b0..b893a3e 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-10-01" "1.19.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-10-24" "1.19.1" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index a5c0970..cdedaee 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-10-01" "1.19.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-10-24" "1.19.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -451,6 +451,8 @@ and optional for .br * \f[I]sankaku\f[] .br +* \f[I]seisoparty\f[] +.br * \f[I]subscribestar\f[] .br * \f[I]tapas\f[] @@ -1203,18 +1205,6 @@ files if they are images and falls back to preview versions for everything else (archives, etc.). -.SS extractor.deviantart.quality -.IP "Type:" 6 -\f[I]integer\f[] - -.IP "Default:" 9 -\f[I]100\f[] - -.IP "Description:" 4 -JPEG quality level of newer images for which -an original file download is not available. - - .SS extractor.deviantart.refresh-token .IP "Type:" 6 \f[I]string\f[] @@ -1411,18 +1401,24 @@ You can use \f[I]"all"\f[] instead of listing all values separately. .SS extractor.gfycat.format .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 -\f[I]"mp4"\f[] +\f[I]["mp4", "webm", "mobile", "gif"]\f[] .IP "Description:" 4 -The name of the preferred animation format, which can be one of -\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[] or \f[I]"mjpg"\f[]. 
+List of names of the preferred animation format, which can be +\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"mobile"\f[], \f[I]"gif"\f[], or \f[I]"webp"\f[]. + +If a selected format is not available, the next one in the list will be +tried until an available format is found. -If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[] -and \f[I]"gif"\f[] (in that order) will be tried instead, until an -available format is found. +If the format is given as \f[I]string\f[], it will be extended with +\f[I]["mp4", "webm", "mobile", "gif"]\f[]. Use a list with one element to +restrict it to only one possible format. .SS extractor.hentaifoundry.include @@ -1445,17 +1441,6 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. -.SS extractor.hentainexus.original -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -Download original files instead of WebP versions. - - .SS extractor.hitomi.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -1608,6 +1593,20 @@ to download as mp4 videos. The server to use for API requests. +.SS extractor.mangadex.api-parameters +.IP "Type:" 6 +\f[I]object\f[] + +.IP "Example:" 4 +{"order[updatedAt]": "desc"} + +.IP "Description:" 4 +Additional query parameters to send when fetching manga chapters. + +(See \f[I]/manga/{id}/feed\f[] +and \f[I]/user/follows/manga/feed\f[]) + + .SS extractor.mangadex.lang .IP "Type:" 6 \f[I]string\f[] @@ -1631,6 +1630,17 @@ to filter chapters by. Provide \f[I]artist\f[], \f[I]author\f[], and \f[I]group\f[] metadata fields. +.SS extractor.mangadex.ratings +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]["safe", "suggestive", "erotica", "pornographic"]\f[] + +.IP "Description:" 4 +List of acceptable content ratings for returned chapters. + + .SS extractor.mastodon.reblogs .IP "Type:" 6 \f[I]bool\f[] @@ -1774,6 +1784,20 @@ port number in your browser's address bar when using a different port than the default. 
+.SS extractor.patreon.files +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]["images", "attachments", "postfile", "content"]\f[] + +.IP "Description:" 4 +Determines the type and order of files to be downloaded. + +Available types are +\f[I]postfile\f[], \f[I]images\f[], \f[I]attachments\f[], and \f[I]content\f[]. + + .SS extractor.photobucket.subalbums .IP "Type:" 6 \f[I]bool\f[] @@ -2077,19 +2101,24 @@ video extraction and download .SS extractor.redgifs.format .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] +.br +* \f[I]string\f[] .IP "Default:" 9 -\f[I]"mp4"\f[] +\f[I]["mp4", "webm", "mobile", "gif"]\f[] .IP "Description:" 4 -The name of the preferred format, which can be one of -\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[], \f[I]"mobile"\f[], -or \f[I]"mini"\f[]. +List of names of the preferred animation format, which can be +\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[], \f[I]"mobile"\f[], or \f[I]"mini"\f[]. -If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[] -and \f[I]"gif"\f[] (in that order) will be tried instead, until an -available format is found. +If a selected format is not available, the next one in the list will be +tried until an available format is found. + +If the format is given as \f[I]string\f[], it will be extended with +\f[I]["mp4", "webm", "mobile", "gif"]\f[]. Use a list with one element to +restrict it to only one possible format. .SS extractor.sankakucomplex.embeds @@ -2229,6 +2258,22 @@ Fetch media from all Tweets and replies in a \f[I]conversation <https://help.twitter.com/en/using-twitter/twitter-conversations>\f[]. +.SS extractor.twitter.size +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]["orig", "large", "medium", "small"]\f[] + +.IP "Description:" 4 +The image version to download. +Any entries after the first one will be used for potential +\f[I]fallback\f[] URLs. 
+ +Known available sizes are +\f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[]. + + .SS extractor.twitter.logout .IP "Type:" 6 \f[I]bool\f[] @@ -2991,27 +3036,38 @@ in their default location. \f[I]"replace"\f[] .IP "Description:" 4 -The action to take when files do not compare as equal. +The action to take when files do **not** compare as equal. .br * \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one .br -* \f[I]"abort:N"\f[]: Same as \f[I]"replace"\f[] and stop the current extractor run -after \f[I]N\f[] consecutive files compared as equal. +* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new +version like \f[I]skip = "enumerate"\f[] + + +.SS compare.equal +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"null"\f[] + +.IP "Description:" 4 +The action to take when files do compare as equal. .br -* \f[I]"terminate:N"\f[]: Same as \f[I]"replace"\f[] -and stop the current extractor run, including parent extractors, +* \f[I]"abort:N"\f[]: Stop the current extractor run after \f[I]N\f[] consecutive files compared as equal. .br -* \f[I]"exit:N"\f[]: Same as \f[I]"replace"\f[] and exit the program +* \f[I]"terminate:N"\f[]: Stop the current extractor run, +including parent extractors, after \f[I]N\f[] consecutive files compared as equal. .br -* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new -version like \f[I]skip = "enumerate"\f[] +* \f[I]"exit:N"\f[]: Exit the program +after \f[I]N\f[] consecutive files compared as equal. 
.SS compare.shallow diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf index d84febd..72e7465 100644 --- a/docs/gallery-dl-example.conf +++ b/docs/gallery-dl-example.conf @@ -2,91 +2,123 @@ "extractor": { "base-directory": "~/gallery-dl/", + + "#": "set global archive file for all extractors", "archive": "~/gallery-dl/archive.sqlite3", - "proxy": "http://10.10.1.10:3128", + "#": "replace invalid path characters with unicode alternatives", + "path-restrict": { + "\\": "⧹", + "/" : "⧸", + "|" : "│", + ":" : "꞉", + "*" : "∗", + "?" : "?", + "\"": "″", + "<" : "﹤", + ">" : "﹥" + }, + + "#": "write tags for several *booru sites", "postprocessors": [ { - "name": "ugoira", - "whitelist": ["pixiv", "danbooru"], - "ffmpeg-twopass": true, - "ffmpeg-args": ["-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"] - }, - { "name": "metadata", - "whitelist": ["danbooru", "yandere", "sankaku"], - "mode": "tags" + "mode": "tags", + "whitelist": ["danbooru", "moebooru", "sankaku"] } ], "pixiv": { + "#": "override global archive setting for pixiv", "archive": "~/gallery-dl/archive-pixiv.sqlite3", + "#": "set custom directory and filename format strings for all pixiv downloads", "filename": "{id}{num}.{extension}", "directory": ["Pixiv", "Works", "{user[id]}"], + "refresh-token": "aBcDeFgHiJkLmNoPqRsTuVwXyZ01234567890-FedC9", - "username": "foo", - "password": "bar", + "#": "transform ugoira into lossless MKVs", + "ugoira": true, + "postprocessors": ["ugoira-copy"], + "#": "use special settings for favorites and bookmarks", "favorite": { "directory": ["Pixiv", "Favorites", "{user[id]}"] }, - "bookmark": { "directory": ["Pixiv", "My Bookmarks"], - - "username": "foo123", - "password": "bar123" + "refresh-token": "01234567890aBcDeFgHiJkLmNoPqRsTuVwXyZ-ZyxW1" } }, + "danbooru": + { + "ugoira": true, + "postprocessors": ["ugoira-webm"] + }, + "exhentai": { + "#": "use cookies instead of logging in with username and password", "cookies": { "ipb_member_id": "12345", 
"ipb_pass_hash": "1234567890abcdef", "igneous" : "123456789", - "hath_perks" : "m1.m2.m3.a-123456789a" + "hath_perks" : "m1.m2.m3.a-123456789a", + "sk" : "n4m34tv3574m2c4e22c35zgeehiw", + "sl" : "dm_2" }, - "proxy": - { - "http": "http://10.10.1.10:8080", - "https": "https://10.10.1.10:443" - }, + "#": "wait 2 to 4.8 seconds between HTTP requests", + "sleep-request": [2.0, 4.8], "filename": "{num:>04}_{name}.{extension}", - "directory": ["{category!c}", "{title}"], - - "wait-min": 1.0, - "wait-max": 5.0 + "directory": ["{category!c}", "{title}"] }, - "mangadex": + "sankaku": { - "chapter-filter": "lang == 'en'", - "postprocessors": [{ - "name": "zip", - "keep-files": false, - "compression": "zip" - }] + "#": "authentication with cookies is not possible for sankaku", + "username": "user", + "password": "#secret#" + }, + + "furaffinity": { + "#": "authentication with username and password is not possible due to CAPTCHA", + "cookies": { + "a": "01234567-89ab-cdef-fedc-ba9876543210", + "b": "fedcba98-7654-3210-0123-456789abcdef" + }, + + "descriptions": "html", + "postprocessors": ["content"] }, "deviantart": { + "#": "download 'gallery' and 'scraps' images for user profile URLs", "include": "gallery,scraps", + + "#": "use custom API credentials to avoid 429 errors", + "client-id": "98765", + "client-secret": "0123456789abcdef0123456789abcdef", + "refresh-token": "0123456789abcdef0123456789abcdef01234567", + + "#": "put description texts into a separate directory", "metadata": true, - "postprocessors": [{ - "name": "metadata", - "mode": "custom", - "directory" : "Descriptions", - "content-format" : "{description}\n", - "extension-format": "descr.txt" - }] + "postprocessors": [ + { + "name": "metadata", + "mode": "custom", + "directory" : "Descriptions", + "content-format" : "{description}\n", + "extension-format": "descr.txt" + } + ] }, "flickr": @@ -96,88 +128,126 @@ "size-max": 1920 }, + "mangadex": + { + "#": "only download safe/suggestive chapters translated to 
English", + "lang": "en", + "ratings": ["safe", "suggestive"], + + "#": "put chapters into '.cbz' archives", + "postprocessors": ["cbz"] + }, + "reddit": { - "morecomments": true, - "date-min": "2017-01", - "date-format": "%Y-%m", - "recursion": 1 + "#": "only spawn child extractors for links to specific sites", + "whitelist": ["imgur", "redgifs", "gfycat"], + + "#": "put files from child extractors into the reddit directory", + "parent-directory": true, + + "#": "transfer metadata to any child extractor as '_reddit'", + "parent-metadata": "_reddit" }, - "sankaku": + "imgur": { - "sleep": 2, - "wait-min": 5.0, - "wait-max": 5.0, - "cookies": "~/gallery-dl/cookies-sankaku.txt" + "#": "use different directory and filename formats when coming from a reddit post", + "directory": + { + "'_reddit' in locals()": [] + }, + "filename": + { + "'_reddit' in locals()": "{_reddit[id]} {id}.{extension}", + "" : "{id}.{extension}" + } }, "tumblr": { - "posts": "all", + "posts" : "all", "external": false, - "reblogs": false, - "inline": true, + "reblogs" : false, + "inline" : true, + "#": "use special settings when downloading liked posts", "likes": { - "posts": "video,photo,link", + "posts" : "video,photo,link", "external": true, - "reblogs": true + "reblogs" : true } }, + "twitter": + { + "#": "write text content for *all* tweets", + "postprocessors": ["content"], + "text-tweets": true + }, + "mastodon": { - "mastodon.xyz": + "#": "add 'tabletop.social' as recognized mastodon instance", + "#": "(run 'gallery-dl oauth:mastodon:tabletop.social to get an access token')", + "tabletop.social": { - "access-token": "cab65529..." - }, - "tabletop.social": { + "root": "https://tabletop.social", "access-token": "513a36c6..." 
}, + "#": "set filename format strings for all 'mastodon' instances", "directory": ["mastodon", "{instance}", "{account[username]!l}"], - "filename": "{id}_{media[id]}.{extension}" + "filename" : "{id}_{media[id]}.{extension}" }, "foolslide": { - "otscans": {"root": "https://otscans.com/foolslide"}, + "#": "add two more foolslide instances", + "otscans" : {"root": "https://otscans.com/foolslide"}, "helvetica": {"root": "https://helveticascans.com/r" } }, "foolfuuka": { + "#": "add two other foolfuuka 4chan archives", "fireden-onion": {"root": "http://ydt6jy2ng3s3xg2e.onion"}, - "scalearchive": {"root": "https://archive.scaled.team" } - }, - - "replace invalid path characters with unicode alternatives": null, - "path-restrict": { - "\\": "⧹", - "/" : "⧸", - "|" : "│", - ":" : "꞉", - "*" : "∗", - "?" : "?", - "\"": "″", - "<" : "﹤", - ">" : "﹥" + "scalearchive" : {"root": "https://archive.scaled.team" } } }, "downloader": { - "part-directory": "/tmp/.download/", + "#": "restrict download speed to 1 MB/s", "rate": "1M", + + "#": "show download progress indicator after 2 seconds", + "progress": 2.0, + + "#": "retry failed downloads up to 3 times", "retries": 3, - "timeout": 8.5 + + "#": "consider a download 'failed' after 8 seconds of inactivity", + "timeout": 8.0, + + "#": "write '.part' files into a special directory", + "part-directory": "/tmp/.download/", + + "#": "do not update file modification times", + "mtime": false, + + "ytdl": + { + "#": "use yt-dlp instead of youtube-dl", + "module": "yt_dlp" + } }, "output": { - "mode": "terminal", "log": { "level": "info", + + "#": "use different ANSI colors for each log level", "format": { "debug" : "\u001b[0;37m{name}: {message}\u001b[0m", "info" : "\u001b[1;37m{name}: {message}\u001b[0m", @@ -185,11 +255,19 @@ "error" : "\u001b[1;31m{name}: {message}\u001b[0m" } }, + + "#": "shorten filenames to fit into one terminal line", + "#": "while also considering wider East-Asian characters", + "shorten": "eaw", + + "#": "write 
logging messages to a separate file", "logfile": { "path": "~/gallery-dl/log.txt", "mode": "w", "level": "debug" }, + + "#": "write unrecognized URLs to a separate file", "unsupportedfile": { "path": "~/gallery-dl/unsupported.txt", "mode": "a", @@ -198,9 +276,63 @@ } }, - "cache": { - "file": "~/gallery-dl/cache.sqlite3" + "postprocessor": + { + "#": "write 'content' metadata into separate files", + "content": + { + "name" : "metadata", + + "#": "write data for every post instead of each individual file", + "event": "post", + "filename": "{post_id|tweet_id|id}.txt", + + "#": "write only the values for 'content' or 'description'", + "mode" : "custom", + "format": "{content|description}\n" + }, + + "#": "put files into a '.cbz' archive", + "cbz": + { + "name": "zip", + "extension": "cbz" + }, + + "#": "various ugoira post processor configurations to create different file formats", + "ugoira-webm": + { + "name": "ugoira", + "extension": "webm", + "ffmpeg-args": ["-c:v", "libvpx-vp9", "-an", "-b:v", "0", "-crf", "30"], + "ffmpeg-twopass": true, + "ffmpeg-demuxer": "image2" + }, + "ugoira-mp4": + { + "name": "ugoira", + "extension": "mp4", + "ffmpeg-args": ["-c:v", "libx264", "-an", "-b:v", "4M", "-preset", "veryslow"], + "ffmpeg-twopass": true, + "libx264-prevent-odd": true + }, + "ugoira-gif": + { + "name": "ugoira", + "extension": "gif", + "ffmpeg-args": ["-filter_complex", "[0:v] split [a][b];[a] palettegen [p];[b][p] paletteuse"] + }, + "ugoira-copy": { + "name": "ugoira", + "extension": "mkv", + "ffmpeg-args": ["-c", "copy"], + "libx264-prevent-odd": false, + "repeat-last-frame": false + } }, - "netrc": true + "#": "use a custom cache file location", + "cache": { + "file": "~/gallery-dl/cache.sqlite3" + } } diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index b998597..c89f4b9 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -76,7 +76,6 @@ "mature": true, "metadata": false, "original": true, - "quality": 100, "wait-min": 0 }, "e621": @@ 
-107,7 +106,7 @@ }, "gfycat": { - "format": "mp4" + "format": ["mp4", "webm", "mobile", "gif"] }, "hentaifoundry": { @@ -222,7 +221,7 @@ }, "redgifs": { - "format": "mp4" + "format": ["mp4", "webm", "mobile", "gif"] }, "sankakucomplex": { diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 63101a1..5f9b4b8 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.19.0 +Version: 1.19.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -75,8 +75,8 @@ Description: ========== Prebuilt executable files with a Python interpreter and required Python packages included are available for - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index b662b96..6dd43c8 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -132,6 +132,7 @@ gallery_dl/extractor/patreon.py gallery_dl/extractor/philomena.py gallery_dl/extractor/photobucket.py gallery_dl/extractor/photovogue.py +gallery_dl/extractor/picarto.py gallery_dl/extractor/piczel.py gallery_dl/extractor/pillowfort.py gallery_dl/extractor/pinterest.py diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py index 7a49b61..923ed32 100644 --- a/gallery_dl/cache.py +++ b/gallery_dl/cache.py @@ -211,13 +211,18 @@ def _path(): return 
os.path.join(cachedir, "cache.sqlite3") -try: - dbfile = _path() +def _init(): + try: + dbfile = _path() + + # restrict access permissions for new db files + os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600)) + + DatabaseCacheDecorator.db = sqlite3.connect( + dbfile, timeout=60, check_same_thread=False) + except (OSError, TypeError, sqlite3.OperationalError): + global cache + cache = memcache - # restrict access permissions for new db files - os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600)) - DatabaseCacheDecorator.db = sqlite3.connect( - dbfile, timeout=60, check_same_thread=False) -except (OSError, TypeError, sqlite3.OperationalError): - cache = memcache # noqa: F811 +_init() diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 86e247b..f4d3e05 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -136,8 +136,9 @@ class YoutubeDLDownloader(DownloaderBase): def _progress_hook(self, info): if info["status"] == "downloading" and \ info["elapsed"] >= self.progress: + total = info.get("total_bytes") or info.get("total_bytes_estimate") self.out.progress( - info["total_bytes"], + None if total is None else int(total), info["downloaded_bytes"], int(info["speed"]), ) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index c512548..93702ab 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -95,6 +95,7 @@ modules = [ "philomena", "photobucket", "photovogue", + "picarto", "piczel", "pillowfort", "pinterest", diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index 2004921..d1b1b25 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -8,7 +8,6 @@ from .common import Extractor, Message from .. 
import text -import binascii class CyberdropAlbumExtractor(Extractor): @@ -19,7 +18,7 @@ class CyberdropAlbumExtractor(Extractor): archive_fmt = "{album_id}_{id}" pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" test = ("https://cyberdrop.me/a/keKRjm4t", { - "pattern": r"https://f\.cyberdrop\.cc/.*\.[a-z]+$", + "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.[a-z]+$", "keyword": { "album_id": "keKRjm4t", "album_name": "Fate (SFW)", @@ -38,7 +37,14 @@ class CyberdropAlbumExtractor(Extractor): def items(self): url = self.root + "/a/" + self.album_id extr = text.extract_from(self.request(url).text) - extr("const albumData = {", "") + + files = [] + append = files.append + while True: + url = extr('downloadUrl: "', '"') + if not url: + break + append(text.unescape(url)) data = { "album_id" : self.album_id, @@ -46,13 +52,11 @@ class CyberdropAlbumExtractor(Extractor): "date" : text.parse_timestamp(extr("timestamp: ", ",")), "album_size" : text.parse_int(extr("totalSize: ", ",")), "description": extr("description: `", "`"), + "count" : len(files), } - files = extr("fl: '", "'").split(",") - data["count"] = len(files) yield Message.Directory, data - for file_b64 in files: - file = binascii.a2b_base64(file_b64).decode() - text.nameext_from_url(file, data) + for url in files: + text.nameext_from_url(url, data) data["filename"], _, data["id"] = data["filename"].rpartition("-") - yield Message.Url, "https://f.cyberdrop.cc/" + file, data + yield Message.Url, url, data diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 7dac770..4604d39 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -14,6 +14,7 @@ from ..cache import cache, memcache import collections import itertools import mimetypes +import binascii import time import re @@ -39,7 +40,6 @@ class DeviantartExtractor(Extractor): self.offset = 0 self.flat = self.config("flat", True) self.extra = self.config("extra", False) - 
self.quality = self.config("quality", "100") self.original = self.config("original", True) self.comments = self.config("comments", False) self.user = match.group(1) or match.group(2) @@ -53,9 +53,6 @@ class DeviantartExtractor(Extractor): else: self.unwatch = None - if self.quality: - self.quality = ",q_{}".format(self.quality) - if self.original != "image": self._update_content = self._update_content_default else: @@ -104,19 +101,8 @@ class DeviantartExtractor(Extractor): if self.original and deviation["is_downloadable"]: self._update_content(deviation, content) - - if content["src"].startswith("https://images-wixmp-"): - if deviation["index"] <= 790677560: - # https://github.com/r888888888/danbooru/issues/4069 - intermediary, count = re.subn( - r"(/f/[^/]+/[^/]+)/v\d+/.*", - r"/intermediary\1", content["src"], 1) - if count: - deviation["_fallback"] = (content["src"],) - content["src"] = intermediary - if self.quality: - content["src"] = re.sub( - r",q_\d+", self.quality, content["src"], 1) + else: + self._update_token(deviation, content) yield self.commit(deviation, content) @@ -302,6 +288,32 @@ class DeviantartExtractor(Extractor): if mtype and mtype.startswith("image/"): content.update(data) + def _update_token(self, deviation, content): + """Replace JWT to be able to remove width/height limits + + All credit goes to @Ironchest337 + for discovering and implementing this method + """ + url, sep, _ = content["src"].partition("/v1/") + if not sep: + return + + # header = b'{"typ":"JWT","alg":"none"}' + payload = ( + b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' + + url.partition("/f/")[2].encode() + + b'"}]],"aud":["urn:service:file.download"]}' + ) + + deviation["_fallback"] = (content["src"],) + content["src"] = ( + "{}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{}.".format( + url, + # base64 of 'header' is precomputed as 'eyJ0eX...' 
+ # binascii.a2b_base64(header).rstrip(b"=\n").decode(), + binascii.b2a_base64(payload).rstrip(b"=\n").decode()) + ) + def _limited_request(self, url, **kwargs): """Limits HTTP requests to one every 2 seconds""" kwargs["fatal"] = None @@ -746,29 +758,27 @@ class DeviantartPopularExtractor(DeviantartExtractor): def __init__(self, match): DeviantartExtractor.__init__(self, match) - self.search_term = self.time_range = self.category_path = None self.user = "" trange1, path, trange2, query = match.groups() - trange = trange1 or trange2 query = text.parse_query(query) - - if not trange: - trange = query.get("order") - - if path: - self.category_path = path.strip("/") - if trange: - if trange.startswith("popular-"): - trange = trange[8:] - self.time_range = trange.replace("-", "").replace("hours", "hr") - if query: - self.search_term = query.get("q") + self.search_term = query.get("q") + + trange = trange1 or trange2 or query.get("order", "") + if trange.startswith("popular-"): + trange = trange[8:] + self.time_range = { + "most-recent" : "now", + "this-week" : "1week", + "this-month" : "1month", + "this-century": "alltime", + "all-time" : "alltime", + }.get(trange, "alltime") self.popular = { "search": self.search_term or "", - "range" : trange or "", - "path" : self.category_path, + "range" : trange or "all-time", + "path" : path.strip("/") if path else "", } def deviations(self): @@ -851,12 +861,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/intermediary/f/[^/]+/[^.]+\.jpg") - }), - # wixmp URL rewrite v2 (#369) - (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), { - "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100," + "pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f" + r"/[^/]+/[^.]+\.jpg\?token="), }), # GIF (#242) 
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), { diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 9516dfa..b5ecbd6 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -250,7 +250,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): FuraffinityExtractor.__init__(self, match) self.query = text.parse_query(match.group(2)) if self.user and "q" not in self.query: - self.query["q"] = text.unescape(self.user) + self.query["q"] = text.unquote(self.user) def metadata(self): return {"search": self.query.get("q")} diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index 2757852..9b4d5ee 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -22,7 +22,13 @@ class GfycatExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.key = match.group(1).lower() - self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif") + + formats = self.config("format") + if formats is None: + formats = ("mp4", "webm", "mobile", "gif") + elif isinstance(formats, str): + formats = (formats, "mp4", "webm", "mobile", "gif") + self.formats = formats def items(self): metadata = self.metadata() @@ -30,23 +36,25 @@ class GfycatExtractor(Extractor): if "gfyName" not in gfycat: self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"]) continue - url = self._select_format(gfycat) + url = self._process(gfycat) gfycat.update(metadata) - gfycat["date"] = text.parse_timestamp(gfycat.get("createDate")) yield Message.Directory, gfycat yield Message.Url, url, gfycat - def _select_format(self, gfyitem): + def _process(self, gfycat): + gfycat["_fallback"] = formats = self._formats(gfycat) + gfycat["date"] = text.parse_timestamp(gfycat.get("createDate")) + return next(formats, "") + + def _formats(self, gfycat): for fmt in self.formats: key = fmt + "Url" - if key in gfyitem: - url = gfyitem[key] + if key in 
gfycat: + url = gfycat[key] if url.startswith("http:"): url = "https" + url[4:] - gfyitem["extension"] = url.rpartition(".")[2] - return url - gfyitem["extension"] = "" - return "" + gfycat["extension"] = url.rpartition(".")[2] + yield url def metadata(self): return {} @@ -146,8 +154,7 @@ class GfycatImageExtractor(GfycatExtractor): if "gfyName" not in gfycat: self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"]) return - url = self._select_format(gfycat) - gfycat["date"] = text.parse_timestamp(gfycat.get("createDate")) + url = self._process(gfycat) yield Message.Directory, gfycat yield Message.Url, url, gfycat diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index cbe0f43..3d09d79 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -135,33 +135,123 @@ class InkbunnyUserExtractor(InkbunnyExtractor): return self.api.search(params) +class InkbunnyPoolExtractor(InkbunnyExtractor): + """Extractor for inkbunny pools""" + subcategory = "pool" + pattern = (BASE_PATTERN + r"/(?:" + r"poolview_process\.php\?pool_id=(\d+)|" + r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))") + test = ( + ("https://inkbunny.net/poolview_process.php?pool_id=28985", { + "count": 9, + }), + ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff" + "&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"), + ) + + def __init__(self, match): + InkbunnyExtractor.__init__(self, match) + pid = match.group(1) + if pid: + self.pool_id = pid + self.orderby = "pool_order" + else: + params = text.parse_query(match.group(2)) + self.pool_id = params.get("pool_id") + self.orderby = params.get("orderby", "pool_order") + + def posts(self): + params = { + "pool_id": self.pool_id, + "orderby": self.orderby, + } + return self.api.search(params) + + class InkbunnyFavoriteExtractor(InkbunnyExtractor): """Extractor for inkbunny user favorites""" subcategory = "favorite" - pattern = BASE_PATTERN + 
r"/userfavorites_process\.php\?favs_user_id=(\d+)" + pattern = (BASE_PATTERN + r"/(?:" + r"userfavorites_process\.php\?favs_user_id=(\d+)|" + r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))") test = ( ("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", { "pattern": r"https://[\w.]+\.metapix\.net/files/full" r"/\d+/\d+_\w+_.+", "range": "20-50", }), + ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff" + "&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"), ) def __init__(self, match): InkbunnyExtractor.__init__(self, match) - self.user_id = match.group(1) + uid = match.group(1) + if uid: + self.user_id = uid + self.orderby = self.config("orderby", "fav_datetime") + else: + params = text.parse_query(match.group(2)) + self.user_id = params.get("user_id") + self.orderby = params.get("orderby", "fav_datetime") def posts(self): - orderby = self.config("orderby", "fav_datetime") params = { "favs_user_id": self.user_id, - "orderby" : orderby, + "orderby" : self.orderby, } - if orderby and orderby.startswith("unread_"): + if self.orderby and self.orderby.startswith("unread_"): params["unread_submissions"] = "yes" return self.api.search(params) +class InkbunnyFollowingExtractor(InkbunnyExtractor): + """Extractor for inkbunny user watches""" + subcategory = "following" + pattern = (BASE_PATTERN + r"/(?:" + r"watchlist_process\.php\?mode=watching&user_id=(\d+)|" + r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))") + test = ( + (("https://inkbunny.net/watchlist_process.php" + "?mode=watching&user_id=20969"), { + "pattern": InkbunnyUserExtractor.pattern, + "count": ">= 90", + }), + ("https://inkbunny.net/usersviewall.php?rid=ffffffffff" + "&mode=watching&page=1&user_id=20969&orderby=added&namesonly="), + ) + + def __init__(self, match): + InkbunnyExtractor.__init__(self, match) + self.user_id = match.group(1) or \ + text.parse_query(match.group(2)).get("user_id") + + def items(self): + url = self.root + 
"/watchlist_process.php" + params = {"mode": "watching", "user_id": self.user_id} + + with self.request(url, params=params) as response: + url, _, params = response.url.partition("?") + page = response.text + + params = text.parse_query(params) + params["page"] = text.parse_int(params.get("page"), 1) + data = {"_extractor": InkbunnyUserExtractor} + + while True: + cnt = 0 + for user in text.extract_iter( + page, '<a class="widget_userNameSmall" href="', '"', + page.index('id="changethumboriginal_form"')): + cnt += 1 + yield Message.Queue, self.root + user, data + + if cnt < 20: + return + params["page"] += 1 + page = self.request(url, params=params).text + + class InkbunnyPostExtractor(InkbunnyExtractor): """Extractor for individual Inkbunny posts""" subcategory = "post" diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index c5f5ae7..d5aad67 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -14,7 +14,8 @@ from ..cache import cache import itertools import re -BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)" +BASE_PATTERN = r"(?:https?://)?kemono\.party" +USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)" class KemonopartyExtractor(Extractor): @@ -29,7 +30,9 @@ class KemonopartyExtractor(Extractor): def items(self): self._prepare_ddosguard_cookies() - find_inline = re.compile(r'src="(/inline/[^"]+)').findall + find_inline = re.compile( + r'src="(?:https?://kemono\.party)?(/inline/[^"]+' + r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall skip_service = \ "patreon" if self.config("patreon-skip-file", True) else None @@ -101,7 +104,7 @@ class KemonopartyExtractor(Extractor): class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" subcategory = "user" - pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])" + pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])" test = ( 
("https://kemono.party/fanbox/user/6993449", { "range": "1-25", @@ -138,11 +141,11 @@ class KemonopartyUserExtractor(KemonopartyExtractor): class KemonopartyPostExtractor(KemonopartyExtractor): """Extractor for a single kemono.party post""" subcategory = "post" - pattern = BASE_PATTERN + r"/post/([^/?#]+)" + pattern = USER_PATTERN + r"/post/([^/?#]+)" test = ( ("https://kemono.party/fanbox/user/6993449/post/506575", { - "pattern": r"https://kemono\.party/data/files/fanbox" - r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg", + "pattern": r"https://kemono.party/data/21/0f" + r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg", "keyword": { "added": "Wed, 06 May 2020 20:28:02 GMT", "content": str, @@ -197,10 +200,128 @@ class KemonopartyPostExtractor(KemonopartyExtractor): return (posts[0],) if len(posts) > 1 else posts +class KemonopartyDiscordExtractor(KemonopartyExtractor): + """Extractor for kemono.party discord servers""" + subcategory = "discord" + directory_fmt = ("{category}", "discord", "{server}", + "{channel_name|channel}") + filename_fmt = "{id}_{num:>02}_{filename}.{extension}" + archive_fmt = "discord_{server}_{id}_{num}" + pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)" + test = ( + (("https://kemono.party/discord" + "/server/488668827274444803#finish-work"), { + "count": 4, + "keyword": {"channel_name": "finish-work"}, + }), + (("https://kemono.party/discord" + "/server/256559665620451329/channel/462437519519383555#"), { + "pattern": r"https://kemono\.party/data/attachments/discord" + r"/256559665620451329/\d+/\d+/.+", + "count": ">= 2", + }), + # 'inline' files + (("https://kemono.party/discord" + "/server/315262215055736843/channel/315262215055736843#general"), { + "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$", + "range": "1-5", + "options": (("image-filter", "type == 'inline'"),), + }), + ) + + def __init__(self, match): + KemonopartyExtractor.__init__(self, match) + self.server, self.channel, 
self.channel_name = match.groups() + + def items(self): + self._prepare_ddosguard_cookies() + + find_inline = re.compile( + r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)" + r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall + + posts = self.posts() + max_posts = self.config("max-posts") + if max_posts: + posts = itertools.islice(posts, max_posts) + + for post in posts: + files = [] + append = files.append + for attachment in post["attachments"]: + attachment["type"] = "attachment" + append(attachment) + for path in find_inline(post["content"] or ""): + append({"path": "https://cdn.discordapp.com" + path, + "name": path, "type": "inline"}) + + post["channel_name"] = self.channel_name + post["date"] = text.parse_datetime( + post["published"], "%a, %d %b %Y %H:%M:%S %Z") + yield Message.Directory, post + + for post["num"], file in enumerate(files, 1): + post["type"] = file["type"] + url = file["path"] + if url[0] == "/": + url = self.root + "/data" + url + elif url.startswith("https://kemono.party"): + url = self.root + "/data" + url[20:] + + text.nameext_from_url(file["name"], post) + yield Message.Url, url, post + + def posts(self): + if self.channel is None: + url = "{}/api/discord/channels/lookup?q={}".format( + self.root, self.server) + for channel in self.request(url).json(): + if channel["name"] == self.channel_name: + self.channel = channel["id"] + break + else: + raise exception.NotFoundError("channel") + + url = "{}/api/discord/channel/{}".format(self.root, self.channel) + params = {"skip": 0} + + while True: + posts = self.request(url, params=params).json() + yield from posts + + if len(posts) < 25: + break + params["skip"] += 25 + + +class KemonopartyDiscordServerExtractor(KemonopartyExtractor): + subcategory = "discord-server" + pattern = BASE_PATTERN + r"/discord/server/(\d+)$" + test = ("https://kemono.party/discord/server/488668827274444803", { + "pattern": KemonopartyDiscordExtractor.pattern, + "count": 13, + }) + + def __init__(self, 
match): + KemonopartyExtractor.__init__(self, match) + self.server = match.group(1) + + def items(self): + url = "{}/api/discord/channels/lookup?q={}".format( + self.root, self.server) + channels = self.request(url).json() + + for channel in channels: + url = "{}/discord/server/{}/channel/{}#{}".format( + self.root, self.server, channel["id"], channel["name"]) + channel["_extractor"] = KemonopartyDiscordExtractor + yield Message.Queue, url, channel + + class KemonopartyFavoriteExtractor(KemonopartyExtractor): """Extractor for kemono.party favorites""" subcategory = "favorite" - pattern = r"(?:https?://)?kemono\.party/favorites" + pattern = BASE_PATTERN + r"/favorites" test = ("https://kemono.party/favorites", { "pattern": KemonopartyUserExtractor.pattern, "url": "f4b5b796979bcba824af84206578c79101c7f0e1", diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 634a92d..ff1d7c3 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -209,22 +209,15 @@ class MangadexAPI(): return self._call("/manga/" + uuid)["data"] def manga_feed(self, uuid): - config = self.extractor.config - order = "desc" if config("chapter-reverse") else "asc" + order = "desc" if self.extractor.config("chapter-reverse") else "asc" params = { - "order[volume]" : order, - "order[chapter]" : order, - "translatedLanguage[]": config("lang"), - "contentRating[]" : [ - "safe", "suggestive", "erotica", "pornographic"], + "order[volume]" : order, + "order[chapter]": order, } return self._pagination("/manga/" + uuid + "/feed", params) def user_follows_manga_feed(self): - params = { - "order[publishAt]" : "desc", - "translatedLanguage[]": self.extractor.config("lang"), - } + params = {"order[publishAt]": "desc"} return self._pagination("/user/follows/manga/feed", params) def authenticate(self): @@ -275,8 +268,20 @@ class MangadexAPI(): def _pagination(self, endpoint, params=None): if params is None: params = {} + + config = 
self.extractor.config + ratings = config("ratings") + if ratings is None: + ratings = ("safe", "suggestive", "erotica", "pornographic") + + params["contentRating[]"] = ratings + params["translatedLanguage[]"] = config("lang") params["offset"] = 0 + api_params = config("api-parameters") + if api_params: + params.update(api_params) + while True: data = self._call(endpoint, params) yield from data["data"] diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py index 20b716b..9df43e5 100644 --- a/gallery_dl/extractor/nhentai.py +++ b/gallery_dl/extractor/nhentai.py @@ -14,15 +14,10 @@ import collections import json -class NhentaiBase(): - """Base class for nhentai extractors""" +class NhentaiGalleryExtractor(GalleryExtractor): + """Extractor for image galleries from nhentai.net""" category = "nhentai" root = "https://nhentai.net" - media_url = "https://i.nhentai.net" - - -class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor): - """Extractor for image galleries from nhentai.net""" pattern = r"(?:https?://)?nhentai\.net/g/(\d+)" test = ("https://nhentai.net/g/147850/", { "url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0", @@ -87,8 +82,8 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor): } def images(self, _): - ufmt = "{}/galleries/{}/{{}}.{{}}".format( - self.media_url, self.data["media_id"]) + ufmt = ("https://i.nhentai.net/galleries/" + + self.data["media_id"] + "/{}.{}") extdict = {"j": "jpg", "p": "png", "g": "gif"} return [ @@ -99,28 +94,24 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor): ] -class NhentaiSearchExtractor(NhentaiBase, Extractor): - """Extractor for nhentai search results""" - subcategory = "search" - pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)" - test = ("https://nhentai.net/search/?q=touhou", { - "pattern": NhentaiGalleryExtractor.pattern, - "count": 30, - "range": "1-30", - }) +class NhentaiExtractor(Extractor): + """Base class for nhentai extractors""" + category = "nhentai" + 
root = "https://nhentai.net" def __init__(self, match): Extractor.__init__(self, match) - self.params = text.parse_query(match.group(1)) + self.path, self.query = match.groups() def items(self): data = {"_extractor": NhentaiGalleryExtractor} - for gallery_id in self._pagination(self.params): + for gallery_id in self._pagination(): url = "{}/g/{}/".format(self.root, gallery_id) yield Message.Queue, url, data - def _pagination(self, params): - url = "{}/search/".format(self.root) + def _pagination(self): + url = self.root + self.path + params = text.parse_query(self.query) params["page"] = text.parse_int(params.get("page"), 1) while True: @@ -131,29 +122,40 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor): params["page"] += 1 -class NhentaiFavoriteExtractor(NhentaiBase, Extractor): +class NhentaiTagExtractor(NhentaiExtractor): + """Extractor for nhentai tag searches""" + subcategory = "tag" + pattern = (r"(?:https?://)?nhentai\.net(" + r"/(?:artist|category|character|group|language|parody|tag)" + r"/[^/?#]+(?:/popular[^/?#]*)?/?)(?:\?([^#]+))?") + test = ( + ("https://nhentai.net/tag/sole-female/", { + "pattern": NhentaiGalleryExtractor.pattern, + "count": 30, + "range": "1-30", + }), + ("https://nhentai.net/artist/itou-life/"), + ("https://nhentai.net/group/itou-life/"), + ("https://nhentai.net/parody/touhou-project/"), + ("https://nhentai.net/character/patchouli-knowledge/popular"), + ("https://nhentai.net/category/doujinshi/popular-today"), + ("https://nhentai.net/language/english/popular-week"), + ) + + +class NhentaiSearchExtractor(NhentaiExtractor): + """Extractor for nhentai search results""" + subcategory = "search" + pattern = r"(?:https?://)?nhentai\.net(/search/?)\?([^#]+)" + test = ("https://nhentai.net/search/?q=touhou", { + "pattern": NhentaiGalleryExtractor.pattern, + "count": 30, + "range": "1-30", + }) + + +class NhentaiFavoriteExtractor(NhentaiExtractor): """Extractor for nhentai favorites""" subcategory = "favorite" - pattern = 
r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?" + pattern = r"(?:https?://)?nhentai\.net(/favorites/?)(?:\?([^#]+))?" test = ("https://nhentai.net/favorites/",) - - def __init__(self, match): - Extractor.__init__(self, match) - self.params = text.parse_query(match.group(1)) - - def items(self): - data = {"_extractor": NhentaiGalleryExtractor} - for gallery_id in self._pagination(self.params): - url = "{}/g/{}/".format(self.root, gallery_id) - yield Message.Queue, url, data - - def _pagination(self, params): - url = "{}/favorites/".format(self.root) - params["page"] = text.parse_int(params.get("page"), 1) - - while True: - page = self.request(url, params=params).text - yield from text.extract_iter(page, 'href="/g/', '/') - if 'class="next"' not in page: - return - params["page"] += 1 diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 547465b..c7df089 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -32,22 +32,19 @@ class PatreonExtractor(Extractor): if "session_id" not in self.session.cookies: self.log.warning("no 'session_id' cookie set") PatreonExtractor._warning = False + generators = self._build_file_generators(self.config("files")) for post in self.posts(): if not post.get("current_user_can_view", True): self.log.warning("Not allowed to view post %s", post["id"]) continue + yield Message.Directory, post + post["num"] = 0 hashes = set() - - yield Message.Directory, post - for kind, url, name in itertools.chain( - self._images(post), - self._attachments(post), - self._postfile(post), - self._content(post), - ): + for kind, url, name in itertools.chain.from_iterable( + g(post) for g in generators): fhash = self._filehash(url) if fhash not in hashes or not fhash: hashes.add(fhash) @@ -82,15 +79,14 @@ class PatreonExtractor(Extractor): if url: yield "attachment", url, attachment["name"] - @staticmethod - def _content(post): + def _content(self, post): content = post.get("content") if 
content: for img in text.extract_iter( content, '<img data-media-id="', '>'): url = text.extract(img, 'src="', '"')[0] if url: - yield "content", url, url + yield "content", url, self._filename(url) or url def posts(self): """Return all relevant post objects""" @@ -155,7 +151,7 @@ class PatreonExtractor(Extractor): included[file["type"]][file["id"]] for file in files["data"] ] - return [] + return () @memcache(keyarg=1) def _user(self, url): @@ -212,6 +208,20 @@ class PatreonExtractor(Extractor): "&json-api-version=1.0" ) + def _build_file_generators(self, filetypes): + if filetypes is None: + return (self._images, self._attachments, + self._postfile, self._content) + genmap = { + "images" : self._images, + "attachments": self._attachments, + "postfile" : self._postfile, + "content" : self._content, + } + if isinstance(filetypes, str): + filetypes = filetypes.split(",") + return [genmap[ft] for ft in filetypes] + class PatreonCreatorExtractor(PatreonExtractor): """Extractor for a creator's works""" @@ -305,8 +315,9 @@ class PatreonPostExtractor(PatreonExtractor): "count": 4, }), # postfile + content - ("https://www.patreon.com/posts/19987002", { - "count": 4, + ("https://www.patreon.com/posts/56127163", { + "count": 3, + "keyword": {"filename": r"re:^(?!1).+$"}, }), # tags (#1539) ("https://www.patreon.com/posts/free-post-12497641", { diff --git a/gallery_dl/extractor/picarto.py b/gallery_dl/extractor/picarto.py new file mode 100644 index 0000000..77a07b4 --- /dev/null +++ b/gallery_dl/extractor/picarto.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://picarto.tv/""" + +from .common import Extractor, Message +from .. 
import text + + +class PicartoGalleryExtractor(Extractor): + """Extractor for picarto galleries""" + category = "picarto" + subcategory = "gallery" + root = "https://picarto.tv" + directory_fmt = ("{category}", "{channel[name]}") + filename_fmt = "{id} {title}.{extension}" + archive_fmt = "{id}" + pattern = r"(?:https?://)?picarto\.tv/([^/?#]+)/gallery" + test = ("https://picarto.tv/fnook/gallery/default/", { + "pattern": r"https://images\.picarto\.tv/gallery/\d/\d\d/\d+/artwork" + r"/[0-9a-f-]+/large-[0-9a-f]+\.(jpg|png|gif)", + "count": ">= 7", + "keyword": {"date": "type:datetime"}, + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.username = match.group(1) + + def items(self): + for post in self.posts(): + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%d %H:%M:%S") + variations = post.pop("variations", ()) + yield Message.Directory, post + + image = post["default_image"] + if not image: + continue + url = "https://images.picarto.tv/gallery/" + image["name"] + text.nameext_from_url(url, post) + yield Message.Url, url, post + + for variation in variations: + post.update(variation) + image = post["default_image"] + url = "https://images.picarto.tv/gallery/" + image["name"] + text.nameext_from_url(url, post) + yield Message.Url, url, post + + def posts(self): + url = "https://ptvintern.picarto.tv/api/channel-gallery" + params = { + "first": "30", + "page": 1, + "filter_params[album_id]": "", + "filter_params[channel_name]": self.username, + "filter_params[q]": "", + "filter_params[visibility]": "", + "order_by[field]": "published_at", + "order_by[order]": "DESC", + } + + while True: + posts = self.request(url, params=params).json() + if not posts: + return + yield from posts + params["page"] += 1 diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index e21a82c..8e47e2e 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -558,6 +558,68 @@ class 
PixivPixivisionExtractor(PixivExtractor): } +class PixivSketchExtractor(Extractor): + """Extractor for user pages on sketch.pixiv.net""" + category = "pixiv" + subcategory = "sketch" + directory_fmt = ("{category}", "sketch", "{user[unique_name]}") + filename_fmt = "{post_id} {id}.{extension}" + archive_fmt = "S{user[id]}_{id}" + root = "https://sketch.pixiv.net" + cookiedomain = ".pixiv.net" + pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)" + test = ("https://sketch.pixiv.net/@nicoby", { + "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium" + r"/file/\d+/\d+\.(jpg|png)", + "count": ">= 35", + }) + + def __init__(self, match): + Extractor.__init__(self, match) + self.username = match.group(1) + + def items(self): + headers = {"Referer": "{}/@{}".format(self.root, self.username)} + + for post in self.posts(): + media = post["media"] + post["post_id"] = post["id"] + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + util.delete_items(post, ("id", "media", "_links")) + + yield Message.Directory, post + post["_http_headers"] = headers + + for photo in media: + original = photo["photo"]["original"] + post["id"] = photo["id"] + post["width"] = original["width"] + post["height"] = original["height"] + + url = original["url"] + text.nameext_from_url(url, post) + yield Message.Url, url, post + + def posts(self): + url = "{}/api/walls/@{}/posts/public.json".format( + self.root, self.username) + headers = { + "Accept": "application/vnd.sketch-v4+json", + "X-Requested-With": "{}/@{}".format(self.root, self.username), + "Referer": self.root + "/", + } + + while True: + data = self.request(url, headers=headers).json() + yield from data["data"]["items"] + + next_url = data["_links"].get("next") + if not next_url: + return + url = self.root + next_url["href"] + + class PixivAppAPI(): """Minimal interface for the Pixiv App API for mobile devices diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py 
index 28e049b..a2a24e0 100644 --- a/gallery_dl/extractor/seisoparty.py +++ b/gallery_dl/extractor/seisoparty.py @@ -9,7 +9,8 @@ """Extractors for https://seiso.party/""" from .common import Extractor, Message -from .. import text +from .. import text, exception +from ..cache import cache import re @@ -52,6 +53,25 @@ class SeisopartyExtractor(Extractor): "files" : self._find_files(page), } + def login(self): + username, password = self._get_auth_info() + if username: + self._update_cookies(self._login_impl(username, password)) + + @cache(maxage=28*24*3600, keyarg=1) + def _login_impl(self, username, password): + self.log.info("Logging in as %s", username) + + url = self.root + "/account/login" + data = {"username": username, "password": password} + + response = self.request(url, method="POST", data=data) + if response.url.endswith("/account/login") and \ + "Username or password is incorrect" in response.text: + raise exception.AuthenticationError() + + return {c.name: c.value for c in response.history[0].cookies} + class SeisopartyUserExtractor(SeisopartyExtractor): """Extractor for all posts from a seiso.party user listing""" @@ -136,3 +156,46 @@ class SeisopartyPostExtractor(SeisopartyExtractor): url = "{}/post/{}/{}/{}".format( self.root, self.service, self.user_id, self.post_id) return (self._parse_post(self.request(url).text, self.post_id),) + + +class SeisopartyFavoriteExtractor(SeisopartyExtractor): + """Extractor for seiso.party favorites""" + subcategory = "favorite" + pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?" 
+ test = ( + ("https://seiso.party/favorites/artists", { + "pattern": SeisopartyUserExtractor.pattern, + "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683", + "count": 3, + }), + ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", { + "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3", + }), + ) + + def __init__(self, match): + SeisopartyExtractor.__init__(self, match) + self.query = match.group(1) + + def items(self): + self._prepare_ddosguard_cookies() + self.login() + + url = self.root + "/favorites/artists" + data = {"_extractor": SeisopartyUserExtractor} + params = text.parse_query(self.query) + params["page"] = text.parse_int(params.get("page"), 1) + + while True: + page = self.request(url, params=params).text + + cnt = 0 + for card in text.extract_iter( + page, '<div class="artist-card', '</a>'): + path = text.extract(card, '<a href="', '"')[0] + yield Message.Queue, self.root + path, data + cnt += 1 + + if cnt < 25: + return + params["page"] += 1 diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 4a3f6cd..568ee2e 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -41,6 +41,16 @@ class TwitterExtractor(Extractor): self.cards = self.config("cards", False) self._user_cache = {} + size = self.config("size") + if size is None: + self._size_image = "orig" + self._size_fallback = ("large", "medium", "small") + else: + if isinstance(size, str): + size = size.split(",") + self._size_image = size[0] + self._size_fallback = size[1:] + def items(self): self.login() metadata = self.metadata() @@ -115,7 +125,7 @@ class TwitterExtractor(Extractor): base, _, fmt = url.rpartition(".") base += "?format=" + fmt + "&name=" files.append(text.nameext_from_url(url, { - "url" : base + "orig", + "url" : base + self._size_image, "width" : width, "height" : height, "_fallback": self._image_fallback(base), @@ -123,11 +133,9 @@ class TwitterExtractor(Extractor): else: files.append({"url": 
media["media_url"]}) - @staticmethod - def _image_fallback(base): - yield base + "large" - yield base + "medium" - yield base + "small" + def _image_fallback(self, base): + for fmt in self._size_fallback: + yield base + fmt def _extract_card(self, tweet, files): card = tweet["card"] diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index 9dd2d47..9724c4b 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -12,18 +12,67 @@ from .common import Extractor, Message from .. import text import re +BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com" -class VkPhotosExtractor(Extractor): - """Extractor for photos from a vk user""" + +class VkExtractor(Extractor): + """Base class for vk extractors""" category = "vk" - subcategory = "photos" directory_fmt = ("{category}", "{user[name]|user[id]}") filename_fmt = "{id}.{extension}" archive_fmt = "{id}" root = "https://vk.com" request_interval = 1.0 - pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:" - r"(?:albums|photos|id)(-?\d+)|([^/?#]+))") + + def items(self): + data = self.metadata() + yield Message.Directory, data + for photo in self.photos(): + photo.update(data) + yield Message.Url, photo["url"], photo + + def _pagination(self, photos_url, user_id): + sub = re.compile(r"/imp[fg]/").sub + needle = 'data-id="{}_'.format(user_id) + cnt = 0 + + headers = { + "X-Requested-With": "XMLHttpRequest", + "Origin" : self.root, + "Referer" : photos_url, + } + params = { + "al" : "1", + "al_ad" : "0", + "offset": 0, + "part" : "1", + } + + while True: + payload = self.request( + photos_url, method="POST", headers=headers, data=params + ).json()["payload"][1] + + offset = payload[0] + html = payload[1] + + for cnt, photo in enumerate(text.extract_iter(html, needle, ')')): + pid = photo[:photo.find('"')] + url = photo[photo.rindex("(")+1:] + url = sub("/", url.partition("?")[0]) + yield text.nameext_from_url(url, {"url": url, "id": pid}) + + if cnt <= 20 or offset == params["offset"]: + 
return + params["offset"] = offset + + +class VkPhotosExtractor(VkExtractor): + """Extractor for photos from a vk user""" + subcategory = "photos" + pattern = (BASE_PATTERN + r"/(?:" + r"(?:albums|photos|id)(-?\d+)" + r"|(?!album-?\d+_)([^/?#]+))") test = ( ("https://vk.com/id398982326", { "pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+" @@ -58,10 +107,14 @@ class VkPhotosExtractor(Extractor): ) def __init__(self, match): - Extractor.__init__(self, match) + VkExtractor.__init__(self, match) self.user_id, self.user_name = match.groups() - def items(self): + def photos(self): + url = "{}/photos{}".format(self.root, self.user_id) + return self._pagination(url, self.user_id) + + def metadata(self): if self.user_id: user_id = self.user_id prefix = "public" if user_id[0] == "-" else "id" @@ -70,40 +123,8 @@ class VkPhotosExtractor(Extractor): else: url = "{}/{}".format(self.root, self.user_name) data = self._extract_profile(url) - user_id = data["user"]["id"] - - photos_url = "{}/photos{}".format(self.root, user_id) - headers = { - "X-Requested-With": "XMLHttpRequest", - "Origin" : self.root, - "Referer" : photos_url, - } - params = { - "al" : "1", - "al_ad" : "0", - "offset": 0, - "part" : "1", - } - - yield Message.Directory, data - sub = re.compile(r"/imp[fg]/").sub - needle = 'data-id="{}_'.format(user_id) - cnt = 0 - - while True: - offset, html = self.request( - photos_url, method="POST", headers=headers, data=params - ).json()["payload"][1] - - for cnt, photo in enumerate(text.extract_iter(html, needle, ')')): - data["id"] = photo[:photo.find('"')] - url = photo[photo.rindex("(")+1:] - url = sub("/", url.partition("?")[0]) - yield Message.Url, url, text.nameext_from_url(url, data) - - if cnt <= 40 or offset == params["offset"]: - return - params["offset"] = offset + self.user_id = data["user"]["id"] + return data def _extract_profile(self, url): extr = text.extract_from(self.request(url).text) @@ -116,3 +137,32 @@ class VkPhotosExtractor(Extractor): '<span 
class="current_text">', '</span'))), "id" : extr('<a href="/albums', '"'), }} + + +class VkAlbumExtractor(VkExtractor): + """Extractor for a vk album""" + subcategory = "album" + directory_fmt = ("{category}", "{user[id]}", "{album[id]}") + pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$" + test = ( + ("https://vk.com/album221469416_0", { + "count": 3, + }), + ("https://vk.com/album-165740836_281339889", { + "count": 12, + }), + ) + + def __init__(self, match): + VkExtractor.__init__(self, match) + self.user_id, self.album_id = match.groups() + + def photos(self): + url = "{}/album{}_{}".format(self.root, self.user_id, self.album_id) + return self._pagination(url, self.user_id) + + def metadata(self): + return { + "user": {"id": self.user_id}, + "album": {"id": self.album_id}, + } diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py index a08cdc4..b3b94f7 100644 --- a/gallery_dl/postprocessor/compare.py +++ b/gallery_dl/postprocessor/compare.py @@ -20,36 +20,32 @@ class ComparePP(PostProcessor): PostProcessor.__init__(self, job) if options.get("shallow"): self._compare = self._compare_size + self._equal_exc = self._equal_cnt = 0 - action = options.get("action") - if action == "enumerate": - job.register_hooks({"file": self.enumerate}, options) - else: - job.register_hooks({"file": self.compare}, options) - action, _, smax = action.partition(":") - self._skipmax = text.parse_int(smax) - self._skipexc = self._skipcnt = 0 - if action == "abort": - self._skipexc = exception.StopExtraction - elif action == "terminate": - self._skipexc = exception.TerminateExtraction - elif action == "exit": - self._skipexc = sys.exit - - def compare(self, pathfmt): + equal = options.get("equal") + if equal: + equal, _, emax = equal.partition(":") + self._equal_max = text.parse_int(emax) + if equal == "abort": + self._equal_exc = exception.StopExtraction + elif equal == "terminate": + self._equal_exc = exception.TerminateExtraction + elif equal == 
"exit": + self._equal_exc = sys.exit + + job.register_hooks({"file": ( + self.enumerate + if options.get("action") == "enumerate" else + self.replace + )}, options) + + def replace(self, pathfmt): try: if self._compare(pathfmt.realpath, pathfmt.temppath): - if self._skipexc: - self._skipcnt += 1 - if self._skipcnt >= self._skipmax: - util.remove_file(pathfmt.temppath) - print() - raise self._skipexc() - pathfmt.delete = True - else: - self._skipcnt = 0 + return self._equal(pathfmt) except OSError: pass + self._equal_cnt = 0 def enumerate(self, pathfmt): num = 1 @@ -58,9 +54,10 @@ class ComparePP(PostProcessor): pathfmt.prefix = str(num) + "." pathfmt.set_extension(pathfmt.extension, False) num += 1 - pathfmt.delete = True + return self._equal(pathfmt) except OSError: pass + self._equal_cnt = 0 def _compare(self, f1, f2): return self._compare_size(f1, f2) and self._compare_content(f1, f2) @@ -81,5 +78,14 @@ class ComparePP(PostProcessor): if not buf1: return True + def _equal(self, pathfmt): + if self._equal_exc: + self._equal_cnt += 1 + if self._equal_cnt >= self._equal_max: + util.remove_file(pathfmt.temppath) + print() + raise self._equal_exc() + pathfmt.delete = True + __postprocessor__ = ComparePP diff --git a/gallery_dl/version.py b/gallery_dl/version.py index acc3b8d..ee01549 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.19.0" +__version__ = "1.19.1" diff --git a/test/test_cache.py b/test/test_cache.py index ecf482c..9b3623a 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -20,6 +20,7 @@ from gallery_dl import config, util # noqa E402 dbpath = tempfile.mkstemp()[1] config.set(("cache",), "file", dbpath) from gallery_dl import cache # noqa E402 +cache._init() # def tearDownModule(): |