| field     | value |
|-----------|-------|
| author    | 2025-03-01 19:51:45 -0500 |
| committer | 2025-03-01 19:51:45 -0500 |
| commit    | bc1c79d35e0a75bc8da8f6f010df779c4acca201 (patch) |
| tree      | 9d8808a5aec770221eb667160a3fbda61f9d5d49 |
| parent    | 75e3edb22dad2fc506494bb90ee6b331f5169adf (diff) |
| parent    | 889c7b8caec8fc0b9c7a583ed1d9cfa43518fc42 (diff) |
Update upstream source from tag 'upstream/1.29.0'
Update to upstream version '1.29.0'
with Debian dir 7b309aa6ccc040a2faaf51d37a63f5233590a8d7
54 files changed, 2009 insertions, 256 deletions
```diff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8856682..4294e8a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,53 +1,59 @@
-## 1.28.5 - 2025-01-28
+## 1.29.0 - 2025-03-01
+### Changes
+- build `gallery-dl.exe` on Windows 10 / Python 3.13 ([#6684](https://github.com/mikf/gallery-dl/issues/6684))
+- provide Windows 7 / Python 3.8 builds as `gallery-dl_x86.exe`
 ### Extractors
 #### Additions
-- [nekohouse] add support ([#5241](https://github.com/mikf/gallery-dl/issues/5241), [#6738](https://github.com/mikf/gallery-dl/issues/6738))
-- [turboimagehost] add support for galleries ([#6855](https://github.com/mikf/gallery-dl/issues/6855))
-- [xfolio] add support ([#5514](https://github.com/mikf/gallery-dl/issues/5514), [#6351](https://github.com/mikf/gallery-dl/issues/6351), [#6837](https://github.com/mikf/gallery-dl/issues/6837))
+- [bilibili] add `user-articles-favorite` extractor ([#6725](https://github.com/mikf/gallery-dl/issues/6725) [#6781](https://github.com/mikf/gallery-dl/issues/6781))
+- [boosty] add `direct-messages` extractor ([#6768](https://github.com/mikf/gallery-dl/issues/6768))
+- [discord] add support ([#454](https://github.com/mikf/gallery-dl/issues/454) [#6836](https://github.com/mikf/gallery-dl/issues/6836) [#7059](https://github.com/mikf/gallery-dl/issues/7059) [#7067](https://github.com/mikf/gallery-dl/issues/7067))
+- [furry34] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078) [#7018](https://github.com/mikf/gallery-dl/issues/7018))
+- [hentaiera] add support ([#3046](https://github.com/mikf/gallery-dl/issues/3046) [#6952](https://github.com/mikf/gallery-dl/issues/6952) [#7020](https://github.com/mikf/gallery-dl/issues/7020))
+- [hentairox] add support ([#7003](https://github.com/mikf/gallery-dl/issues/7003))
+- [imgur] add support for personal posts ([#6990](https://github.com/mikf/gallery-dl/issues/6990))
+- [imhentai] add support ([#1660](https://github.com/mikf/gallery-dl/issues/1660) [#3046](https://github.com/mikf/gallery-dl/issues/3046) [#3824](https://github.com/mikf/gallery-dl/issues/3824) [#4338](https://github.com/mikf/gallery-dl/issues/4338) [#5936](https://github.com/mikf/gallery-dl/issues/5936))
+- [tiktok] add support ([#3061](https://github.com/mikf/gallery-dl/issues/3061) [#4177](https://github.com/mikf/gallery-dl/issues/4177) [#5646](https://github.com/mikf/gallery-dl/issues/5646) [#6878](https://github.com/mikf/gallery-dl/issues/6878) [#6708](https://github.com/mikf/gallery-dl/issues/6708))
+- [vsco] support `/video/` URLs ([#4295](https://github.com/mikf/gallery-dl/issues/4295) [#6973](https://github.com/mikf/gallery-dl/issues/6973))
 #### Fixes
-- [4archive] fix `TypeError`
-- [adultempire] bypass age confirmation check
-- [architizer] fix extraction
-- [artstation] avoid Cloudflare challenges ([#5817](https://github.com/mikf/gallery-dl/issues/5817), [#5658](https://github.com/mikf/gallery-dl/issues/5658), [#5564](https://github.com/mikf/gallery-dl/issues/5564), [#5554](https://github.com/mikf/gallery-dl/issues/5554))
-- [deviantart] prevent crash when accessing `premium_folder` data ([#6873](https://github.com/mikf/gallery-dl/issues/6873))
-- [fapachi] fix extraction ([#6881](https://github.com/mikf/gallery-dl/issues/6881))
-- [issuu] fix `user` extractor
-- [kemonoparty] fix `username` metadata and filtering by `tag` for `/posts` URLs ([#6833](https://github.com/mikf/gallery-dl/issues/6833))
-- [mangafox] fix chapter extraction
-- [mangahere] fix chapter extraction
-- [pixiv] fix `sanity_level` workaround ([#4327](https://github.com/mikf/gallery-dl/issues/4327))
-- [pornpics] fix pagination results from HTML pages
-- [twitter] handle exceptions during file extraction ([#6647](https://github.com/mikf/gallery-dl/issues/6647))
-- [vsco] fix `JSONDecodeError` ([#6887](https://github.com/mikf/gallery-dl/issues/6887), [#6891](https://github.com/mikf/gallery-dl/issues/6891))
-- [weebcentral] fix extraction ([#6860](https://github.com/mikf/gallery-dl/issues/6860))
-- [xhamster] fix `gallery` extractor ([#6818](https://github.com/mikf/gallery-dl/issues/6818), [#6876](https://github.com/mikf/gallery-dl/issues/6876))
+- [bunkr] decrypt file URLs ([#7058](https://github.com/mikf/gallery-dl/issues/7058) [#7070](https://github.com/mikf/gallery-dl/issues/7070) [#7085](https://github.com/mikf/gallery-dl/issues/7085) [#7089](https://github.com/mikf/gallery-dl/issues/7089) [#7090](https://github.com/mikf/gallery-dl/issues/7090))
+- [chevereto/jpgfish] fix extraction ([#7073](https://github.com/mikf/gallery-dl/issues/7073) [#7079](https://github.com/mikf/gallery-dl/issues/7079))
+- [generic] fix config lookups by subcategory
+- [philomena] fix `date` values without UTC offset ([#6921](https://github.com/mikf/gallery-dl/issues/6921))
+- [philomena] download `full` URLs to prevent potential 404 errors ([#6922](https://github.com/mikf/gallery-dl/issues/6922))
+- [pixiv] prevent exceptions during `comments` extraction ([#6965](https://github.com/mikf/gallery-dl/issues/6965))
+- [reddit] restrict subreddit search results ([#7025](https://github.com/mikf/gallery-dl/issues/7025))
+- [sankaku] fix extraction ([#7071](https://github.com/mikf/gallery-dl/issues/7071) [#7072](https://github.com/mikf/gallery-dl/issues/7072))
+- [subscribestar] fix `post` extractor ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [twitter] revert generated CSRF token length to 32 characters ([#6895](https://github.com/mikf/gallery-dl/issues/6895))
+- [vipergirls] change default `domain` to `viper.click` ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
+- [weebcentral] fix extracting wrong number of chapter pages ([#6966](https://github.com/mikf/gallery-dl/issues/6966))
 #### Improvements
-- [batoto] use `chapter_id` in default archive IDs ([#6835](https://github.com/mikf/gallery-dl/issues/6835))
-- [e621] support `e621.cc` and `e621.anthro.fr` frontend URLs ([#6809](https://github.com/mikf/gallery-dl/issues/6809))
-- [e621] prevent premature pagination end ([#6886](https://github.com/mikf/gallery-dl/issues/6886))
-- [facebook] allow accessing all metadata in `directory` format strings ([#6874](https://github.com/mikf/gallery-dl/issues/6874))
-- [hiperdex] update domain to `hiperdex.com`
-- [kemonoparty] enable filtering creator posts by tag ([#6833](https://github.com/mikf/gallery-dl/issues/6833))
-- [khinsider] add `covers` option ([#6844](https://github.com/mikf/gallery-dl/issues/6844))
-- [komikcast] update domain to `komikcast.la`
-- [lofter] improve error handling ([#6865](https://github.com/mikf/gallery-dl/issues/6865))
-- [pornpics] avoid redirect when retrieving a gallery page
-- [urlgalleries] support new URL format
+- [b4k] update domain to `arch.b4k.dev` ([#6955](https://github.com/mikf/gallery-dl/issues/6955) [#6956](https://github.com/mikf/gallery-dl/issues/6956))
+- [bunkr] update default archive ID format ([#6935](https://github.com/mikf/gallery-dl/issues/6935))
+- [bunkr] provide fallback URLs for 403 download links ([#6732](https://github.com/mikf/gallery-dl/issues/6732) [#6972](https://github.com/mikf/gallery-dl/issues/6972))
+- [bunkr] implement fast `--range` support ([#6985](https://github.com/mikf/gallery-dl/issues/6985))
+- [furaffinity] use a default delay of 1 second between requests ([#7054](https://github.com/mikf/gallery-dl/issues/7054))
+- [itaku] support gallery section URLs ([#6951](https://github.com/mikf/gallery-dl/issues/6951))
+- [patreon] support `/profile/creators` URLs
+- [subscribestar] detect and handle redirects ([#6916](https://github.com/mikf/gallery-dl/issues/6916))
+- [twibooru] match URLs with `www` subdomain ([#6903](https://github.com/mikf/gallery-dl/issues/6903))
+- [twitter] support `grok` cards content ([#7040](https://github.com/mikf/gallery-dl/issues/7040))
+- [vsco] improve `m3u8` handling
+- [weibo] add `movies` option ([#6988](https://github.com/mikf/gallery-dl/issues/6988))
 #### Metadata
-- [bunkr] extract better `filename` metadata ([#6824](https://github.com/mikf/gallery-dl/issues/6824))
-- [hiperdex] fix `description` metadata
-- [khinsider] extract more `album` metadata ([#6844](https://github.com/mikf/gallery-dl/issues/6844))
-- [mangaread] fix manga metadata extraction
-- [rule34xyz] fix `date` and `tags` metadata
-- [saint] fix metadata of `/d/` URLs
-- [toyhouse] fix `date`, `artists`, and `characters` metadata
-- [webtoons] fix `username` and `author_name` metadata
-#### Removals
-- [cohost] remove module
-- [fanleaks] remove module
-- [shimmie2] remove `tentaclerape.net`
-- [szurubooru] remove `booru.foalcon.com`
+- [bunkr] extract `id_url` metadata ([#6935](https://github.com/mikf/gallery-dl/issues/6935))
+- [erome] extract `tags` metadata ([#7076](https://github.com/mikf/gallery-dl/issues/7076))
+- [issuu] unescape HTML entities
+- [newgrounds] provide `comment_html` metadata ([#7038](https://github.com/mikf/gallery-dl/issues/7038))
+- [patreon] extract `campaign` metadata ([#6989](https://github.com/mikf/gallery-dl/issues/6989))
+### Downloaders
+- implement `downloader` options per extractor category
+- [http] add `sleep-429` option ([#6996](https://github.com/mikf/gallery-dl/issues/6996))
+- [ytdl] support specifying `module` as filesystem paths ([#6991](https://github.com/mikf/gallery-dl/issues/6991))
+### Archives
+- [archive] implement support for PostgreSQL databases ([#6152](https://github.com/mikf/gallery-dl/issues/6152))
+- [archive] add `archive-table` option ([#6152](https://github.com/mikf/gallery-dl/issues/6152))
 ### Miscellaneous
-- [docs] add `nix` docs to README ([#6606](https://github.com/mikf/gallery-dl/issues/6606))
-- [path] fix exception when using `--rename-to` + `--no-download` ([#6861](https://github.com/mikf/gallery-dl/issues/6861))
-- [release] include `scripts/run_tests.py` in release tarball ([#6856](https://github.com/mikf/gallery-dl/issues/6856))
+- [aes] handle errors during `cryptodome` import ([#6906](https://github.com/mikf/gallery-dl/issues/6906))
+- [executables] fix loading `certifi` SSL certificates ([#6393](https://github.com/mikf/gallery-dl/issues/6393))
+- improve `\f` format string handling for `--print`
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: gallery_dl
-Version: 1.28.5
+Version: 1.29.0
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -89,6 +89,7 @@ Optional
 - PyYAML_: YAML configuration file support
 - toml_: TOML configuration file support for Python<3.11
 - SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
+- Psycopg_: PostgreSQL archive support
 
 
 Installation
@@ -131,9 +132,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
 
 
 Nightly Builds
@@ -527,6 +528,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 .. _PyYAML: https://pyyaml.org/
 .. _toml: https://pypi.org/project/toml/
 .. _SecretStorage: https://pypi.org/project/SecretStorage/
+.. _Psycopg: https://www.psycopg.org/
 .. _Snapd: https://docs.snapcraft.io/installing-snapd
 .. _OAuth: https://en.wikipedia.org/wiki/OAuth
 .. _Chocolatey: https://chocolatey.org/install
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -34,6 +34,7 @@ Optional
 - PyYAML_: YAML configuration file support
 - toml_: TOML configuration file support for Python<3.11
 - SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
+- Psycopg_: PostgreSQL archive support
 
 
 Installation
@@ -76,9 +77,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
 
 
 Nightly Builds
@@ -472,6 +473,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 .. _PyYAML: https://pyyaml.org/
 .. _toml: https://pypi.org/project/toml/
 .. _SecretStorage: https://pypi.org/project/SecretStorage/
+.. _Psycopg: https://www.psycopg.org/
 .. _Snapd: https://docs.snapcraft.io/installing-snapd
 .. _OAuth: https://en.wikipedia.org/wiki/OAuth
 .. _Chocolatey: https://chocolatey.org/install
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index b172453..b17a8f4 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-01-28" "1.28.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-03-01" "1.29.0" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 343188a..847efaa 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-01-28" "1.28.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-03-01" "1.29.0" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -475,6 +475,9 @@ response before \f[I]retrying\f[] the request.
 \f[I]xfolio\f[],
 \f[I]zerochan\f[]
 .br
+* \f[I]"1.0"\f[]
+\f[I]furaffinity\f[]
+.br
 * \f[I]"1.0-2.0"\f[]
 \f[I]flickr\f[],
 \f[I]pexels\f[],
@@ -601,7 +604,7 @@ the API key found in your user profile, not the actual account password.
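Taken together, the new `archive` and `archive-table` options mean the PostgreSQL backend is selected purely by the URI scheme of the `archive` value. A minimal configuration sketch (connection string and table name are placeholders, not values from this commit):

```json
{
    "extractor": {
        "archive": "postgresql://user:pass@localhost/gallery_dl",
        "archive-table": "{category}"
    }
}
```

Note that, per the man page change above, `archive-prefix` defaults to an empty string once `archive-table` is set, since a per-category prefix becomes redundant when each category writes to its own table.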
 \f[I]cookies\f[] is required
 
 Note: Leave the \f[I]password\f[] value empty or undefined
-to be prompted for a passeword when performing a login
+to be prompted for a password when performing a login
 (see \f[I]getpass()\f[]).
@@ -814,11 +817,16 @@ or a \f[I]list\f[] with IP and explicit port number as elements.
 * \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise
 
 .IP "Description:" 4
-User-Agent header value to be used for HTTP requests.
+User-Agent header value used for HTTP requests.
 
 Setting this value to \f[I]"browser"\f[] will try to automatically detect
 and use the \f[I]User-Agent\f[] header of the system's default browser.
 
+Note:
+This option has *no* effect if
+\f[I]extractor.browser\f[]
+is enabled.
+
 
 .SS extractor.*.browser
 .IP "Type:" 6
@@ -841,6 +849,15 @@ by using their default HTTP headers and TLS ciphers for HTTP requests.
 Optionally, the operating system used in the \f[I]User-Agent\f[] header
 can be specified after a \f[I]:\f[]
 (\f[I]windows\f[], \f[I]linux\f[], or \f[I]macos\f[]).
 
+Note:
+This option overrides
+\f[I]user-agent\f[]
+and sets custom
+\f[I]headers\f[]
+and
+\f[I]ciphers\f[]
+defaults.
+
 Note: \f[I]requests\f[] and \f[I]urllib3\f[] only support HTTP/1.1,
 while a real browser would use HTTP/2.
@@ -1071,13 +1088,19 @@ Note: Any \f[I]blacklist\f[] setting will automatically include
 .SS extractor.*.archive
 .IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
 
 .IP "Default:" 9
 \f[I]null\f[]
 
 .IP "Example:" 4
-"$HOME/.archives/{category}.sqlite3"
+.br
+* "$HOME/.archives/{category}.sqlite3"
+.br
+* "postgresql://user:pass@host/database"
 
 .IP "Description:" 4
 File to store IDs of downloaded files in. Downloads of files
@@ -1089,6 +1112,11 @@ database, as either lookup operations are significantly faster
 or memory requirements are significantly lower
 when the amount of stored IDs gets reasonably large.
 
+If this value is a
+\f[I]PostgreSQL Connection URI\f[],
+the archive will use this PostgreSQL database as backend (requires
+\f[I]Psycopg\f[]).
+
 Note: Archive files that do not already exist get generated automatically.
 
 Note: Archive paths support regular \f[I]format string\f[] replacements,
@@ -1154,7 +1182,10 @@ and only write them after successful job completion.
 \f[I]string\f[]
 
 .IP "Default:" 9
-\f[I]"{category}"\f[]
+.br
+* \f[I]""\f[] when \f[I]archive-table\f[] is set
+.br
+* \f[I]"{category}"\f[] otherwise
 
 .IP "Description:" 4
 Prefix for archive IDs.
@@ -1174,6 +1205,20 @@ See \f[I]<https://www.sqlite.org/pragma.html#toc>\f[]
 for available \f[I]PRAGMA\f[] statements and further details.
 
+.SS extractor.*.archive-table
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"archive"\f[]
+
+.IP "Example:" 4
+"{category}"
+
+.IP "Description:" 4
+\f[I]Format string\f[] selecting the archive database table name.
+
+
```
```diff
 .SS extractor.*.actions
 .IP "Type:" 6
 .br
@@ -2562,6 +2607,41 @@ Leave \f[I]SIZE\f[] empty to download the regular, small avatar format.
 .br
+.SS extractor.discord.embeds
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["image", "gifv", "video"]\f[]
+
+.IP "Description:" 4
+Selects which embed types to download from.
+
+Supported embed types are
+\f[I]image\f[], \f[I]gifv\f[], \f[I]video\f[], \f[I]rich\f[], \f[I]article\f[], \f[I]link\f[].
+
+
+.SS extractor.discord.threads
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract threads from Discord text channels.
```
```diff
+
+
+.SS extractor.discord.token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Discord Bot Token for API requests.
+
+You can follow \f[I]this guide\f[] to get a token.
+
+
 .SS extractor.[E621].metadata
 .IP "Type:" 6
 .br
```
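Combining the three new `discord` options might look like the following sketch; the token value is a placeholder, and the `rich` embed type is added here only to illustrate a non-default choice:

```json
{
    "extractor": {
        "discord": {
            "token": "YOUR_BOT_TOKEN",
            "embeds": ["image", "gifv", "video", "rich"],
            "threads": true
        }
    }
}
```

The remaining man page additions cover the new `tiktok` extractor, the `viper.click` default domain, the `weibo.movies` option, filesystem paths for `ytdl.module`, and the HTTP downloader's `sleep-429` option:

```diff
@@ -4992,6 +5072,71 @@ To generate a token, visit \f[I]/user/USERNAME/list-tokens\f[]
 and click \f[I]Create Token\f[].
 
+.SS extractor.tiktok.audio
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download audio tracks using \f[I]ytdl\f[].
+
+
+.SS extractor.tiktok.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download videos using \f[I]ytdl\f[].
+
+
+.SS extractor.tiktok.user.avatar
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download user avatars.
+
+
+.SS extractor.tiktok.user.module
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Name or filesystem path of the \f[I]ytdl\f[] Python module
+to extract posts from a \f[I]tiktok\f[] user profile with.
+
+See \f[I]extractor.ytdl.module\f[].
+
+
+.SS extractor.tiktok.user.tiktok-range
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Example:" 4
+"1-20"
+
+.IP "Description:" 4
+Range or playlist indices of \f[I]tiktok\f[] user posts to extract.
+
+See
+\f[I]ytdl/playlist_items\f[]
+for details.
+
+
 .SS extractor.tumblr.avatar
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -5517,7 +5662,7 @@ Selects how to handle exceeding the API rate limit.
 .IP "Description:" 4
 When receiving a "Could not authenticate you" error while logged in with
-\f[I]username & passeword\f[],
+\f[I]username & password\f[],
 refresh the current login session and
 try to continue from where it left off.
@@ -5716,7 +5861,7 @@ Available formats are
 \f[I]string\f[]
 
 .IP "Default:" 9
-\f[I]"vipergirls.to"\f[]
+\f[I]"viper.click"\f[]
 
 .IP "Description:" 4
 Specifies the domain used by \f[I]vipergirls\f[] extractors.
@@ -5923,6 +6068,17 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately.
 Download \f[I]livephoto\f[] files.
 
+.SS extractor.weibo.movies
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download \f[I]movie\f[] videos.
+
+
 .SS extractor.weibo.retweets
 .IP "Type:" 6
 \f[I]bool\f[]
@@ -6059,13 +6215,22 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
 .SS extractor.ytdl.module
 .IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
 
 .IP "Default:" 9
 \f[I]null\f[]
 
+.IP "Example:" 4
+.br
+* "yt-dlp"
+.br
+* "/home/user/.local/lib/python3.13/site-packages/youtube_dl"
+
 .IP "Description:" 4
-Name of the \f[I]ytdl\f[] Python module to import.
+Name or filesystem path of the \f[I]ytdl\f[] Python module to import.
 
 Setting this to \f[I]null\f[] will try to import \f[I]"yt_dlp"\f[]
 followed by \f[I]"youtube_dl"\f[] as fallback.
```

A sketch of the new `tiktok` options based on the descriptions above, assuming yt-dlp is importable as the `yt_dlp` module (a filesystem path would also be accepted); the range value is illustrative:

```json
{
    "extractor": {
        "tiktok": {
            "audio": true,
            "videos": true,
            "user": {
                "avatar": true,
                "module": "yt_dlp",
                "tiktok-range": "1-20"
            }
        }
    }
}
```

```diff
@@ -6485,6 +6650,22 @@ regardless of this option.
 regardless of this option.
 
+.SS downloader.http.sleep-429
+.IP "Type:" 6
+\f[I]Duration\f[]
+
+.IP "Default:" 9
+\f[I]extractor.*.sleep-429\f[]
+
+.IP "Description:" 4
+Number of seconds to sleep when receiving a 429 Too Many Requests
+response before \f[I]retrying\f[] the request.
+
+Note: Requires
+\f[I]retry-codes\f[]
+to include \f[I]429\f[].
```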
```diff
+
+
 .SS downloader.http.validate
 .IP "Type:" 6
 \f[I]bool\f[]
```

Since `sleep-429` only takes effect when status code 429 is actually retried, a working setup needs both options together; a minimal sketch matching the new defaults:

```json
{
    "downloader": {
        "http": {
            "retry-codes": [429],
            "sleep-429": 60.0
        }
    }
}
```

```diff
@@ -6580,13 +6761,22 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
 .SS downloader.ytdl.module
 .IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
 
 .IP "Default:" 9
 \f[I]null\f[]
 
+.IP "Example:" 4
+.br
+* "yt-dlp"
+.br
+* "/home/user/.local/lib/python3.13/site-packages/youtube_dl"
+
 .IP "Description:" 4
-Name of the \f[I]ytdl\f[] Python module to import.
+Name or filesystem path of the \f[I]ytdl\f[] Python module to import.
 
 Setting this to \f[I]null\f[] will try to import \f[I]"yt_dlp"\f[]
 followed by \f[I]"youtube_dl"\f[] as fallback.
@@ -7047,17 +7237,25 @@ Only compare file sizes. Do not read and compare their content.
 .SS exec.archive
 .IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
 
 .IP "Description:" 4
-File to store IDs of executed commands in,
+Database to store IDs of executed commands in,
 similar to \f[I]extractor.*.archive\f[].
 
-\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
-akin to
-\f[I]extractor.*.archive-format\f[],
-\f[I]extractor.*.archive-prefix\f[], and
-\f[I]extractor.*.archive-pragma\f[], are supported as well.
+The following archive options are also supported:
+
+.br
+* \f[I]archive-format\f[]
+.br
+* \f[I]archive-prefix\f[]
+.br
+* \f[I]archive-pragma\f[]
+.br
+* \f[I]archive-table \f[]
 
 
 .SS exec.async
@@ -7572,17 +7770,25 @@ Do not overwrite already existing files.
 .SS metadata.archive
 .IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
 
 .IP "Description:" 4
-File to store IDs of generated metadata files in,
+Database to store IDs of generated metadata files in,
 similar to \f[I]extractor.*.archive\f[].
 
-\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
-akin to
-\f[I]extractor.*.archive-format\f[],
-\f[I]extractor.*.archive-prefix\f[], and
-\f[I]extractor.*.archive-pragma\f[], are supported as well.
+The following archive options are also supported:
+
+.br
+* \f[I]archive-format\f[]
+.br
+* \f[I]archive-prefix\f[]
+.br
+* \f[I]archive-pragma\f[]
+.br
+* \f[I]archive-table \f[]
 
 
 .SS metadata.mtime
@@ -7663,17 +7869,25 @@ The resulting value must be either a UNIX timestamp or a
 .SS python.archive
 .IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
 
 .IP "Description:" 4
-File to store IDs of called Python functions in,
+Database to store IDs of called Python functions in,
 similar to \f[I]extractor.*.archive\f[].
 
-\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
-akin to
-\f[I]extractor.*.archive-format\f[],
-\f[I]extractor.*.archive-prefix\f[], and
-\f[I]extractor.*.archive-pragma\f[], are supported as well.
+The following archive options are also supported:
+
+.br
+* \f[I]archive-format\f[]
+.br
+* \f[I]archive-prefix\f[]
+.br
+* \f[I]archive-pragma\f[]
+.br
+* \f[I]archive-table \f[]
 
 
 .SS python.event
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index f3c9fdb..ed85b01 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -37,6 +37,7 @@
     "archive-pragma": [],
     "archive-event" : ["file"],
     "archive-mode"  : "file",
+    "archive-table" : null,
 
     "cookies": null,
     "cookies-select": null,
@@ -263,7 +264,8 @@
     },
     "furaffinity":
     {
-        "cookies" : null,
+        "cookies"      : null,
+        "sleep-request": "1.0",
 
         "descriptions": "text",
         "external"    : false,
@@ -591,6 +593,17 @@
         "username": "",
         "password": ""
     },
+    "tiktok":
+    {
+        "audio" : true,
+        "videos": true,
+
+        "user": {
+            "avatar": true,
+            "module": null,
+            "tiktok-range": null
+        }
+    },
     "tsumino":
     {
         "username": "",
@@ -672,7 +685,7 @@
         "password": "",
         "sleep-request": "0.5",
 
-        "domain" : "vipergirls.to",
+        "domain" : "viper.click",
         "like"   : false
     },
     "vk":
@@ -713,6 +726,7 @@
         "gifs"     : true,
         "include"  : ["feed"],
         "livephoto": true,
+        "movies"   : false,
         "retweets" : false,
         "videos"   : true
     },
@@ -917,7 +931,8 @@
         "consume-content" : false,
         "enabled"         : true,
         "headers"         : null,
-        "retry-codes"     : [404, 429, 430],
+        "retry-codes"     : [],
+        "sleep-429"       : 60.0,
         "validate"        : true
     },
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 6db2d05..148bf37 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: gallery_dl
-Version: 1.28.5
+Version: 1.29.0
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -89,6 +89,7 @@ Optional
 - PyYAML_: YAML configuration file support
 - toml_: TOML configuration file support for Python<3.11
 - SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
+- Psycopg_: PostgreSQL archive support
 
 
 Installation
@@ -131,9 +132,9 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
   (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
 
 
 Nightly Builds
@@ -527,6 +528,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
 .. _PyYAML: https://pyyaml.org/
 .. _toml: https://pypi.org/project/toml/
 .. _SecretStorage: https://pypi.org/project/SecretStorage/
+.. _Psycopg: https://www.psycopg.org/
 .. _Snapd: https://docs.snapcraft.io/installing-snapd
 .. _OAuth: https://en.wikipedia.org/wiki/OAuth
 ..
 .. _Chocolatey: https://chocolatey.org/install
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index c5f560b..a29d3fe 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -81,6 +81,7 @@ gallery_dl/extractor/danbooru.py
 gallery_dl/extractor/desktopography.py
 gallery_dl/extractor/deviantart.py
 gallery_dl/extractor/directlink.py
+gallery_dl/extractor/discord.py
 gallery_dl/extractor/dynastyscans.py
 gallery_dl/extractor/e621.py
 gallery_dl/extractor/erome.py
@@ -95,6 +96,7 @@ gallery_dl/extractor/flickr.py
 gallery_dl/extractor/foolfuuka.py
 gallery_dl/extractor/foolslide.py
 gallery_dl/extractor/furaffinity.py
+gallery_dl/extractor/furry34.py
 gallery_dl/extractor/fuskator.py
 gallery_dl/extractor/gelbooru.py
 gallery_dl/extractor/gelbooru_v01.py
@@ -121,6 +123,7 @@ gallery_dl/extractor/imgbb.py
 gallery_dl/extractor/imgbox.py
 gallery_dl/extractor/imgth.py
 gallery_dl/extractor/imgur.py
+gallery_dl/extractor/imhentai.py
 gallery_dl/extractor/inkbunny.py
 gallery_dl/extractor/instagram.py
 gallery_dl/extractor/issuu.py
@@ -217,6 +220,7 @@ gallery_dl/extractor/szurubooru.py
 gallery_dl/extractor/tapas.py
 gallery_dl/extractor/tcbscans.py
 gallery_dl/extractor/telegraph.py
+gallery_dl/extractor/tiktok.py
 gallery_dl/extractor/tmohentai.py
 gallery_dl/extractor/toyhouse.py
 gallery_dl/extractor/tsumino.py
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
index 891104a..6727541 100644
--- a/gallery_dl/aes.py
+++ b/gallery_dl/aes.py
@@ -14,6 +14,13 @@ except ImportError:
         from Crypto.Cipher import AES as Cryptodome_AES
     except ImportError:
         Cryptodome_AES = None
+except Exception as exc:
+    Cryptodome_AES = None
+    import logging
+    logging.getLogger("aes").warning(
+        "Error when trying to import 'Cryptodome' module (%s: %s)",
+        exc.__class__.__name__, exc)
+    del logging
 
 if Cryptodome_AES:
diff --git a/gallery_dl/archive.py b/gallery_dl/archive.py
index 5f05bbf..edecb10 100644
--- a/gallery_dl/archive.py
+++ b/gallery_dl/archive.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2024 Mike Fährmann
+# Copyright 2024-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -9,50 +9,94 @@
 """Download Archives"""
 
 import os
-import sqlite3
-from . import formatter
+import logging
+from . import util, formatter
+
+log = logging.getLogger("archive")
+
+
+def connect(path, prefix, format,
+            table=None, mode=None, pragma=None, kwdict=None, cache_key=None):
+    keygen = formatter.parse(prefix + format).format_map
+
+    if isinstance(path, str) and path.startswith(
+            ("postgres://", "postgresql://")):
+        if mode == "memory":
+            cls = DownloadArchivePostgresqlMemory
+        else:
+            cls = DownloadArchivePostgresql
+    else:
+        path = util.expand_path(path)
+        if kwdict is not None and "{" in path:
+            path = formatter.parse(path).format_map(kwdict)
+        if mode == "memory":
+            cls = DownloadArchiveMemory
+        else:
+            cls = DownloadArchive
+
+    if kwdict is not None and table:
+        table = formatter.parse(table).format_map(kwdict)
+
+    return cls(path, keygen, table, pragma, cache_key)
+
+
+def sanitize(name):
+    return '"' + name.replace('"', "_") + '"'
 
 
 class DownloadArchive():
+    _sqlite3 = None
+
+    def __init__(self, path, keygen, table=None, pragma=None, cache_key=None):
+        if self._sqlite3 is None:
+            DownloadArchive._sqlite3 = __import__("sqlite3")
 
-    def __init__(self, path, format_string, pragma=None,
-                 cache_key="_archive_key"):
         try:
-            con = sqlite3.connect(path, timeout=60, check_same_thread=False)
-        except sqlite3.OperationalError:
+            con = self._sqlite3.connect(
+                path, timeout=60, check_same_thread=False)
+        except self._sqlite3.OperationalError:
             os.makedirs(os.path.dirname(path))
-            con = sqlite3.connect(path, timeout=60, check_same_thread=False)
+            con = self._sqlite3.connect(
+                path, timeout=60, check_same_thread=False)
         con.isolation_level = None
 
-        self.keygen = formatter.parse(format_string).format_map
+        self.keygen = keygen
         self.connection = con
         self.close = con.close
         self.cursor = cursor = con.cursor()
-        self._cache_key = cache_key
+        self._cache_key = cache_key or "_archive_key"
+
+        table = "archive" if table is None else sanitize(table)
+        self._stmt_select = (
+            "SELECT 1 "
+            "FROM " + table + " "
+            "WHERE entry=? "
+            "LIMIT 1")
+        self._stmt_insert = (
+            "INSERT OR IGNORE INTO " + table + " "
+            "(entry) VALUES (?)")
 
         if pragma:
             for stmt in pragma:
                 cursor.execute("PRAGMA " + stmt)
 
         try:
-            cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+            cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
                            "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
-        except sqlite3.OperationalError:
+        except self._sqlite3.OperationalError:
             # fallback for missing WITHOUT ROWID support (#553)
-            cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+            cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
                            "(entry TEXT PRIMARY KEY)")
 
     def add(self, kwdict):
         """Add item described by 'kwdict' to archive"""
         key = kwdict.get(self._cache_key) or self.keygen(kwdict)
-        self.cursor.execute(
-            "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
+        self.cursor.execute(self._stmt_insert, (key,))
 
     def check(self, kwdict):
         """Return True if the item described by 'kwdict' exists in archive"""
         key = kwdict[self._cache_key] = self.keygen(kwdict)
-        self.cursor.execute(
-            "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
+        self.cursor.execute(self._stmt_select, (key,))
LIMIT 1", (key,)) + self.cursor.execute(self._stmt_select, (key,)) return self.cursor.fetchone() def finalize(self): @@ -61,9 +105,9 @@ class DownloadArchive(): class DownloadArchiveMemory(DownloadArchive): - def __init__(self, path, format_string, pragma=None, - cache_key="_archive_key"): - DownloadArchive.__init__(self, path, format_string, pragma, cache_key) + def __init__(self, path, keygen, table=None, pragma=None, cache_key=None): + DownloadArchive.__init__( + self, path, keygen, table, pragma, cache_key) self.keys = set() def add(self, kwdict): @@ -75,8 +119,7 @@ class DownloadArchiveMemory(DownloadArchive): key = kwdict[self._cache_key] = self.keygen(kwdict) if key in self.keys: return True - self.cursor.execute( - "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,)) + self.cursor.execute(self._stmt_select, (key,)) return self.cursor.fetchone() def finalize(self): @@ -87,12 +130,110 @@ class DownloadArchiveMemory(DownloadArchive): with self.connection: try: cursor.execute("BEGIN") - except sqlite3.OperationalError: + except self._sqlite3.OperationalError: pass - stmt = "INSERT OR IGNORE INTO archive (entry) VALUES (?)" + stmt = self._stmt_insert if len(self.keys) < 100: for key in self.keys: cursor.execute(stmt, (key,)) else: cursor.executemany(stmt, ((key,) for key in self.keys)) + + +class DownloadArchivePostgresql(): + _psycopg = None + + def __init__(self, uri, keygen, table=None, pragma=None, cache_key=None): + if self._psycopg is None: + DownloadArchivePostgresql._psycopg = __import__("psycopg") + + self.connection = con = self._psycopg.connect(uri) + self.cursor = cursor = con.cursor() + self.close = con.close + self.keygen = keygen + self._cache_key = cache_key or "_archive_key" + + table = "archive" if table is None else sanitize(table) + self._stmt_select = ( + "SELECT true " + "FROM " + table + " " + "WHERE entry=%s " + "LIMIT 1") + self._stmt_insert = ( + "INSERT INTO " + table + " (entry) " + "VALUES (%s) " + "ON CONFLICT DO NOTHING") + + try: + cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " " + "(entry TEXT PRIMARY KEY)") + con.commit() + except Exception as exc: + log.error("%s: %s when creating '%s' table: %s", + con, exc.__class__.__name__, table, exc) + con.rollback() + raise + + def add(self, kwdict): + key = kwdict.get(self._cache_key) or self.keygen(kwdict) + try: + self.cursor.execute(self._stmt_insert, (key,)) + self.connection.commit() + except Exception as exc: + log.error("%s: %s when writing entry: %s", + self.connection, exc.__class__.__name__, exc) + self.connection.rollback() + + def check(self, kwdict): + key = kwdict[self._cache_key] = self.keygen(kwdict) + try: + self.cursor.execute(self._stmt_select, (key,)) + return self.cursor.fetchone() + except Exception as exc: + log.error("%s: %s when checking entry: %s", + self.connection, exc.__class__.__name__, exc) + self.connection.rollback() + return False + + def finalize(self): + pass + + +class DownloadArchivePostgresqlMemory(DownloadArchivePostgresql): + + def __init__(self, path, keygen, table=None, pragma=None, cache_key=None): + DownloadArchivePostgresql.__init__( + self, path, keygen, table, pragma, cache_key) + self.keys = set() + + def add(self, kwdict): + self.keys.add( + kwdict.get(self._cache_key) or + self.keygen(kwdict)) + + def check(self, kwdict): + key = kwdict[self._cache_key] = self.keygen(kwdict) + if key in self.keys: + return True + try: + self.cursor.execute(self._stmt_select, (key,)) + return self.cursor.fetchone() + except Exception as exc: + log.error("%s: %s when 
```diff
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 1168d83..8430884 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@
 
 import os
 from .. import config, util
+_config = config._config
 
 
 class DownloaderBase():
@@ -17,8 +18,15 @@ class DownloaderBase():
     scheme = ""
 
     def __init__(self, job):
+        extractor = job.extractor
+
+        opts = self._extractor_config(extractor)
+        if opts:
+            self.opts = opts
+            self.config = self.config_opts
+
         self.out = job.out
-        self.session = job.extractor.session
+        self.session = extractor.session
         self.part = self.config("part", True)
         self.partdir = self.config("part-directory")
         self.log = job.get_logger("downloader." + self.scheme)
@@ -29,7 +37,7 @@
 
         proxies = self.config("proxy", util.SENTINEL)
         if proxies is util.SENTINEL:
-            self.proxies = job.extractor._proxies
+            self.proxies = extractor._proxies
         else:
             self.proxies = util.build_proxy_map(proxies, self.log)
 
@@ -37,5 +45,45 @@
         """Interpolate downloader config value for 'key'"""
         return config.interpolate(("downloader", self.scheme), key, default)
 
+    def config_opts(self, key, default=None, conf=_config):
+        if key in conf:
+            return conf[key]
+        value = self.opts.get(key, util.SENTINEL)
+        if value is not util.SENTINEL:
+            return value
+        return config.interpolate(("downloader", self.scheme), key, default)
+
+    def _extractor_config(self, extractor):
+        path = extractor._cfgpath
+        if not isinstance(path, list):
+            return self._extractor_opts(path[1], path[2])
+
+        opts = {}
+        for cat, sub in reversed(path):
+            popts = self._extractor_opts(cat, sub)
+            if popts:
+                opts.update(popts)
+        return opts
+
+    def _extractor_opts(self, category, subcategory):
+        cfg = config.get(("extractor",), category)
+        if not cfg:
+            return None
+
+        copts = cfg.get(self.scheme)
+        if copts:
+            if subcategory in cfg:
+                sopts = cfg[subcategory].get(self.scheme)
+                if sopts:
+                    opts = copts.copy()
+                    opts.update(sopts)
+                    return opts
+            return copts
+
+        if subcategory in cfg:
+            return cfg[subcategory].get(self.scheme)
+
+        return None
+
     def download(self, url, pathfmt):
         """Write data from 'url' into the file specified by 'pathfmt'"""
```
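`_extractor_config()` looks inside `extractor.<category>` (and its subcategories) for a key matching the downloader scheme, which is what the changelog entry "implement `downloader` options per extractor category" refers to. A sketch of the configuration shape this enables, scoping HTTP downloader options to a single site; category and values are illustrative:

```json
{
    "extractor": {
        "pixiv": {
            "http": {
                "rate": "1M",
                "retries": 8
            }
        }
    }
}
```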
```diff
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index c8aeef8..449ffe8 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -12,7 +12,7 @@ import time
 import mimetypes
 from requests.exceptions import RequestException, ConnectionError, Timeout
 from .common import DownloaderBase
-from .. import text, util
+from .. import text, util, output
 from ssl import SSLError
 
@@ -38,6 +38,7 @@ class HttpDownloader(DownloaderBase):
         self.verify = self.config("verify", extractor._verify)
         self.mtime = self.config("mtime", True)
         self.rate = self.config("rate")
+        interval_429 = self.config("sleep-429")
 
         if not self.config("consume-content", False):
             # this resets the underlying TCP connection, and therefore
@@ -79,12 +80,16 @@
             self.receive = self._receive_rate
             if self.progress < 0.0:
                 self.progress = 0.0
+        if interval_429 is None:
+            self.interval_429 = extractor._interval_429
+        else:
+            self.interval_429 = util.build_duration_func(interval_429)
 
     def download(self, url, pathfmt):
         try:
             return self._download_impl(url, pathfmt)
         except Exception:
-            print()
+            output.stderr_write("\n")
             raise
         finally:
             # remove file from incomplete downloads
@@ -93,7 +98,7 @@
 
     def _download_impl(self, url, pathfmt):
         response = None
-        tries = 0
+        tries = code = 0
         msg = ""
 
         metadata = self.metadata
@@ -111,10 +116,17 @@
                 if response:
                     self.release_conn(response)
                     response = None
+
                 self.log.warning("%s (%s/%s)", msg, tries, self.retries+1)
                 if tries > self.retries:
                     return False
-                time.sleep(tries)
+
+                if code == 429 and self.interval_429:
+                    s = self.interval_429()
+                    time.sleep(s if s > tries else tries)
+                else:
+                    time.sleep(tries)
+                code = 0
             tries += 1
 
             file_header = None
@@ -257,7 +269,7 @@
                         else response.iter_content(16), b"")
                 except (RequestException, SSLError) as exc:
                     msg = str(exc)
-                    print()
+                    output.stderr_write("\n")
                     continue
                 if self._adjust_extension(pathfmt, file_header) and \
                         pathfmt.exists():
@@ -291,14 +303,14 @@
                     self.receive(fp, content, size, offset)
                 except (RequestException, SSLError) as exc:
                     msg = str(exc)
-                    print()
+                    output.stderr_write("\n")
                     continue
 
                 # check file size
                 if size and fp.tell() < size:
                     msg = "file size mismatch ({} < {})".format(
                         fp.tell(), size)
-                    print()
+                    output.stderr_write("\n")
                     continue
 
             break
@@ -317,7 +329,7 @@
             for _ in response.iter_content(self.chunk_size):
                 pass
         except (RequestException, SSLError) as exc:
-            print()
+            output.stderr_write("\n")
             self.log.debug(
                 "Unable to consume response body (%s: %s); "
                 "closing the connection anyway", exc.__class__.__name__, exc)
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 40cddec..1242098 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -48,6 +48,13 @@ class YoutubeDLDownloader(DownloaderBase):
                 self.log.debug("", exc_info=exc)
                 self.download = lambda u, p: False
                 return False
+
+            try:
+                ytdl_version = module.version.__version__
+            except Exception:
+                ytdl_version = ""
+            self.log.debug("Using %s version %s", module, ytdl_version)
+
             self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
                 module, self, self.ytdl_opts)
             if self.outtmpl == "default":
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index fc8d7b2..00b22d4 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -44,6 +44,7 @@ modules = [
     "danbooru",
     "desktopography",
     "deviantart",
+    "discord",
     "dynastyscans",
     "e621",
     "erome",
@@ -56,6 +57,7 @@
     "fapachi",
     "flickr",
     "furaffinity",
+    "furry34",
     "fuskator",
     "gelbooru",
     "gelbooru_v01",
@@ -80,6 +82,7 @@
     "imgbox",
     "imgth",
     "imgur",
+    "imhentai",
     "inkbunny",
     "instagram",
     "issuu",
@@ -168,6 +171,7 @@ modules = [
     "tapas",
     "tcbscans",
     "telegraph",
+    "tiktok",
     "tmohentai",
     "toyhouse",
     "tsumino",
diff --git a/gallery_dl/extractor/bilibili.py b/gallery_dl/extractor/bilibili.py
index b9de165..597ec40 100644
--- a/gallery_dl/extractor/bilibili.py
+++ b/gallery_dl/extractor/bilibili.py
@@ -81,6 +81,27 @@ class BilibiliArticleExtractor(BilibiliExtractor):
         yield Message.Url, url, text.nameext_from_url(url, article)
 
 
+class BilibiliUserArticlesFavoriteExtractor(BilibiliExtractor):
+    subcategory = "user-articles-favorite"
+    pattern = (r"(?:https?://)?space\.bilibili\.com"
+               r"/(\d+)/favlist\?fid=opus")
+    example = "https://space.bilibili.com/12345/favlist?fid=opus"
+    _warning = True
+
+    def _init(self):
+        BilibiliExtractor._init(self)
+        if self._warning:
+            if not self.cookies_check(("SESSDATA",)):
+                self.log.error("'SESSDATA' cookie required")
+            BilibiliUserArticlesFavoriteExtractor._warning = False
+
+    def items(self):
+        for article in self.api.user_favlist():
+            article["_extractor"] = BilibiliArticleExtractor
+            url = "{}/opus/{}".format(self.root, article["opus_id"])
+            yield Message.Queue, url, article
+
+
 class BilibiliAPI():
     def __init__(self, extractor):
         self.extractor = extractor
@@ -122,3 +143,28 @@
             raise exception.StopExtraction(
                 "%s: Unable to extract INITIAL_STATE data", article_id)
         self.extractor.wait(seconds=300)
+
+    def user_favlist(self):
+        endpoint = "/opus/feed/fav"
+        params = {"page": 1, "page_size": 20}
+
+        while True:
+            data = self._call(endpoint, params)["data"]
+
+            yield from data["items"]
+
+            if not data.get("has_more"):
+                break
+            params["page"] += 1
+
+    def login_user_id(self):
+        url = "https://api.bilibili.com/x/space/v2/myinfo"
+        data = self.extractor.request(url).json()
+
+        if data["code"] != 0:
+            self.extractor.log.debug("Server response: %s", data)
+            raise exception.StopExtraction("API request failed,Are you login?")
+        try:
+            return data["data"]["profile"]["mid"]
+        except Exception:
+            raise exception.StopExtraction("API request failed")
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index c28fad9..f3e441b 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -8,6 +8,7 @@
 
 from .common import Extractor, Message
 from .. import text, util, exception
+import itertools
 
 BASE_PATTERN = r"(?:https?://)?boosty\.to"
 
@@ -53,7 +54,9 @@ class BoostyExtractor(Extractor):
                 self.log.warning("Not allowed to access post %s", post["id"])
                 continue
 
-            files = self._process_post(post)
+            files = self._extract_files(post)
+            if self._user:
+                post["user"] = self._user
             data = {
                 "post" : post,
                 "user" : post.pop("user", None),
@@ -69,15 +72,13 @@
     def posts(self):
         """Yield JSON content of all relevant posts"""
 
-    def _process_post(self, post):
+    def _extract_files(self, post):
         files = []
         post["content"] = content = []
         post["links"] = links = []
 
         if "createdAt" in post:
             post["date"] = text.parse_timestamp(post["createdAt"])
-        if self._user:
-            post["user"] = self._user
 
         for block in post["data"]:
             try:
@@ -94,7 +95,7 @@
                 elif type == "ok_video":
                     if not self.videos:
                         self.log.debug("%s: Skipping video %s",
-                                       post["int_id"], block["id"])
+                                       post["id"], block["id"])
                         continue
                     fmts = {
                         fmt["type"]: fmt["url"]
@@ -114,7 +115,7 @@
                     else:
                         self.log.warning(
                             "%s: Found no suitable video format for %s",
-                            post["int_id"], block["id"])
+                            post["id"], block["id"])
 
                 elif type == "link":
                     url = block["url"]
@@ -127,9 +128,12 @@
                 elif type == "file":
                     files.append(self._update_url(post, block))
 
+                elif type == "smile":
+                    content.append(":" + block["name"] + ":")
+
                 else:
                     self.log.debug("%s: Unsupported data type '%s'",
-                                   post["int_id"], type)
+                                   post["id"], type)
 
             except Exception as exc:
                 self.log.debug("%s: %s", exc.__class__.__name__, exc)
@@ -219,6 +223,51 @@ class BoostyFollowingExtractor(BoostyExtractor):
             yield Message.Queue, url, user
 
 
+class BoostyDirectMessagesExtractor(BoostyExtractor):
+    """Extractor for boosty.to direct messages"""
+    subcategory = "direct-messages"
+    directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})",
+                     "Direct Messages")
+    pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
+    example = "https://boosty.to/app/messages?dialogId=12345"
+
+    def items(self):
+        """Yield direct messages from a given dialog ID."""
+        dialog_id = self.groups[0]
+        response = self.api.dialog(dialog_id)
+        signed_query = response.get("signedQuery")
+
+        try:
+            messages = response["messages"]["data"]
+            offset = messages[0]["id"]
+        except Exception:
+            return
+
+        try:
+            user = self.api.user(response["chatmate"]["url"])
+        except Exception:
+            user = None
+
+        messages.reverse()
+        for message in itertools.chain(
+                messages,
+                self.api.dialog_messages(dialog_id, offset=offset)
+        ):
+            message["signedQuery"] = signed_query
+            files = self._extract_files(message)
+            data = {
+                "post": message,
+                "user": user,
+                "count": len(files),
+            }
+
+            yield Message.Directory, data
+            for data["num"], file in enumerate(files, 1):
+                data["file"] = file
+                url = file["url"]
+                yield Message.Url, url, text.nameext_from_url(url, data)
+
+
 class BoostyAPI():
     """Interface for the Boosty API"""
     root = "https://api.boosty.to"
@@ -367,3 +416,32 @@
             if offset > data["total"]:
                 return
             params["offset"] = offset
+
+    def dialog(self, dialog_id):
+        endpoint = "/v1/dialog/{}".format(dialog_id)
+        return self._call(endpoint)
+
+    def dialog_messages(self, dialog_id, limit=300, offset=None):
+        endpoint = "/v1/dialog/{}/message/".format(dialog_id)
+        params = {
+            "limit": limit,
+            "reverse": "true",
+            "offset": offset,
+        }
+        return self._pagination_dialog(endpoint, params)
+
+    def _pagination_dialog(self, endpoint, params):
+        while True:
+            data = self._call(endpoint, params)
+
+            yield from data["data"]
+
+            try:
+                extra = data["extra"]
+                if extra.get("isLast"):
+                    break
+                params["offset"] = offset = extra["offset"]
+                if not offset:
+                    break
+            except Exception:
+                break
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 25e9fd5..201b8f4 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -10,7 +10,8 @@
 
 from .common import Extractor
 from .lolisafe import LolisafeAlbumExtractor
-from .. import text, config, exception
+from .. import text, util, config, exception
+import binascii
 import random
 
 if config.get(("extractor", "bunkr"), "tlds"):
@@ -60,6 +61,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
     """Extractor for bunkr.si albums"""
     category = "bunkr"
     root = "https://bunkr.si"
+    root_dl = "https://get.bunkrr.su"
+    archive_fmt = "{album_id}_{id|id_url}"
     pattern = BASE_PATTERN + r"/a/([^/?#]+)"
     example = "https://bunkr.si/a/ID"
 
@@ -68,6 +71,11 @@
         domain = self.groups[0] or self.groups[1]
         if domain not in LEGACY_DOMAINS:
             self.root = "https://" + domain
+        self.offset = 0
+
+    def skip(self, num):
+        self.offset = num
+        return num
 
     def request(self, url, **kwargs):
         kwargs["encoding"] = "utf-8"
@@ -132,6 +140,9 @@
         }
 
     def _extract_files(self, items):
+        if self.offset:
+            items = util.advance(items, self.offset)
+
         for item in items:
             try:
                 url = text.unescape(text.extr(item, ' href="', '"'))
@@ -154,26 +165,43 @@
                 self.log.debug("", exc_info=exc)
 
     def _extract_file(self, webpage_url):
-        response = self.request(webpage_url)
-        page = response.text
-        file_url = (text.extr(page, '<source src="', '"') or
-                    text.extr(page, '<img src="', '"'))
+        page = self.request(webpage_url).text
+        data_id = text.extr(page, 'data-file-id="', '"')
+        referer = self.root_dl + "/file/" + data_id
+
+        url = self.root_dl + "/api/vs"
+        headers = {"Referer": referer}
+        data = self.request(
+            url, method="POST", headers=headers, json={"id": data_id}).json()
+
+        if data.get("encrypted"):
+            file_url = self._decrypt_url(data["url"], data["timestamp"])
+        else:
+            file_url = data["url"]
+
         file_name = (text.extr(page, 'property="og:title" content="', '"') or
                      text.extr(page, "<title>", " | Bunkr<"))
-
-        if not file_url:
-            webpage_url = text.unescape(text.rextract(
-                page, ' href="', '"', page.rindex("Download"))[0])
-            response = self.request(webpage_url)
-            file_url = text.rextract(response.text, ' href="', '"')[0]
+        fallback = text.extr(page, 'property="og:url" content="', '"')
 
         return {
-            "file" : text.unescape(file_url),
+            "file" : file_url,
             "name" : text.unescape(file_name),
-            "_http_headers" : {"Referer": response.url},
+            "id_url" : data_id,
+            "_fallback" : (fallback,) if fallback else (),
+            "_http_headers" : {"Referer": referer},
             "_http_validate": self._validate,
         }
 
+    def _decrypt_url(self, encrypted_b64, timestamp):
+        encrypted_bytes = binascii.a2b_base64(encrypted_b64)
+        key = "SECRET_KEY_{}".format(timestamp // 3600).encode()
+        div = len(key)
+
+        return bytes([
+            encrypted_bytes[i] ^ key[i % div]
+            for i in range(len(encrypted_bytes))
+        ]).decode()
+
     def _validate(self, response):
         if response.history and response.url.endswith("/maintenance-vid.mp4"):
             self.log.warning("File server in maintenance mode")
```
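The decryption in `_decrypt_url()` is a plain repeating-key XOR, with the key derived from the API response's `timestamp` rounded down to the hour. A standalone sketch of the same transform; the timestamp and plaintext URL are made-up sample values, and since XOR is its own inverse the round-trip below checks the logic:

```python
import binascii

def decrypt_url(encrypted_b64, timestamp):
    """XOR base64-encoded bytes against the hour-based repeating key."""
    data = binascii.a2b_base64(encrypted_b64)
    key = "SECRET_KEY_{}".format(timestamp // 3600).encode()
    return bytes(
        b ^ key[i % len(key)] for i, b in enumerate(data)
    ).decode()

# round-trip with arbitrary example values
ts = 1740870000
key = "SECRET_KEY_{}".format(ts // 3600).encode()
plain = b"https://example.org/file.mp4"
token = binascii.b2a_base64(bytes(
    b ^ key[i % len(key)] for i, b in enumerate(plain))).decode()
assert decrypt_url(token, ts) == plain.decode()
```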
```diff
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index aedcea4..de22a7b 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -57,7 +57,8 @@ class CheveretoImageExtractor(CheveretoExtractor):
         image = {
             "id"   : self.path.rpartition(".")[2],
-            "url"  : extr('<meta property="og:image" content="', '"'),
+            "url"  : (extr('<meta property="og:image" content="', '"') or
+                      extr('url: "', '"')),
             "album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
             "user" : extr('username: "', '"'),
         }
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 13fd88a..d58db6f 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -915,7 +915,7 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
             options=ssl_options or None, ciphers=ssl_ciphers)
         if not requests.__version__ < "2.32":
             # https://github.com/psf/requests/pull/6731
-            ssl_context.load_default_certs()
+            ssl_context.load_verify_locations(requests.certs.where())
         ssl_context.check_hostname = False
     else:
         ssl_context = None
diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py
new file mode 100644
index 0000000..6a5fcc9
--- /dev/null
+++ b/gallery_dl/extractor/discord.py
@@ -0,0 +1,399 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://discord.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+BASE_PATTERN = r"(?:https?://)?discord\.com"
+
+
+class DiscordExtractor(Extractor):
+    """Base class for Discord extractors"""
+    category = "discord"
+    root = "https://discord.com"
+    directory_fmt = ("{category}", "{server_id}_{server}",
+                     "{channel_id}_{channel}")
+    filename_fmt = "{message_id}_{num:>02}_{filename}.{extension}"
+    archive_fmt = "{message_id}_{num}"
+
+    cdn_fmt = "https://cdn.discordapp.com/{}/{}/{}.png?size=4096"
+
+    server_metadata = {}
+    server_channels_metadata = {}
+
+    def _init(self):
+        self.token = self.config("token")
+        self.enabled_embeds = self.config("embeds", ["image", "gifv", "video"])
+        self.enabled_threads = self.config("threads", True)
+        self.api = DiscordAPI(self)
+
+    def extract_message_text(self, message):
+        text_content = [message["content"]]
+
+        for embed in message["embeds"]:
+            if embed["type"] == "rich":
+                try:
+                    text_content.append(embed["author"]["name"])
+                except Exception:
+                    pass
+                text_content.append(embed.get("title", ""))
+                text_content.append(embed.get("description", ""))
+
+                for field in embed.get("fields", []):
+                    text_content.append(field.get("name", ""))
+                    text_content.append(field.get("value", ""))
+
+                text_content.append(embed.get("footer", {}).get("text", ""))
+
+        if message.get("poll"):
+            text_content.append(message["poll"]["question"]["text"])
+            for answer in message["poll"]["answers"]:
+                text_content.append(answer["poll_media"]["text"])
+
+        return "\n".join(t for t in text_content if t)
+
+    def extract_message(self, message):
+        # https://discord.com/developers/docs/resources/message#message-object-message-types
+        if message["type"] in (0, 19, 21):
+            message_metadata = {}
+            message_metadata.update(self.server_metadata)
+            message_metadata.update(
+                self.server_channels_metadata[message["channel_id"]])
+            message_metadata.update({
+                "author": message["author"]["username"],
+                "author_id": message["author"]["id"],
+                "author_files": [],
+                "message": self.extract_message_text(message),
+                "message_id": message["id"],
+                "date": text.parse_datetime(
+                    message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
+                ),
+                "files": []
+            })
"%Y-%m-%dT%H:%M:%S.%f%z" + ), + "files": [] + }) + + for icon_type, icon_path in ( + ("avatar", "avatars"), + ("banner", "banners") + ): + if message["author"].get(icon_type): + message_metadata["author_files"].append({ + "url": self.cdn_fmt.format( + icon_path, + message_metadata["author_id"], + message["author"][icon_type] + ), + "filename": icon_type, + "extension": "png", + }) + + for attachment in message["attachments"]: + message_metadata["files"].append({ + "url": attachment["url"], + "type": "attachment", + }) + + for embed in message["embeds"]: + if embed["type"] in self.enabled_embeds: + for field in ("video", "image", "thumbnail"): + if field not in embed: + continue + url = embed[field].get("proxy_url") + if url is not None: + message_metadata["files"].append({ + "url": url, + "type": "embed", + }) + break + + for num, file in enumerate(message_metadata["files"], start=1): + text.nameext_from_url(file["url"], file) + file["num"] = num + + yield Message.Directory, message_metadata + + for file in message_metadata["files"]: + message_metadata_file = message_metadata.copy() + message_metadata_file.update(file) + yield Message.Url, file["url"], message_metadata_file + + def extract_channel_text(self, channel_id): + for message in self.api.get_channel_messages(channel_id): + yield from self.extract_message(message) + + def extract_channel_threads(self, channel_id): + for thread in self.api.get_channel_threads(channel_id): + id = self.parse_channel(thread)["channel_id"] + yield from self.extract_channel_text(id) + + def extract_channel(self, channel_id, safe=False): + try: + if channel_id not in self.server_channels_metadata: + self.parse_channel(self.api.get_channel(channel_id)) + + channel_type = ( + self.server_channels_metadata[channel_id]["channel_type"] + ) + + # https://discord.com/developers/docs/resources/channel#channel-object-channel-types + if channel_type in (0, 5): + yield from self.extract_channel_text(channel_id) + if self.enabled_threads: + yield from self.extract_channel_threads(channel_id) + elif channel_type in (1, 3, 10, 11, 12): + yield from self.extract_channel_text(channel_id) + elif channel_type in (15, 16): + yield from self.extract_channel_threads(channel_id) + elif channel_type in (4,): + for channel in self.server_channels_metadata.copy().values(): + if channel["parent_id"] == channel_id: + yield from self.extract_channel( + channel["channel_id"], safe=True) + elif not safe: + raise exception.StopExtraction( + "This channel type is not supported." 
+ ) + except exception.HttpError as exc: + if not (exc.status == 403 and safe): + raise + + def parse_channel(self, channel): + parent_id = channel.get("parent_id") + channel_metadata = { + "channel": channel.get("name", ""), + "channel_id": channel.get("id"), + "channel_type": channel.get("type"), + "channel_topic": channel.get("topic", ""), + "parent_id": parent_id, + "is_thread": "thread_metadata" in channel + } + + if parent_id in self.server_channels_metadata: + parent_metadata = self.server_channels_metadata[parent_id] + channel_metadata.update({ + "parent": parent_metadata["channel"], + "parent_type": parent_metadata["channel_type"] + }) + + if channel_metadata["channel_type"] in (1, 3): + channel_metadata.update({ + "channel": "DMs", + "recipients": ( + [user["username"] for user in channel["recipients"]] + ), + "recipients_id": ( + [user["id"] for user in channel["recipients"]] + ) + }) + + channel_id = channel_metadata["channel_id"] + + self.server_channels_metadata[channel_id] = channel_metadata + return channel_metadata + + def parse_server(self, server): + self.server_metadata = { + "server": server["name"], + "server_id": server["id"], + "server_files": [], + "owner_id": server["owner_id"] + } + + for icon_type, icon_path in ( + ("icon", "icons"), + ("banner", "banners"), + ("splash", "splashes"), + ("discovery_splash", "discovery-splashes") + ): + if server.get(icon_type): + self.server_metadata["server_files"].append({ + "url": self.cdn_fmt.format( + icon_path, + self.server_metadata["server_id"], + server[icon_type] + ), + "filename": icon_type, + "extension": "png", + }) + + return self.server_metadata + + def build_server_and_channels(self, server_id): + server = self.api.get_server(server_id) + self.parse_server(server) + + for channel in self.api.get_server_channels(server_id): + self.parse_channel(channel) + + +class DiscordChannelExtractor(DiscordExtractor): + subcategory = "channel" + pattern = BASE_PATTERN + r"/channels/(\d+)/(?:\d+/threads/)?(\d+)/?$" + example = "https://discord.com/channels/1234567890/9876543210" + + def items(self): + server_id, channel_id = self.groups + + self.build_server_and_channels(server_id) + + return self.extract_channel(channel_id) + + +class DiscordMessageExtractor(DiscordExtractor): + subcategory = "message" + pattern = BASE_PATTERN + r"/channels/(\d+)/(\d+)/(\d+)/?$" + example = "https://discord.com/channels/1234567890/9876543210/2468013579" + + def items(self): + server_id, channel_id, message_id = self.groups + + self.build_server_and_channels(server_id) + + if channel_id not in self.server_channels_metadata: + self.parse_channel(self.api.get_channel(channel_id)) + + return self.extract_message( + self.api.get_message(channel_id, message_id)) + + +class DiscordServerExtractor(DiscordExtractor): + subcategory = "server" + pattern = BASE_PATTERN + r"/channels/(\d+)/?$" + example = "https://discord.com/channels/1234567890" + + def items(self): + server_id = self.groups[0] + + self.build_server_and_channels(server_id) + + for channel in self.server_channels_metadata.copy().values(): + if channel["channel_type"] in (0, 5, 15, 16): + yield from self.extract_channel( + channel["channel_id"], safe=True) + + +class DiscordDirectMessagesExtractor(DiscordExtractor): + subcategory = "direct-messages" + directory_fmt = ("{category}", "Direct Messages", + "{channel_id}_{recipients:J,}") + pattern = BASE_PATTERN + r"/channels/@me/(\d+)/?$" + example = "https://discord.com/channels/@me/1234567890" + + def items(self): + return 
self.extract_channel(self.groups[0]) + + +class DiscordDirectMessageExtractor(DiscordExtractor): + subcategory = "direct-message" + directory_fmt = ("{category}", "Direct Messages", + "{channel_id}_{recipients:J,}") + pattern = BASE_PATTERN + r"/channels/@me/(\d+)/(\d+)/?$" + example = "https://discord.com/channels/@me/1234567890/9876543210" + + def items(self): + channel_id, message_id = self.groups + + self.parse_channel(self.api.get_channel(channel_id)) + + return self.extract_message( + self.api.get_message(channel_id, message_id)) + + +class DiscordAPI(): + """Interface for the Discord API v10 + + https://discord.com/developers/docs/reference + """ + + def __init__(self, extractor): + self.extractor = extractor + self.root = extractor.root + "/api/v10" + self.headers = {"Authorization": extractor.token} + + def get_server(self, server_id): + """Get server information""" + return self._call("/guilds/" + server_id) + + def get_server_channels(self, server_id): + """Get server channels""" + return self._call("/guilds/" + server_id + "/channels") + + def get_channel(self, channel_id): + """Get channel information""" + return self._call("/channels/" + channel_id) + + def get_channel_threads(self, channel_id): + """Get channel threads""" + THREADS_BATCH = 25 + + def _method(offset): + return self._call("/channels/" + channel_id + "/threads/search", { + "sort_by": "last_message_time", + "sort_order": "desc", + "limit": THREADS_BATCH, + "offset": + offset, + })["threads"] + + return self._pagination(_method, THREADS_BATCH) + + def get_channel_messages(self, channel_id): + """Get channel messages""" + MESSAGES_BATCH = 100 + + before = None + + def _method(_): + nonlocal before + messages = self._call("/channels/" + channel_id + "/messages", { + "limit": MESSAGES_BATCH, + "before": before + }) + before = messages[-1]["id"] + return messages + + return self._pagination(_method, MESSAGES_BATCH) + + def get_message(self, channel_id, message_id): + """Get message information""" + return self._call("/channels/" + channel_id + "/messages", { + "limit": 1, + "around": message_id + })[0] + + def _call(self, endpoint, params=None): + url = self.root + endpoint + try: + response = self.extractor.request( + url, params=params, headers=self.headers) + except exception.HttpError as exc: + if exc.status == 401: + self._raise_invalid_token() + raise + return response.json() + + def _pagination(self, method, batch): + offset = 0 + while True: + data = method(offset) + yield from data + if len(data) < batch: + return + offset += len(data) + + @staticmethod + def _raise_invalid_token(): + raise exception.AuthenticationError("""Invalid or missing token. 
+Please provide a valid token following these instructions: + +1) Open Discord in your browser (https://discord.com/app); +2) Open your browser's Developer Tools (F12) and switch to the Network panel; +3) Reload the page and select any request going to https://discord.com/api/...; +4) In the "Headers" tab, look for an entry beginning with "Authorization: "; +5) Right-click the entry and click "Copy Value"; +6) Paste the token in your configuration file under "extractor.discord.token", +or run this command with the -o "token=[your token]" argument.""") diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py index e6d136f..55549de 100644 --- a/gallery_dl/extractor/erome.py +++ b/gallery_dl/extractor/erome.py @@ -44,6 +44,8 @@ class EromeExtractor(Extractor): pos = page.index('<div class="user-profile', pos) user, pos = text.extract( page, 'href="https://www.erome.com/', '"', pos) + tags, pos = text.extract( + page, '<p class="mt-10"', '</p>', pos) urls = [] date = None @@ -59,11 +61,13 @@ class EromeExtractor(Extractor): date = text.parse_timestamp(ts) data = { - "album_id" : album_id, - "title" : text.unescape(title), - "user" : text.unquote(user), - "count" : len(urls), - "date" : date, + "album_id": album_id, + "title" : text.unescape(title), + "user" : text.unquote(user), + "count" : len(urls), + "date" : date, + "tags" : [t.replace("+", " ") + for t in text.extract_iter(tags, "?q=", '"')], "_http_headers": {"Referer": url}, } diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 44c4542..5f90afc 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -81,8 +81,8 @@ BASE_PATTERN = FoolfuukaExtractor.update({ "pattern": r"(?:www\.)?archiveofsins\.com", }, "b4k": { - "root": "https://arch.b4k.co", - "pattern": r"arch\.b4k\.co", + "root": "https://arch.b4k.dev", + "pattern": r"arch\.b4k\.(?:dev|co)", }, "desuarchive": { "root": "https://desuarchive.org", diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index d253582..1466390 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -23,6 +23,7 @@ class FuraffinityExtractor(Extractor): cookies_domain = ".furaffinity.net" cookies_names = ("a", "b") root = "https://www.furaffinity.net" + request_interval = 1.0 _warning = True def __init__(self, match): diff --git a/gallery_dl/extractor/furry34.py b/gallery_dl/extractor/furry34.py new file mode 100644 index 0000000..e0c7fdb --- /dev/null +++ b/gallery_dl/extractor/furry34.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://furry34.com/""" + +from .booru import BooruExtractor +from .. 
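DiscordAPI pages through results with a plain offset cursor: each call fetches up to a batch's worth of items, and iteration stops at the first short page. A minimal standalone sketch of that loop, where "fetch" is a stand-in for the bound API call and is not part of the module above:

# Offset-based pagination in the style of DiscordAPI._pagination;
# "fetch" is an assumed stand-in for the HTTP request.
def paginate(fetch, batch):
    offset = 0
    while True:
        data = fetch(offset)
        yield from data
        if len(data) < batch:   # a short page means no further results
            return
        offset += len(data)

# A fake 7-item result set consumed in batches of 3:
items = list(range(7))
assert list(paginate(lambda o: items[o:o+3], 3)) == items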
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index e6d136f..55549de 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -44,6 +44,8 @@ class EromeExtractor(Extractor):
             pos = page.index('<div class="user-profile', pos)
             user, pos = text.extract(
                 page, 'href="https://www.erome.com/', '"', pos)
+            tags, pos = text.extract(
+                page, '<p class="mt-10"', '</p>', pos)
 
             urls = []
             date = None
@@ -59,11 +61,13 @@ class EromeExtractor(Extractor):
                 date = text.parse_timestamp(ts)
 
             data = {
-                "album_id" : album_id,
-                "title"    : text.unescape(title),
-                "user"     : text.unquote(user),
-                "count"    : len(urls),
-                "date"     : date,
+                "album_id": album_id,
+                "title"   : text.unescape(title),
+                "user"    : text.unquote(user),
+                "count"   : len(urls),
+                "date"    : date,
+                "tags"    : [t.replace("+", " ")
+                             for t in text.extract_iter(tags, "?q=", '"')],
                 "_http_headers": {"Referer": url},
             }
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 44c4542..5f90afc 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -81,8 +81,8 @@ BASE_PATTERN = FoolfuukaExtractor.update({
         "pattern": r"(?:www\.)?archiveofsins\.com",
     },
     "b4k": {
-        "root": "https://arch.b4k.co",
-        "pattern": r"arch\.b4k\.co",
+        "root": "https://arch.b4k.dev",
+        "pattern": r"arch\.b4k\.(?:dev|co)",
     },
     "desuarchive": {
         "root": "https://desuarchive.org",
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index d253582..1466390 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -23,6 +23,7 @@ class FuraffinityExtractor(Extractor):
     cookies_domain = ".furaffinity.net"
     cookies_names = ("a", "b")
     root = "https://www.furaffinity.net"
+    request_interval = 1.0
     _warning = True
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/furry34.py b/gallery_dl/extractor/furry34.py
new file mode 100644
index 0000000..e0c7fdb
--- /dev/null
+++ b/gallery_dl/extractor/furry34.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://furry34.com/"""
+
+from .booru import BooruExtractor
+from .. import text
+import collections
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?furry34\.com"
+
+
+class Furry34Extractor(BooruExtractor):
+    category = "furry34"
+    root = "https://furry34.com"
+    root_cdn = "https://furry34com.b-cdn.net"
+    filename_fmt = "{category}_{id}.{extension}"
+    per_page = 30
+
+    TAG_TYPES = {
+        None: "general",
+        1   : "general",
+        2   : "copyright",
+        4   : "character",
+        8   : "artist",
+    }
+    FORMATS = (
+        ("100", "mov.mp4"),
+        ("101", "mov720.mp4"),
+        ("102", "mov480.mp4"),
+        ("10" , "pic.jpg"),
+    )
+
+    def _file_url(self, post):
+        files = post["files"]
+        for fmt, extension in self.FORMATS:
+            if fmt in files:
+                break
+        else:
+            fmt = next(iter(files))
+
+        post_id = post["id"]
+        root = self.root_cdn if files[fmt][0] else self.root
+        post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
+            root, post_id // 1000, post_id, post_id, extension)
+        post["format_id"] = fmt
+        post["format"] = extension.partition(".")[0]
+
+        return url
+
+    def _prepare(self, post):
+        post.pop("files", None)
+        post["date"] = text.parse_datetime(
+            post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+        post["filename"], _, post["format"] = post["filename"].rpartition(".")
+        if "tags" in post:
+            post["tags"] = [t["value"] for t in post["tags"]]
+
+    def _tags(self, post, _):
+        if "tags" not in post:
+            post.update(self._fetch_post(post["id"]))
+
+        tags = collections.defaultdict(list)
+        for tag in post["tags"]:
+            tags[tag["type"] or 1].append(tag["value"])
+        types = self.TAG_TYPES
+        for type, values in tags.items():
+            post["tags_" + types[type]] = values
+
+    def _fetch_post(self, post_id):
+        url = "{}/api/v2/post/{}".format(self.root, post_id)
+        return self.request(url).json()
+
+    def _pagination(self, endpoint, params=None):
+        url = "{}/api{}".format(self.root, endpoint)
+
+        if params is None:
+            params = {}
+        params["sortBy"] = 0
+        params["take"] = self.per_page
+        threshold = self.per_page
+
+        while True:
+            data = self.request(url, method="POST", json=params).json()
+
+            yield from data["items"]
+
+            if len(data["items"]) < threshold:
+                return
+            params["cursor"] = data.get("cursor")
+
+
+class Furry34PostExtractor(Furry34Extractor):
+    subcategory = "post"
+    archive_fmt = "{id}"
+    pattern = BASE_PATTERN + r"/post/(\d+)"
+    example = "https://furry34.com/post/12345"
+
+    def posts(self):
+        return (self._fetch_post(self.groups[0]),)
+
+
+class Furry34PlaylistExtractor(Furry34Extractor):
+    subcategory = "playlist"
+    directory_fmt = ("{category}", "{playlist_id}")
+    archive_fmt = "p_{playlist_id}_{id}"
+    pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+    example = "https://furry34.com/playlists/view/12345"
+
+    def metadata(self):
+        return {"playlist_id": self.groups[0]}
+
+    def posts(self):
+        endpoint = "/v2/post/search/playlist/" + self.groups[0]
+        return self._pagination(endpoint)
+
+
+class Furry34TagExtractor(Furry34Extractor):
+    subcategory = "tag"
+    directory_fmt = ("{category}", "{search_tags}")
+    archive_fmt = "t_{search_tags}_{id}"
+    pattern = BASE_PATTERN + r"/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
+    example = "https://furry34.com/TAG"
+
+    def _init(self):
+        tag, query = self.groups
+        params = text.parse_query(query)
+
+        self.tags = tags = []
+        if tag:
+            tags.extend(text.unquote(text.unquote(tag)).split("|"))
+        if "tags" in params:
+            tags.extend(params["tags"].split("|"))
+
+        type = params.get("type")
+        if type == "video":
+            self.type = 1
+        elif type == "image":
+            self.type = 0
+        else:
+            self.type = None
+
+    def metadata(self):
+        return {"search_tags": " ".join(self.tags)}
+
+    def posts(self):
+        endpoint = "/v2/post/search/root"
+        params = {"includeTags": [t.replace("_", " ") for t in self.tags]}
+        if self.type is not None:
+            params["type"] = self.type
+        return self._pagination(endpoint, params)
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 370cd43..4b04732 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -37,6 +37,7 @@ class GenericExtractor(Extractor):
     example = "generic:https://www.nongnu.org/lzip/"
 
     def __init__(self, match):
+        self.subcategory = match.group('domain')
         Extractor.__init__(self, match)
 
         # Strip the "g(eneric):" prefix
@@ -54,7 +55,6 @@ class GenericExtractor(Extractor):
             self.scheme = 'https://'
         self.url = text.ensure_http_scheme(self.url, self.scheme)
 
-        self.subcategory = match.group('domain')
         self.path = match.group('path')
 
         # Used to resolve relative image urls
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 481fb1e..20f8ea4 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -142,7 +142,8 @@ class ImgurGalleryExtractor(ImgurExtractor):
 class ImgurUserExtractor(ImgurExtractor):
     """Extractor for all images posted by a user"""
     subcategory = "user"
-    pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$"
+    pattern = (BASE_PATTERN + r"/user/(?!me(?:/|$|\?|#))"
+               r"([^/?#]+)(?:/posts|/submitted)?/?$")
     example = "https://imgur.com/user/USER"
 
     def items(self):
@@ -174,6 +175,23 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
             self.key, self.folder_id))
 
 
+class ImgurMeExtractor(ImgurExtractor):
+    """Extractor for your personal uploads"""
+    subcategory = "me"
+    pattern = BASE_PATTERN + r"/user/me(?:/posts)?(/hidden)?"
+    example = "https://imgur.com/user/me"
+
+    def items(self):
+        if not self.cookies_check(("accesstoken",)):
+            self.log.error("'accesstoken' cookie required")
+
+        if self.groups[0]:
+            posts = self.api.accounts_me_hiddenalbums()
+        else:
+            posts = self.api.accounts_me_allposts()
+        return self._items_queue(posts)
+
+
 class ImgurSubredditExtractor(ImgurExtractor):
     """Extractor for a subreddits's imgur links"""
     subcategory = "subreddit"
@@ -215,6 +233,10 @@ class ImgurAPI():
         self.client_id = extractor.config("client-id") or "546c25a59c58ad7"
         self.headers = {"Authorization": "Client-ID " + self.client_id}
 
+    def account_submissions(self, account):
+        endpoint = "/3/account/{}/submissions".format(account)
+        return self._pagination(endpoint)
+
     def account_favorites(self, account):
         endpoint = "/3/account/{}/gallery_favorites".format(account)
         return self._pagination(endpoint)
@@ -224,15 +246,29 @@
             account, folder_id)
         return self._pagination_v2(endpoint)
 
+    def accounts_me_allposts(self):
+        endpoint = "/post/v1/accounts/me/all_posts"
+        params = {
+            "include": "media,tags,account",
+            "page"   : 1,
+            "sort"   : "-created_at",
+        }
+        return self._pagination_v2(endpoint, params)
+
+    def accounts_me_hiddenalbums(self):
+        endpoint = "/post/v1/accounts/me/hidden_albums"
+        params = {
+            "include": "media,tags,account",
+            "page"   : 1,
+            "sort"   : "-created_at",
+        }
+        return self._pagination_v2(endpoint, params)
+
     def gallery_search(self, query):
         endpoint = "/3/gallery/search"
         params = {"q": query}
         return self._pagination(endpoint, params)
 
-    def account_submissions(self, account):
-        endpoint = "/3/account/{}/submissions".format(account)
-        return self._pagination(endpoint)
-
     def gallery_subreddit(self, subreddit):
         endpoint = "/3/gallery/r/{}".format(subreddit)
         return self._pagination(endpoint)
@@ -284,12 +320,16 @@ class ImgurAPI():
         if params is None:
             params = {}
         params["client_id"] = self.client_id
-        params["page"] = 0
-        params["sort"] = "newest"
+        if "page" not in params:
+            params["page"] = 0
+        if "sort" not in params:
+            params["sort"] = "newest"
         headers = {"Origin": "https://imgur.com"}
 
         while True:
-            data = self._call(endpoint, params, headers)["data"]
+            data = self._call(endpoint, params, headers)
+            if "data" in data:
+                data = data["data"]
             if not data:
                 return
             yield from data
diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py
new file mode 100644
index 0000000..0439f5b
--- /dev/null
+++ b/gallery_dl/extractor/imhentai.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://imhentai.xxx/ and mirror sites"""
+
+from .common import GalleryExtractor, BaseExtractor, Message
+from .. import text, util
+
+
+class ImhentaiExtractor(BaseExtractor):
+    basecategory = "IMHentai"
+
+    def _pagination(self, url):
+        prev = None
+        base = self.root + "/gallery/"
+        data = {"_extractor": ImhentaiGalleryExtractor}
+
+        while True:
+            page = self.request(url).text
+            extr = text.extract_from(page)
+
+            while True:
+                gallery_id = extr('<a href="/gallery/', '"')
+                if gallery_id == prev:
+                    continue
+                if not gallery_id:
+                    break
+                yield Message.Queue, base + gallery_id, data
+                prev = gallery_id
+
+            href = text.rextract(page, "class='page-link' href='", "'")[0]
+            if not href or href == "#":
+                return
+            if href[0] == "/":
+                if href[1] == "/":
+                    href = "https:" + href
+                else:
+                    href = self.root + href
+            url = href
+
+
+BASE_PATTERN = ImhentaiExtractor.update({
+    "imhentai": {
+        "root": "https://imhentai.xxx",
+        "pattern": r"(?:www\.)?imhentai\.xxx",
+    },
+    "hentaiera": {
+        "root": "https://hentaiera.com",
+        "pattern": r"(?:www\.)?hentaiera\.com",
+    },
+    "hentairox": {
+        "root": "https://hentairox.com",
+        "pattern": r"(?:www\.)?hentairox\.com",
+    },
+})
+
+
+class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
+    """Extractor for imhentai galleries"""
+    pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
+    example = "https://imhentai.xxx/gallery/12345/"
+
+    def __init__(self, match):
+        ImhentaiExtractor.__init__(self, match)
+        self.gallery_id = self.groups[-1]
+        self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id)
+
+    def metadata(self, page):
+        extr = text.extract_from(page)
+
+        data = {
+            "gallery_id": text.parse_int(self.gallery_id),
+            "title"     : text.unescape(extr("<h1>", "<")),
+            "title_alt" : text.unescape(extr('class="subtitle">', "<")),
+            "parody"    : self._split(extr(">Parodies", "</li>")),
+            "character" : self._split(extr(">Characters", "</li>")),
+            "tags"      : self._split(extr(">Tags", "</li>")),
+            "artist"    : self._split(extr(">Artists", "</li>")),
+            "group"     : self._split(extr(">Groups", "</li>")),
+            "language"  : self._split(extr(">Languages", "</li>")),
+            "type"      : extr("href='/category/", "/"),
+        }
+
+        if data["language"]:
+            data["lang"] = util.language_to_code(data["language"][0])
+
+        return data
+
+    def _split(self, html):
+        results = []
+        for tag in text.extract_iter(html, ">", "</a>"):
+            tag = tag.partition(" <span class='badge'>")[0]
+            if "<" in tag:
+                tag = text.remove_html(tag)
+            results.append(tag)
+        return results
+
+    def images(self, page):
+        data = util.json_loads(text.extr(page, "$.parseJSON('", "'"))
+        base = text.extr(page, 'data-src="', '"').rpartition("/")[0] + "/"
+        exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"}
+
+        results = []
+        for i in map(str, range(1, len(data)+1)):
+            ext, width, height = data[i].split(",")
+            url = base + i + "." + exts[ext]
+            results.append((url, {
+                "width" : text.parse_int(width),
+                "height": text.parse_int(height),
+            }))
+        return results
+
+
+class ImhentaiTagExtractor(ImhentaiExtractor):
+    """Extractor for imhentai tag searches"""
+    subcategory = "tag"
+    pattern = (BASE_PATTERN + r"(/(?:"
+               r"artist|category|character|group|language|parody|tag"
+               r")/([^/?#]+))")
+    example = "https://imhentai.xxx/tag/TAG/"
+
+    def items(self):
+        url = self.root + self.groups[-2] + "/"
+        return self._pagination(url)
+
+
+class ImhentaiSearchExtractor(ImhentaiExtractor):
+    """Extractor for imhentai search results"""
+    subcategory = "search"
+    pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+    example = "https://imhentai.xxx/search/?key=QUERY"
+
+    def items(self):
+        url = self.root + "/search/?" + self.groups[-1]
+        return self._pagination(url)
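ImhentaiGalleryExtractor.images() decodes a per-page spec string of the form "<ext-code>,<width>,<height>", keyed by page number. A small sketch of that decoding with hand-made data standing in for the $.parseJSON(...) payload:

# Sketch of the image-spec decoding in images(); "spec" is a made-up
# stand-in for the JSON embedded in the gallery page.
exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"}
spec = {"1": "j,1200,1697", "2": "w,1280,1811"}
for num in map(str, range(1, len(spec) + 1)):
    ext, width, height = spec[num].split(",")
    print("{}.{} ({}x{})".format(num, exts[ext], width, height))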
example = "https://itaku.ee/profile/USER/gallery" def posts(self): - return self.api.galleries_images(self.item) + return self.api.galleries_images(*self.groups) class ItakuImageExtractor(ItakuExtractor): @@ -75,7 +71,7 @@ class ItakuImageExtractor(ItakuExtractor): example = "https://itaku.ee/images/12345" def posts(self): - return (self.api.image(self.item),) + return (self.api.image(self.groups[0]),) class ItakuSearchExtractor(ItakuExtractor): @@ -84,7 +80,7 @@ class ItakuSearchExtractor(ItakuExtractor): example = "https://itaku.ee/home/images?tags=SEARCH" def posts(self): - params = text.parse_query_list(self.item) + params = text.parse_query_list(self.groups[0]) return self.api.search_images(params) @@ -138,7 +134,7 @@ class ItakuAPI(): params = { "cursor" : None, "owner" : self.user(username)["owner"], - "section" : section, + "sections" : section, "date_range": "", "maturity_rating": ("SFW", "Questionable", "NSFW"), "ordering" : "-date_added", diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 8ffa14b..648f7df 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -190,8 +190,8 @@ class NewgroundsExtractor(Extractor): extr = text.extract_from(page) data = extract_data(extr, post_url) - data["_comment"] = extr( - 'id="author_comments"', '</div>').partition(">")[2] + data["comment_html"] = data["_comment"] = extr( + 'id="author_comments"', '</div>').partition(">")[2].strip() data["comment"] = text.unescape(text.remove_html( data["_comment"] .replace("<p><br></p>", "\n\n").replace("<br>", "\n"), "", "")) diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index e7540f8..815a214 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -83,8 +83,9 @@ class OAuthBase(Extractor): browser = None if browser and browser.open(url): - name = getattr(browser, "name", None) or "Browser" - self.log.info("Opening URL in %s:", name.capitalize()) + name = getattr(browser, "name", None) + if name: + self.log.info("Opening URL with %s:", name.capitalize()) else: self.log.info("Please open this URL in your browser:") diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 866e93a..f5a33d5 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -169,6 +169,12 @@ class PatreonExtractor(Extractor): attr["date"] = text.parse_datetime( attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + try: + attr["campaign"] = (included["campaign"][ + relationships["campaign"]["data"]["id"]]) + except Exception: + attr["campaign"] = None + tags = relationships.get("user_defined_tags") attr["tags"] = [ tag["id"].replace("user_defined;", "") @@ -324,7 +330,8 @@ class PatreonCreatorExtractor(PatreonExtractor): subcategory = "creator" pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))" - r"(?:c/)?([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?") + r"(?:profile/creators|(?:c/)?([^/?#]+)(?:/posts)?)" + r"/?(?:\?([^#]+))?") example = "https://www.patreon.com/USER" def posts(self): @@ -345,7 +352,7 @@ class PatreonCreatorExtractor(PatreonExtractor): return self._pagination(url) def _get_campaign_id(self, creator, query): - if creator.startswith("id:"): + if creator and creator.startswith("id:"): return creator[3:] campaign_id = query.get("c") or query.get("campaign_id") diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index 1b67272..201d4d6 100644 --- 
a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -10,7 +10,6 @@ from .booru import BooruExtractor from .. import text, exception -import operator class PhilomenaExtractor(BooruExtractor): @@ -24,17 +23,22 @@ class PhilomenaExtractor(BooruExtractor): def _init(self): self.api = PhilomenaAPI(self) - if not self.config("svg", True): - self._file_url = operator.itemgetter("view_url") + self.svg = self.config("svg", True) def _file_url(self, post): - if post["format"] == "svg": - return post["view_url"].rpartition(".")[0] + ".svg" - return post["view_url"] + try: + url = post["representations"]["full"] + except Exception: + url = post["view_url"] + + if self.svg and post["format"] == "svg": + return url.rpartition(".")[0] + ".svg" + return url @staticmethod def _prepare(post): - post["date"] = text.parse_datetime(post["created_at"]) + post["date"] = text.parse_datetime( + post["created_at"][:19], "%Y-%m-%dT%H:%M:%S") BASE_PATTERN = PhilomenaExtractor.update({ diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 7fe8869..8a4905d 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -71,9 +71,12 @@ class PixivExtractor(Extractor): if self.meta_user: work.update(self.api.user_detail(work["user"]["id"])) if self.meta_comments: - if work["total_comments"]: - work["comments"] = list( - self.api.illust_comments(work["id"])) + if work["total_comments"] and not work.get("_ajax"): + try: + work["comments"] = list( + self.api.illust_comments(work["id"])) + except Exception: + work["comments"] = () else: work["comments"] = () if self.meta_bookmark and work["is_bookmarked"]: diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py index 89eafc8..f36b1f5 100644 --- a/gallery_dl/extractor/reddit.py +++ b/gallery_dl/extractor/reddit.py @@ -259,6 +259,8 @@ class RedditSubredditExtractor(RedditExtractor): self.subreddit, sub, params = match.groups() self.params = text.parse_query(params) if sub: + if sub == "search" and "restrict_sr" not in self.params: + self.params["restrict_sr"] = "1" self.subcategory += "-" + sub RedditExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 5e3a958..b5cdb9c 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -194,7 +194,6 @@ class SankakuAPI(): self.extractor = extractor self.headers = { "Accept" : "application/vnd.sankaku.api+json;v=2", - "Platform" : "web-app", "Api-Version": None, "Origin" : extractor.root, } diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 8668330..6c43941 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -51,6 +51,23 @@ class SubscribestarExtractor(Extractor): def posts(self): """Yield HTML content of all relevant posts""" + def request(self, url, **kwargs): + while True: + response = Extractor.request(self, url, **kwargs) + + if response.history and "/verify_subscriber" in response.url: + raise exception.StopExtraction( + "HTTP redirect to %s", response.url) + + content = response.content + if len(content) < 250 and b">redirected<" in content: + url = text.unescape(text.extr( + content, b'href="', b'"').decode()) + self.log.debug("HTML redirect message for %s", url) + continue + + return response + def login(self): if self.cookies_check(self.cookies_names): return @@ -189,10 +206,11 @@ class SubscribestarPostExtractor(SubscribestarExtractor): extr = 
text.extract_from(html) return { "post_id" : text.parse_int(extr('data-id="', '"')), - "author_name": text.unescape(extr('href="/', '"')), - "author_id" : text.parse_int(extr('data-user-id="', '"')), - "author_nick": text.unescape(extr('alt="', '"')), "date" : self._parse_datetime(extr( - '<span class="star_link-types">', '<')), + '<div class="section-title_date">', '<')), "content" : extr('<body>', '</body>').strip(), + "author_name": text.unescape(extr( + 'class="star_link" href="/', '"')), + "author_id" : text.parse_int(extr('data-user-id="', '"')), + "author_nick": text.unescape(extr('alt="', '"')), } diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py new file mode 100644 index 0000000..f129b1c --- /dev/null +++ b/gallery_dl/extractor/tiktok.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.tiktok.com/""" + +from .common import Extractor, Message +from .. import text, util, ytdl, exception + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktokv?\.com" + + +class TiktokExtractor(Extractor): + """Base class for TikTok extractors""" + category = "tiktok" + directory_fmt = ("{category}", "{user}") + filename_fmt = ( + "{id}{num:?_//>02} {title[b:150]}{img_id:? [/]/}.{extension}") + archive_fmt = "{id}_{num}_{img_id}" + root = "https://www.tiktok.com" + cookies_domain = ".tiktok.com" + + def _init(self): + self.audio = self.config("audio", True) + self.video = self.config("videos", True) + if not self.config("avatar", True): + self.avatar = util.false + + def items(self): + # We assume that all of the URLs served by urls() come from the same + # author. + downloaded_avatar = not self.avatar() + + for tiktok_url in self.urls(): + tiktok_url = self._sanitize_url(tiktok_url) + data = self._extract_rehydration_data(tiktok_url) + if "webapp.video-detail" not in data: + # Only /video/ links result in the video-detail dict we need. + # Try again using that form of link. 
+ tiktok_url = self._sanitize_url( + data["seo.abtest"]["canonical"]) + data = self._extract_rehydration_data(tiktok_url) + video_detail = data["webapp.video-detail"] + + if not self._check_status_code(video_detail, tiktok_url): + continue + + post = video_detail["itemInfo"]["itemStruct"] + author = post["author"] + post["user"] = user = author["uniqueId"] + post["date"] = text.parse_timestamp(post["createTime"]) + original_title = title = post["desc"] + + if not downloaded_avatar: + avatar_url = author["avatarLarger"] + avatar = self._generate_avatar( + avatar_url, post, user, author["id"]) + yield Message.Directory, avatar + yield Message.Url, avatar_url, avatar + downloaded_avatar = True + + yield Message.Directory, post + ytdl_media = False + + if "imagePost" in post: + if not original_title: + title = "TikTok photo #{}".format(post["id"]) + img_list = post["imagePost"]["images"] + for i, img in enumerate(img_list, 1): + url = img["imageURL"]["urlList"][0] + text.nameext_from_url(url, post) + post.update({ + "type" : "image", + "image" : img, + "title" : title, + "num" : i, + "img_id": post["filename"].partition("~")[0], + "width" : img["imageWidth"], + "height": img["imageHeight"], + }) + yield Message.Url, url, post + + if self.audio and "music" in post: + ytdl_media = "audio" + + elif self.video and "video" in post: + ytdl_media = "video" + + else: + self.log.info("%s: Skipping post", tiktok_url) + + if ytdl_media: + if not original_title: + title = "TikTok {} #{}".format(ytdl_media, post["id"]) + post.update({ + "type" : ytdl_media, + "image" : None, + "filename" : "", + "extension" : "mp3" if ytdl_media == "audio" else "mp4", + "title" : title, + "num" : 0, + "img_id" : "", + "width" : 0, + "height" : 0, + }) + yield Message.Url, "ytdl:" + tiktok_url, post + + # If we couldn't download the avatar because the given user has no + # posts, we'll need to make a separate request for the user's page + # and download the avatar that way. 
+ if not downloaded_avatar: + user_name = self.avatar() + profile_url = "https://www.tiktok.com/@{}".format(user_name) + data = self._extract_rehydration_data(profile_url) + data = data["webapp.user-detail"]["userInfo"]["user"] + data["user"] = user_name + avatar_url = data["avatarLarger"] + avatar = self._generate_avatar( + avatar_url, data, user_name, data["id"]) + yield Message.Directory, avatar + yield Message.Url, avatar_url, avatar + + def avatar(self): + return False + + def _generate_avatar(self, avatar_url, data, user_name, user_id): + avatar = text.nameext_from_url(avatar_url, data.copy()) + avatar.update({ + "type" : "avatar", + "title" : "@" + user_name, + "id" : user_id, + "img_id": avatar["filename"].partition("~")[0], + "num" : 0, + }) + return avatar + + def _sanitize_url(self, url): + return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1)) + + def _extract_rehydration_data(self, url): + html = self.request(url).text + data = text.extr( + html, '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" ' + 'type="application/json">', '</script>') + return util.json_loads(data)["__DEFAULT_SCOPE__"] + + def _check_status_code(self, detail, url): + status = detail.get("statusCode") + if not status: + return True + + if status == 10222: + self.log.error("%s: Login required to access this post", url) + elif status == 10204: + self.log.error("%s: Requested post not available", url) + elif status == 10231: + self.log.error("%s: Region locked - Try downloading with a" + "VPN/proxy connection", url) + else: + self.log.error( + "%s: Received unknown error code %s ('%s')", + url, status, detail.get("statusMsg") or "") + return False + + +class TiktokPostExtractor(TiktokExtractor): + """Extract a single video or photo TikTok link""" + subcategory = "post" + pattern = BASE_PATTERN + r"/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)" + example = "https://www.tiktok.com/@USER/photo/1234567890" + + def urls(self): + user, post_id = self.groups + url = "{}/@{}/video/{}".format(self.root, user or "", post_id) + return (url,) + + +class TiktokVmpostExtractor(TiktokExtractor): + """Extract a single video or photo TikTok VM link""" + subcategory = "vmpost" + pattern = (r"(?:https?://)?(?:" + r"(?:v[mt]\.)?tiktok\.com|(?:www\.)?tiktok\.com/t" + r")/(?!@)([^/?#]+)") + example = "https://vm.tiktok.com/1a2B3c4E5" + + def items(self): + url = text.ensure_http_scheme(self.url) + headers = {"User-Agent": "facebookexternalhit/1.1"} + + response = self.request(url, headers=headers, method="HEAD", + allow_redirects=False, notfound="post") + + url = response.headers.get("Location") + if not url or len(url) <= 28: + # https://www.tiktok.com/?_r=1 + raise exception.NotFoundError("post") + + data = {"_extractor": TiktokPostExtractor} + yield Message.Queue, url.partition("?")[0], data + + +class TiktokUserExtractor(TiktokExtractor): + """Extract a TikTok user's profile""" + subcategory = "user" + pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)" + example = "https://www.tiktok.com/@USER" + + def urls(self): + """Attempt to use yt-dlp/youtube-dl to extract links from a + user's page""" + + try: + module = ytdl.import_module(self.config("module")) + except (ImportError, SyntaxError) as exc: + self.log.error("Cannot import module '%s'", + getattr(exc, "name", "")) + self.log.debug("", exc_info=exc) + raise exception.ExtractionError("yt-dlp or youtube-dl is required " + "for this feature!") + extr_opts = { + "extract_flat" : True, + "ignore_no_formats_error": True, + } + user_opts = { + "retries" : 
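TikTok pages embed their state as JSON inside a __UNIVERSAL_DATA_FOR_REHYDRATION__ script tag, which _extract_rehydration_data slices out of the page HTML. A self-contained sketch of that extraction against a stub page (the html string below is hand-made, not a real TikTok response):

# Sketch of the rehydration-data extraction; "html" is a stub page.
import json

html = ('<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
        'type="application/json">{"__DEFAULT_SCOPE__": '
        '{"webapp.video-detail": {"statusCode": 0}}}</script>')
marker = 'type="application/json">'
start = html.index(marker) + len(marker)
data = json.loads(html[start:html.index("</script>", start)])
print(data["__DEFAULT_SCOPE__"])  # {'webapp.video-detail': {'statusCode': 0}}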
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index a725a2c..3b0ea36 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -12,7 +12,7 @@
 from .booru import BooruExtractor
 from .. import text, exception
 import operator
 
-BASE_PATTERN = r"(?:https?://)?twibooru\.org"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?twibooru\.org"
 
 
 class TwibooruExtractor(BooruExtractor):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 840e846..c391bad 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -234,6 +234,13 @@ class TwitterExtractor(Extractor):
             for fmt in self._size_fallback:
                 yield base + fmt
 
+    def _extract_components(self, tweet, data, files):
+        for component_id in data["components"]:
+            com = data["component_objects"][component_id]
+            for conv in com["data"]["conversation_preview"]:
+                for url in conv.get("mediaUrls") or ():
+                    files.append({"url": url})
+
     def _extract_card(self, tweet, files):
         card = tweet["card"]
         if "legacy" in card:
@@ -272,7 +279,11 @@ class TwitterExtractor(Extractor):
                 return
             elif name == "unified_card":
                 data = util.json_loads(bvals["unified_card"]["string_value"])
-                self._extract_media(tweet, data["media_entities"].values(), files)
+                if "media_entities" in data:
+                    self._extract_media(
+                        tweet, data["media_entities"].values(), files)
+                if "component_objects" in data:
+                    self._extract_components(tweet, data, files)
                 return
 
         if self.cards == "ytdl":
@@ -1065,7 +1076,7 @@ class TwitterAPI():
         else:
             csrf_token = None
         if not csrf_token:
-            csrf_token = util.generate_token(80)
+            csrf_token = util.generate_token()
             cookies.set("ct0", csrf_token, domain=cookies_domain)
 
         auth_token = cookies.get("auth_token", domain=cookies_domain)
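The new _extract_components walks a unified card's component_objects down to the mediaUrls of each conversation preview. Sketched here on a minimal hand-made card payload (the dict below is an assumption for illustration, not real card data):

# Sketch of the traversal in _extract_components(); "card" is made up.
card = {
    "components": ["c1"],
    "component_objects": {"c1": {"data": {"conversation_preview": [
        {"mediaUrls": ["https://pbs.twimg.com/media/example.jpg"]},
        {"mediaUrls": None},
    ]}}},
}
files = []
for component_id in card["components"]:
    com = card["component_objects"][component_id]
    for conv in com["data"]["conversation_preview"]:
        for url in conv.get("mediaUrls") or ():
            files.append({"url": url})
print(files)  # [{'url': 'https://pbs.twimg.com/media/example.jpg'}]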
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 5cde0d6..af3f32d 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -29,7 +29,17 @@ class VipergirlsExtractor(Extractor):
     def _init(self):
         domain = self.config("domain")
         if domain:
-            self.root = text.ensure_http_scheme(domain)
+            pos = domain.find("://")
+            if pos >= 0:
+                self.root = domain.rstrip("/")
+                self.cookies_domain = "." + domain[pos+1:].strip("/")
+            else:
+                domain = domain.strip("/")
+                self.root = "https://" + domain
+                self.cookies_domain = "." + domain
+        else:
+            self.root = "https://viper.click"
+            self.cookies_domain = ".viper.click"
 
     def items(self):
         self.login()
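The reworked vipergirls "domain" option accepts either a bare host or a full URL. This standalone sketch mirrors how the new _init code derives root and cookies_domain from both forms (parse_domain is a hypothetical helper name used only for illustration):

# Mirror of the new domain handling in VipergirlsExtractor._init().
def parse_domain(domain):
    pos = domain.find("://")
    if pos >= 0:
        # full URL: keep its scheme, derive the cookie domain from the host
        return domain.rstrip("/"), "." + domain[pos+1:].strip("/")
    # bare host: default to https
    domain = domain.strip("/")
    return "https://" + domain, "." + domain

assert parse_domain("viper.click") == ("https://viper.click", ".viper.click")
assert parse_domain("https://viper.click/") == (
    "https://viper.click", ".viper.click")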
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 1c0c172..a53409c 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -38,7 +38,7 @@ class VscoExtractor(Extractor):
                 if img["is_video"]:
                     if not videos:
                         continue
-                    url = "https://" + img["video_url"]
+                    url = text.ensure_http_scheme(img["video_url"])
                 else:
                     base = img["responsive_url"].partition("/")[2]
                     cdn, _, path = base.partition("/")
@@ -63,6 +63,10 @@ class VscoExtractor(Extractor):
                     "height": img["height"],
                     "description": img.get("description") or "",
                 })
+                if data["extension"] == "m3u8":
+                    url = "ytdl:" + url
+                    data["_ytdl_manifest"] = "hls"
+                    data["extension"] = "mp4"
                 yield Message.Url, url, data
 
     def images(self):
@@ -294,12 +298,33 @@ class VscoImageExtractor(VscoExtractor):
     pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)"
     example = "https://vsco.co/USER/media/0123456789abcdef"
 
-    def __init__(self, match):
-        VscoExtractor.__init__(self, match)
-        self.media_id = match.group(2)
-
     def images(self):
-        url = "{}/{}/media/{}".format(self.root, self.user, self.media_id)
+        url = "{}/{}/media/{}".format(self.root, self.user, self.groups[1])
         data = self._extract_preload_state(url)
         media = data["medias"]["byId"].popitem()[1]["media"]
         return (self._transform_media(media),)
+
+
+class VscoVideoExtractor(VscoExtractor):
+    """Extractor for vsco.co video links"""
+    subcategory = "video"
+    pattern = USER_PATTERN + r"/video/([^/?#]+)"
+    example = "https://vsco.co/USER/video/012345678-9abc-def0"
+
+    def images(self):
+        url = "{}/{}/video/{}".format(self.root, self.user, self.groups[1])
+        data = self._extract_preload_state(url)
+        media = data["medias"]["byId"].popitem()[1]["media"]
+
+        return ({
+            "_id"           : media["id"],
+            "is_video"      : True,
+            "grid_name"     : "",
+            "upload_date"   : media["createdDate"],
+            "responsive_url": media["posterUrl"],
+            "video_url"     : "ytdl:" + media.get("playbackUrl"),
+            "image_meta"    : None,
+            "width"         : media["width"],
+            "height"        : media["height"],
+            "description"   : media["description"],
+        },)
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
index fc1badb..cacefd6 100644
--- a/gallery_dl/extractor/weebcentral.py
+++ b/gallery_dl/extractor/weebcentral.py
@@ -50,14 +50,16 @@ class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
     def metadata(self, page):
         extr = text.extract_from(page)
         manga_id = extr("'series_id': '", "'")
-
-        data = self._extract_manga_data(manga_id)
-        data["chapter_id"] = self.groups[1]
-        data["chapter_type"] = extr("'chapter_type': '", "'")
-
+        chapter_type = extr("'chapter_type': '", "'")
         chapter, sep, minor = extr("'number': '", "'").partition(".")
-        data["chapter"] = text.parse_int(chapter)
-        data["chapter_minor"] = sep + minor
+
+        data = {
+            "chapter": text.parse_int(chapter),
+            "chapter_id": self.groups[1],
+            "chapter_type": chapter_type,
+            "chapter_minor": sep + minor,
+        }
+        data.update(self._extract_manga_data(manga_id))
         return data
 
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 9885d79..3ed5a06 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -33,6 +33,7 @@ class WeiboExtractor(Extractor):
         self.livephoto = self.config("livephoto", True)
         self.retweets = self.config("retweets", False)
         self.videos = self.config("videos", True)
+        self.movies = self.config("movies", False)
         self.gifs = self.config("gifs", True)
         self.gifs_video = (self.gifs == "video")
 
@@ -134,7 +135,10 @@ class WeiboExtractor(Extractor):
         if "page_info" in status:
             info = status["page_info"]
             if "media_info" in info and self.videos:
-                append(self._extract_video(info["media_info"]))
+                if info.get("type") != "5" or self.movies:
+                    append(self._extract_video(info["media_info"]))
+                else:
+                    self.log.debug("%s: Ignoring 'movie' video", status["id"])
 
     def _extract_video(self, info):
         try:
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 2914927..bea35e3 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -551,28 +551,24 @@ class DownloadJob(Job):
 
         archive_path = cfg("archive")
         if archive_path:
-            archive_path = util.expand_path(archive_path)
-
+            archive_table = cfg("archive-table")
             archive_prefix = cfg("archive-prefix")
             if archive_prefix is None:
-                archive_prefix = extr.category
+                archive_prefix = extr.category if archive_table is None else ""
 
             archive_format = cfg("archive-format")
             if archive_format is None:
                 archive_format = extr.archive_fmt
 
             try:
-                if "{" in archive_path:
-                    archive_path = formatter.parse(
-                        archive_path).format_map(kwdict)
-                if cfg("archive-mode") == "memory":
-                    archive_cls = archive.DownloadArchiveMemory
-                else:
-                    archive_cls = archive.DownloadArchive
-                self.archive = archive_cls(
+                self.archive = archive.connect(
                     archive_path,
-                    archive_prefix + archive_format,
+                    archive_prefix,
+                    archive_format,
+                    archive_table,
+                    cfg("archive-mode"),
                     cfg("archive-pragma"),
+                    kwdict,
                 )
             except Exception as exc:
                 extr.log.warning(
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 222679a..3c03271 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -179,11 +179,15 @@ class PrintAction(argparse.Action):
         if not format_string:
             return
 
-        if "{" not in format_string and \
-                " " not in format_string and \
-                format_string[0] != "\f":
-            format_string = "{" + format_string + "}"
-        if format_string[-1] != "\n":
+        if format_string.startswith("\\f"):
+            format_string = "\f" + format_string[2:]
+
+        if format_string[0] == "\f":
+            if format_string[1] == "F" and format_string[-1] != "\n":
+                format_string += "\n"
+        elif "{" not in format_string and " " not in format_string:
+            format_string = "{" + format_string + "}\n"
+        elif format_string[-1] != "\n":
             format_string += "\n"
 
         namespace.postprocessors.append({
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index a9143a6..3099547 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2018-2023 Mike Fährmann
+# Copyright 2018-2025 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
 
 """Common classes and constants used by postprocessor modules."""
 
-from .. import util, formatter, archive
+from .. import archive
 
 
 class PostProcessor():
@@ -25,11 +25,11 @@ class PostProcessor():
         archive_path = options.get("archive")
         if archive_path:
             extr = job.extractor
-            archive_path = util.expand_path(archive_path)
+            archive_table = options.get("archive-table")
 
             archive_prefix = options.get("archive-prefix")
             if archive_prefix is None:
-                archive_prefix = extr.category
+                archive_prefix = extr.category if archive_table is None else ""
 
             archive_format = options.get("archive-format")
             if archive_format is None:
@@ -38,13 +38,14 @@ class PostProcessor():
                 archive_format = prefix + extr.archive_fmt
 
             try:
-                if "{" in archive_path:
-                    archive_path = formatter.parse(archive_path).format_map(
-                        job.pathfmt.kwdict)
-                self.archive = archive.DownloadArchive(
+                self.archive = archive.connect(
                     archive_path,
-                    archive_prefix + archive_format,
+                    archive_prefix,
+                    archive_format,
+                    archive_table,
+                    "file",
                     options.get("archive-pragma"),
+                    job.pathfmt.kwdict,
                     "_archive_" + self.name,
                 )
             except Exception as exc:
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index 3bb63c8..c6bc54d 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -9,7 +9,7 @@
 """Compare versions of the same file and replace/enumerate them on mismatch"""
 
 from .common import PostProcessor
-from .. import text, util, exception
+from .. import text, util, output, exception
 import os
 
 
@@ -83,7 +83,7 @@ class ComparePP(PostProcessor):
                 self._equal_cnt += 1
                 if self._equal_cnt >= self._equal_max:
                     util.remove_file(pathfmt.temppath)
-                    print()
+                    output.stderr_write("\n")
                     raise self._equal_exc()
                 pathfmt.delete = True
 
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index fec4ab0..3a32b39 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -9,7 +9,7 @@
 """Convert Pixiv Ugoira to WebM"""
 
 from .common import PostProcessor
-from .. import util
+from .. import util, output
 import subprocess
 import tempfile
 import zipfile
@@ -226,13 +226,13 @@ class UgoiraPP(PostProcessor):
             if self._finalize:
                 self._finalize(pathfmt, tempdir)
         except OSError as exc:
-            print()
+            output.stderr_write("\n")
             self.log.error("Unable to invoke FFmpeg (%s: %s)",
                            exc.__class__.__name__, exc)
             self.log.debug("", exc_info=exc)
             pathfmt.realpath = pathfmt.temppath
         except Exception as exc:
-            print()
+            output.stderr_write("\n")
             self.log.error("%s: %s", exc.__class__.__name__, exc)
             self.log.debug("", exc_info=exc)
             pathfmt.realpath = pathfmt.temppath
@@ -296,7 +296,7 @@ class UgoiraPP(PostProcessor):
         out = None if self.output else subprocess.DEVNULL
         retcode = util.Popen(args, stdout=out, stderr=out).wait()
         if retcode:
-            print()
+            output.stderr_write("\n")
             self.log.error("Non-zero exit status when running %s (%s)",
                            args, retcode)
             raise ValueError()
diff --git a/gallery_dl/update.py b/gallery_dl/update.py
index b068e37..6650ec4 100644
--- a/gallery_dl/update.py
+++ b/gallery_dl/update.py
@@ -12,7 +12,7 @@ import sys
 
 from .extractor.common import Extractor, Message
 from .job import DownloadJob
-from . import util, version, exception
+from . import util, version, output, exception
 
 REPOS = {
     "stable" : "mikf/gallery-dl",
@@ -23,14 +23,14 @@ REPOS = {
 
 BINARIES_STABLE = {
     "windows"    : "gallery-dl.exe",
-    "windows_x86": "gallery-dl.exe",
     "windows_x64": "gallery-dl.exe",
+    "windows_x86": "gallery-dl_x86.exe",
     "linux"      : "gallery-dl.bin",
 }
 BINARIES_DEV = {
     "windows"    : "gallery-dl_windows.exe",
-    "windows_x86": "gallery-dl_windows_x86.exe",
     "windows_x64": "gallery-dl_windows.exe",
+    "windows_x86": "gallery-dl_windows_x86.exe",
     "linux"      : "gallery-dl_linux",
     "macos"      : "gallery-dl_macos",
 }
@@ -143,13 +143,13 @@ class UpdateJob(DownloadJob):
     def _warning(self, msg, *args):
         if self._newline:
             self._newline = False
-            print()
+            output.stderr_write("\n")
         self.extractor.log.warning(msg, *args)
 
     def _error(self, msg, *args):
         if self._newline:
             self._newline = False
-            print()
+            output.stderr_write("\n")
         self.status |= 1
         self.extractor.log.error(msg, *args)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 2302088..7034c0c 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -770,7 +770,7 @@ def import_file(path):
         finally:
             del sys.path[0]
     else:
-        return __import__(name)
+        return __import__(name.replace("-", "_"))
 
 
 def build_duration_func(duration, min=0.0):
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d252bed..0c75005 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.28.5"
+__version__ = "1.29.0"
 __variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index 32545e2..319e781 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -20,7 +20,7 @@ def import_module(module_name):
             return __import__("yt_dlp")
         except (ImportError, SyntaxError):
             return __import__("youtube_dl")
-    return __import__(module_name.replace("-", "_"))
+    return util.import_file(module_name)
 
 
 def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 35cccc4..5a9a20b 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -20,7 +20,6 @@ import tempfile
 import threading
 import http.server
 
-
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from gallery_dl import downloader, extractor, output, config, path  # noqa E402
 from gallery_dl.downloader.http import MIME_TYPES, SIGNATURE_CHECKS  # noqa E402
@@ -55,6 +54,9 @@ class TestDownloaderModule(unittest.TestCase):
         else:
             del sys.modules["youtube_dl"]
 
+    def setUp(self):
+        downloader._cache.clear()
+
     def tearDown(self):
         downloader._cache.clear()
 
@@ -107,6 +109,64 @@ class TestDownloaderModule(unittest.TestCase):
         self.assertEqual(import_module.call_count, 1)
 
 
+class TestDownloaderConfig(unittest.TestCase):
+
+    def setUp(self):
+        config.clear()
+
+    def tearDown(self):
+        config.clear()
+
+    def test_default_http(self):
+        job = FakeJob()
+        extr = job.extractor
+        dl = downloader.find("http")(job)
+
+        self.assertEqual(dl.adjust_extension, True)
+        self.assertEqual(dl.chunk_size, 32768)
+        self.assertEqual(dl.metadata, None)
+        self.assertEqual(dl.progress, 3.0)
+        self.assertEqual(dl.validate, True)
+        self.assertEqual(dl.headers, None)
+        self.assertEqual(dl.minsize, None)
+        self.assertEqual(dl.maxsize, None)
+        self.assertEqual(dl.mtime, True)
+        self.assertEqual(dl.rate, None)
+        self.assertEqual(dl.part, True)
+        self.assertEqual(dl.partdir, None)
+
+        self.assertIs(dl.interval_429, extr._interval_429)
+        self.assertIs(dl.retry_codes, extr._retry_codes)
+        self.assertIs(dl.retries, extr._retries)
+        self.assertIs(dl.timeout, extr._timeout)
+        self.assertIs(dl.proxies, extr._proxies)
+        self.assertIs(dl.verify, extr._verify)
+
+    def test_config_http(self):
+        config.set((), "rate", 42)
+        config.set((), "mtime", False)
+        config.set((), "headers", {"foo": "bar"})
+        config.set(("downloader",), "retries", -1)
+        config.set(("downloader", "http"), "filesize-min", "10k")
+        config.set(("extractor", "generic"), "verify", False)
+        config.set(("extractor", "generic", "example.org"), "timeout", 10)
+        config.set(("extractor", "generic", "http"), "part", False)
+        config.set(
+            ("extractor", "generic", "example.org", "http"), "headers", {})
+
+        job = FakeJob()
+        dl = downloader.find("http")(job)
+
+        self.assertEqual(dl.headers, {"foo": "bar"})
+        self.assertEqual(dl.minsize, 10240)
+        self.assertEqual(dl.retries, float("inf"))
+        self.assertEqual(dl.timeout, 10)
+        self.assertEqual(dl.verify, False)
+        self.assertEqual(dl.mtime, False)
+        self.assertEqual(dl.rate, 42)
+        self.assertEqual(dl.part, False)
+
+
 class TestDownloaderBase(unittest.TestCase):
 
     @classmethod
