author      Unit 193 <unit193@unit193.net>  2025-03-01 19:51:45 -0500
committer   Unit 193 <unit193@unit193.net>  2025-03-01 19:51:45 -0500
commit      bc1c79d35e0a75bc8da8f6f010df779c4acca201 (patch)
tree        9d8808a5aec770221eb667160a3fbda61f9d5d49
parent      75e3edb22dad2fc506494bb90ee6b331f5169adf (diff)
parent      889c7b8caec8fc0b9c7a583ed1d9cfa43518fc42 (diff)
Update upstream source from tag 'upstream/1.29.0'
Update to upstream version '1.29.0' with Debian dir 7b309aa6ccc040a2faaf51d37a63f5233590a8d7
-rw-r--r--  CHANGELOG.md | 100
-rw-r--r--  PKG-INFO | 8
-rw-r--r--  README.rst | 6
-rw-r--r--  data/man/gallery-dl.1 | 2
-rw-r--r--  data/man/gallery-dl.conf.5 | 280
-rw-r--r--  docs/gallery-dl.conf | 21
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 8
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 4
-rw-r--r--  gallery_dl/aes.py | 7
-rw-r--r--  gallery_dl/archive.py | 189
-rw-r--r--  gallery_dl/downloader/common.py | 54
-rw-r--r--  gallery_dl/downloader/http.py | 28
-rw-r--r--  gallery_dl/downloader/ytdl.py | 7
-rw-r--r--  gallery_dl/extractor/__init__.py | 4
-rw-r--r--  gallery_dl/extractor/bilibili.py | 46
-rw-r--r--  gallery_dl/extractor/boosty.py | 92
-rw-r--r--  gallery_dl/extractor/bunkr.py | 54
-rw-r--r--  gallery_dl/extractor/chevereto.py | 3
-rw-r--r--  gallery_dl/extractor/common.py | 2
-rw-r--r--  gallery_dl/extractor/discord.py | 399
-rw-r--r--  gallery_dl/extractor/erome.py | 14
-rw-r--r--  gallery_dl/extractor/foolfuuka.py | 4
-rw-r--r--  gallery_dl/extractor/furaffinity.py | 1
-rw-r--r--  gallery_dl/extractor/furry34.py | 156
-rw-r--r--  gallery_dl/extractor/generic.py | 2
-rw-r--r--  gallery_dl/extractor/imgur.py | 56
-rw-r--r--  gallery_dl/extractor/imhentai.py | 140
-rw-r--r--  gallery_dl/extractor/issuu.py | 4
-rw-r--r--  gallery_dl/extractor/itaku.py | 14
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 4
-rw-r--r--  gallery_dl/extractor/oauth.py | 5
-rw-r--r--  gallery_dl/extractor/patreon.py | 11
-rw-r--r--  gallery_dl/extractor/philomena.py | 18
-rw-r--r--  gallery_dl/extractor/pixiv.py | 9
-rw-r--r--  gallery_dl/extractor/reddit.py | 2
-rw-r--r--  gallery_dl/extractor/sankaku.py | 1
-rw-r--r--  gallery_dl/extractor/subscribestar.py | 26
-rw-r--r--  gallery_dl/extractor/tiktok.py | 253
-rw-r--r--  gallery_dl/extractor/twibooru.py | 2
-rw-r--r--  gallery_dl/extractor/twitter.py | 15
-rw-r--r--  gallery_dl/extractor/vipergirls.py | 12
-rw-r--r--  gallery_dl/extractor/vsco.py | 37
-rw-r--r--  gallery_dl/extractor/weebcentral.py | 16
-rw-r--r--  gallery_dl/extractor/weibo.py | 6
-rw-r--r--  gallery_dl/job.py | 20
-rw-r--r--  gallery_dl/option.py | 14
-rw-r--r--  gallery_dl/postprocessor/common.py | 19
-rw-r--r--  gallery_dl/postprocessor/compare.py | 4
-rw-r--r--  gallery_dl/postprocessor/ugoira.py | 8
-rw-r--r--  gallery_dl/update.py | 10
-rw-r--r--  gallery_dl/util.py | 2
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  gallery_dl/ytdl.py | 2
-rw-r--r--  test/test_downloader.py | 62
54 files changed, 2009 insertions(+), 256 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8856682..4294e8a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,53 +1,59 @@
-## 1.28.5 - 2025-01-28
+## 1.29.0 - 2025-03-01
+### Changes
+- build `gallery-dl.exe` on Windows 10 / Python 3.13 ([#6684](https://github.com/mikf/gallery-dl/issues/6684))
+- provide Windows 7 / Python 3.8 builds as `gallery-dl_x86.exe`
### Extractors
#### Additions
-- [nekohouse] add support ([#5241](https://github.com/mikf/gallery-dl/issues/5241), [#6738](https://github.com/mikf/gallery-dl/issues/6738))
-- [turboimagehost] add support for galleries ([#6855](https://github.com/mikf/gallery-dl/issues/6855))
-- [xfolio] add support ([#5514](https://github.com/mikf/gallery-dl/issues/5514), [#6351](https://github.com/mikf/gallery-dl/issues/6351), [#6837](https://github.com/mikf/gallery-dl/issues/6837))
+- [bilibili] add `user-articles-favorite` extractor ([#6725](https://github.com/mikf/gallery-dl/issues/6725) [#6781](https://github.com/mikf/gallery-dl/issues/6781))
+- [boosty] add `direct-messages` extractor ([#6768](https://github.com/mikf/gallery-dl/issues/6768))
+- [discord] add support ([#454](https://github.com/mikf/gallery-dl/issues/454) [#6836](https://github.com/mikf/gallery-dl/issues/6836) [#7059](https://github.com/mikf/gallery-dl/issues/7059) [#7067](https://github.com/mikf/gallery-dl/issues/7067))
+- [furry34] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078) [#7018](https://github.com/mikf/gallery-dl/issues/7018))
+- [hentaiera] add support ([#3046](https://github.com/mikf/gallery-dl/issues/3046) [#6952](https://github.com/mikf/gallery-dl/issues/6952) [#7020](https://github.com/mikf/gallery-dl/issues/7020))
+- [hentairox] add support ([#7003](https://github.com/mikf/gallery-dl/issues/7003))
+- [imgur] add support for personal posts ([#6990](https://github.com/mikf/gallery-dl/issues/6990))
+- [imhentai] add support ([#1660](https://github.com/mikf/gallery-dl/issues/1660) [#3046](https://github.com/mikf/gallery-dl/issues/3046) [#3824](https://github.com/mikf/gallery-dl/issues/3824) [#4338](https://github.com/mikf/gallery-dl/issues/4338) [#5936](https://github.com/mikf/gallery-dl/issues/5936))
+- [tiktok] add support ([#3061](https://github.com/mikf/gallery-dl/issues/3061) [#4177](https://github.com/mikf/gallery-dl/issues/4177) [#5646](https://github.com/mikf/gallery-dl/issues/5646) [#6878](https://github.com/mikf/gallery-dl/issues/6878) [#6708](https://github.com/mikf/gallery-dl/issues/6708))
+- [vsco] support `/video/` URLs ([#4295](https://github.com/mikf/gallery-dl/issues/4295) [#6973](https://github.com/mikf/gallery-dl/issues/6973))
#### Fixes
-- [4archive] fix `TypeError`
-- [adultempire] bypass age confirmation check
-- [architizer] fix extraction
-- [artstation] avoid Cloudflare challenges ([#5817](https://github.com/mikf/gallery-dl/issues/5817), [#5658](https://github.com/mikf/gallery-dl/issues/5658), [#5564](https://github.com/mikf/gallery-dl/issues/5564), [#5554](https://github.com/mikf/gallery-dl/issues/5554))
-- [deviantart] prevent crash when accessing `premium_folder` data ([#6873](https://github.com/mikf/gallery-dl/issues/6873))
-- [fapachi] fix extraction ([#6881](https://github.com/mikf/gallery-dl/issues/6881))
-- [issuu] fix `user` extractor
-- [kemonoparty] fix `username` metadata and filtering by `tag` for `/posts` URLs ([#6833](https://github.com/mikf/gallery-dl/issues/6833))
-- [mangafox] fix chapter extraction
-- [mangahere] fix chapter extraction
-- [pixiv] fix `sanity_level` workaround ([#4327](https://github.com/mikf/gallery-dl/issues/4327))
-- [pornpics] fix pagination results from HTML pages
-- [twitter] handle exceptions during file extraction ([#6647](https://github.com/mikf/gallery-dl/issues/6647))
-- [vsco] fix `JSONDecodeError` ([#6887](https://github.com/mikf/gallery-dl/issues/6887), [#6891](https://github.com/mikf/gallery-dl/issues/6891))
-- [weebcentral] fix extraction ([#6860](https://github.com/mikf/gallery-dl/issues/6860))
-- [xhamster] fix `gallery` extractor ([#6818](https://github.com/mikf/gallery-dl/issues/6818), [#6876](https://github.com/mikf/gallery-dl/issues/6876))
+- [bunkr] decrypt file URLs ([#7058](https://github.com/mikf/gallery-dl/issues/7058) [#7070](https://github.com/mikf/gallery-dl/issues/7070) [#7085](https://github.com/mikf/gallery-dl/issues/7085) [#7089](https://github.com/mikf/gallery-dl/issues/7089) [#7090](https://github.com/mikf/gallery-dl/issues/7090))
+- [chevereto/jpgfish] fix extraction ([#7073](https://github.com/mikf/gallery-dl/issues/7073) [#7079](https://github.com/mikf/gallery-dl/issues/7079))
+- [generic] fix config lookups by subcategory
+- [philomena] fix `date` values without UTC offset ([#6921](https://github.com/mikf/gallery-dl/issues/6921))
+- [philomena] download `full` URLs to prevent potential 404 errors ([#6922](https://github.com/mikf/gallery-dl/issues/6922))
+- [pixiv] prevent exceptions during `comments` extraction ([#6965](https://github.com/mikf/gallery-dl/issues/6965))
+- [reddit] restrict subreddit search results ([#7025](https://github.com/mikf/gallery-dl/issues/7025))
+- [sankaku] fix extraction ([#7071](https://github.com/mikf/gallery-dl/issues/7071) [#7072](https://github.com/mikf/gallery-dl/issues/7072))
+- [subscribestar] fix `post` extractor ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [twitter] revert generated CSRF token length to 32 characters ([#6895](https://github.com/mikf/gallery-dl/issues/6895))
+- [vipergirls] change default `domain` to `viper.click` ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
+- [weebcentral] fix extracting wrong number of chapter pages ([#6966](https://github.com/mikf/gallery-dl/issues/6966))
#### Improvements
-- [batoto] use `chapter_id` in default archive IDs ([#6835](https://github.com/mikf/gallery-dl/issues/6835))
-- [e621] support `e621.cc` and `e621.anthro.fr` frontend URLs ([#6809](https://github.com/mikf/gallery-dl/issues/6809))
-- [e621] prevent premature pagination end ([#6886](https://github.com/mikf/gallery-dl/issues/6886))
-- [facebook] allow accessing all metadata in `directory` format strings ([#6874](https://github.com/mikf/gallery-dl/issues/6874))
-- [hiperdex] update domain to `hiperdex.com`
-- [kemonoparty] enable filtering creator posts by tag ([#6833](https://github.com/mikf/gallery-dl/issues/6833))
-- [khinsider] add `covers` option ([#6844](https://github.com/mikf/gallery-dl/issues/6844))
-- [komikcast] update domain to `komikcast.la`
-- [lofter] improve error handling ([#6865](https://github.com/mikf/gallery-dl/issues/6865))
-- [pornpics] avoid redirect when retrieving a gallery page
-- [urlgalleries] support new URL format
+- [b4k] update domain to `arch.b4k.dev` ([#6955](https://github.com/mikf/gallery-dl/issues/6955) [#6956](https://github.com/mikf/gallery-dl/issues/6956))
+- [bunkr] update default archive ID format ([#6935](https://github.com/mikf/gallery-dl/issues/6935))
+- [bunkr] provide fallback URLs for 403 download links ([#6732](https://github.com/mikf/gallery-dl/issues/6732) [#6972](https://github.com/mikf/gallery-dl/issues/6972))
+- [bunkr] implement fast `--range` support ([#6985](https://github.com/mikf/gallery-dl/issues/6985))
+- [furaffinity] use a default delay of 1 second between requests ([#7054](https://github.com/mikf/gallery-dl/issues/7054))
+- [itaku] support gallery section URLs ([#6951](https://github.com/mikf/gallery-dl/issues/6951))
+- [patreon] support `/profile/creators` URLs
+- [subscribestar] detect and handle redirects ([#6916](https://github.com/mikf/gallery-dl/issues/6916))
+- [twibooru] match URLs with `www` subdomain ([#6903](https://github.com/mikf/gallery-dl/issues/6903))
+- [twitter] support `grok` cards content ([#7040](https://github.com/mikf/gallery-dl/issues/7040))
+- [vsco] improve `m3u8` handling
+- [weibo] add `movies` option ([#6988](https://github.com/mikf/gallery-dl/issues/6988))
#### Metadata
-- [bunkr] extract better `filename` metadata ([#6824](https://github.com/mikf/gallery-dl/issues/6824))
-- [hiperdex] fix `description` metadata
-- [khinsider] extract more `album` metadata ([#6844](https://github.com/mikf/gallery-dl/issues/6844))
-- [mangaread] fix manga metadata extraction
-- [rule34xyz] fix `date` and `tags` metadata
-- [saint] fix metadata of `/d/` URLs
-- [toyhouse] fix `date`, `artists`, and `characters` metadata
-- [webtoons] fix `username` and `author_name` metadata
-#### Removals
-- [cohost] remove module
-- [fanleaks] remove module
-- [shimmie2] remove `tentaclerape.net`
-- [szurubooru] remove `booru.foalcon.com`
+- [bunkr] extract `id_url` metadata ([#6935](https://github.com/mikf/gallery-dl/issues/6935))
+- [erome] extract `tags` metadata ([#7076](https://github.com/mikf/gallery-dl/issues/7076))
+- [issuu] unescape HTML entities
+- [newgrounds] provide `comment_html` metadata ([#7038](https://github.com/mikf/gallery-dl/issues/7038))
+- [patreon] extract `campaign` metadata ([#6989](https://github.com/mikf/gallery-dl/issues/6989))
+### Downloaders
+- implement `downloader` options per extractor category
+- [http] add `sleep-429` option ([#6996](https://github.com/mikf/gallery-dl/issues/6996))
+- [ytdl] support specifying `module` as filesystem paths ([#6991](https://github.com/mikf/gallery-dl/issues/6991))
+### Archives
+- [archive] implement support for PostgreSQL databases ([#6152](https://github.com/mikf/gallery-dl/issues/6152))
+- [archive] add `archive-table` option ([#6152](https://github.com/mikf/gallery-dl/issues/6152))
### Miscellaneous
-- [docs] add `nix` docs to README ([#6606](https://github.com/mikf/gallery-dl/issues/6606))
-- [path] fix exception when using `--rename-to` + `--no-download` ([#6861](https://github.com/mikf/gallery-dl/issues/6861))
-- [release] include `scripts/run_tests.py` in release tarball ([#6856](https://github.com/mikf/gallery-dl/issues/6856))
+- [aes] handle errors during `cryptodome` import ([#6906](https://github.com/mikf/gallery-dl/issues/6906))
+- [executables] fix loading `certifi` SSL certificates ([#6393](https://github.com/mikf/gallery-dl/issues/6393))
+- improve `\f` format string handling for `--print`
diff --git a/PKG-INFO b/PKG-INFO
index 6db2d05..148bf37 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.28.5
+Version: 1.29.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -89,6 +89,7 @@ Optional
- PyYAML_: YAML configuration file support
- toml_: TOML configuration file support for Python<3.11
- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
+- Psycopg_: PostgreSQL archive support
Installation
@@ -131,9 +132,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
Nightly Builds
@@ -527,6 +528,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PyYAML: https://pyyaml.org/
.. _toml: https://pypi.org/project/toml/
.. _SecretStorage: https://pypi.org/project/SecretStorage/
+.. _Psycopg: https://www.psycopg.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/README.rst b/README.rst
index 4033183..5825ead 100644
--- a/README.rst
+++ b/README.rst
@@ -34,6 +34,7 @@ Optional
- PyYAML_: YAML configuration file support
- toml_: TOML configuration file support for Python<3.11
- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
+- Psycopg_: PostgreSQL archive support
Installation
@@ -76,9 +77,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
Nightly Builds
@@ -472,6 +473,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PyYAML: https://pyyaml.org/
.. _toml: https://pypi.org/project/toml/
.. _SecretStorage: https://pypi.org/project/SecretStorage/
+.. _Psycopg: https://www.psycopg.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index b172453..b17a8f4 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-01-28" "1.28.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-03-01" "1.29.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 343188a..847efaa 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-01-28" "1.28.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-03-01" "1.29.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -475,6 +475,9 @@ response before \f[I]retrying\f[] the request.
\f[I]xfolio\f[],
\f[I]zerochan\f[]
.br
+* \f[I]"1.0"\f[]
+\f[I]furaffinity\f[]
+.br
* \f[I]"1.0-2.0"\f[]
\f[I]flickr\f[],
\f[I]pexels\f[],
@@ -601,7 +604,7 @@ the API key found in your user profile, not the actual account password.
\f[I]cookies\f[] is required
Note: Leave the \f[I]password\f[] value empty or undefined
-to be prompted for a passeword when performing a login
+to be prompted for a password when performing a login
(see \f[I]getpass()\f[]).
@@ -814,11 +817,16 @@ or a \f[I]list\f[] with IP and explicit port number as elements.
* \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise
.IP "Description:" 4
-User-Agent header value to be used for HTTP requests.
+User-Agent header value used for HTTP requests.
Setting this value to \f[I]"browser"\f[] will try to automatically detect
and use the \f[I]User-Agent\f[] header of the system's default browser.
+Note:
+This option has *no* effect if
+\f[I]extractor.browser\f[]
+is enabled.
+
.SS extractor.*.browser
.IP "Type:" 6
@@ -841,6 +849,15 @@ by using their default HTTP headers and TLS ciphers for HTTP requests.
Optionally, the operating system used in the \f[I]User-Agent\f[] header can be
specified after a \f[I]:\f[] (\f[I]windows\f[], \f[I]linux\f[], or \f[I]macos\f[]).
+Note:
+This option overrides
+\f[I]user-agent\f[]
+and sets custom
+\f[I]headers\f[]
+and
+\f[I]ciphers\f[]
+defaults.
+
Note: \f[I]requests\f[] and \f[I]urllib3\f[] only support HTTP/1.1, while a real
browser would use HTTP/2.
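A minimal gallery-dl.conf sketch of the interaction described above; with "browser" set, the "user-agent" value is ignored (both values here are illustrative placeholders):

    {
        "extractor": {
            "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:115.0) Gecko/20100101 Firefox/115.0",
            "browser": "firefox:windows"
        }
    }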
@@ -1071,13 +1088,19 @@ Note: Any \f[I]blacklist\f[] setting will automatically include
.SS extractor.*.archive
.IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
.IP "Default:" 9
\f[I]null\f[]
.IP "Example:" 4
-"$HOME/.archives/{category}.sqlite3"
+.br
+* "$HOME/.archives/{category}.sqlite3"
+.br
+* "postgresql://user:pass@host/database"
.IP "Description:" 4
File to store IDs of downloaded files in. Downloads of files
@@ -1089,6 +1112,11 @@ database, as either lookup operations are significantly faster or
memory requirements are significantly lower when the
amount of stored IDs gets reasonably large.
+If this value is a
+\f[I]PostgreSQL Connection URI\f[],
+the archive will use this PostgreSQL database as backend (requires
+\f[I]Psycopg\f[]).
+
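Both archive forms described above, side by side as gallery-dl.conf sketches reusing the documented example values (only one "archive" entry would be active at a time):

    {
        "extractor": {
            "archive": "$HOME/.archives/{category}.sqlite3"
        }
    }

    {
        "extractor": {
            "archive": "postgresql://user:pass@host/database"
        }
    }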
Note: Archive files that do not already exist get generated automatically.
Note: Archive paths support regular \f[I]format string\f[] replacements,
@@ -1154,7 +1182,10 @@ and only write them after successful job completion.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"{category}"\f[]
+.br
+* \f[I]""\f[] when \f[I]archive-table\f[] is set
+.br
+* \f[I]"{category}"\f[] otherwise
.IP "Description:" 4
Prefix for archive IDs.
@@ -1174,6 +1205,20 @@ See \f[I]<https://www.sqlite.org/pragma.html#toc>\f[]
for available \f[I]PRAGMA\f[] statements and further details.
+.SS extractor.*.archive-table
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"archive"\f[]
+
+.IP "Example:" 4
+"{category}"
+
+.IP "Description:" 4
+\f[I]Format string\f[] selecting the archive database table name.
+
+
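The archive-table value is itself a format string; a sketch pairing the documented "{category}" example with the PostgreSQL backend from extractor.*.archive, so that each category writes to its own table (connection details are placeholders):

    {
        "extractor": {
            "archive": "postgresql://user:pass@host/database",
            "archive-table": "{category}"
        }
    }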
.SS extractor.*.actions
.IP "Type:" 6
.br
@@ -2562,6 +2607,41 @@ Leave \f[I]SIZE\f[] empty to download the regular, small avatar format.
.br
+.SS extractor.discord.embeds
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["image", "gifv", "video"]\f[]
+
+.IP "Description:" 4
+Selects which embed types to download from.
+
+Supported embed types are
+\f[I]image\f[], \f[I]gifv\f[], \f[I]video\f[], \f[I]rich\f[], \f[I]article\f[], \f[I]link\f[].
+
+
+.SS extractor.discord.threads
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract threads from Discord text channels.
+
+
+.SS extractor.discord.token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Discord Bot Token for API requests.
+
+You can follow \f[I]this guide\f[] to get a token.
+
+
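Taken together, a hedged gallery-dl.conf sketch of the three discord options documented above; the token is a placeholder and the embed types are drawn from the supported list:

    {
        "extractor": {
            "discord": {
                "token": "YOUR_BOT_TOKEN",
                "embeds": ["image", "gifv", "video", "rich"],
                "threads": false
            }
        }
    }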
.SS extractor.[E621].metadata
.IP "Type:" 6
.br
@@ -4992,6 +5072,71 @@ To generate a token, visit \f[I]/user/USERNAME/list-tokens\f[]
and click \f[I]Create Token\f[].
+.SS extractor.tiktok.audio
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download audio tracks using \f[I]ytdl\f[].
+
+
+.SS extractor.tiktok.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download videos using \f[I]ytdl\f[].
+
+
+.SS extractor.tiktok.user.avatar
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download user avatars.
+
+
+.SS extractor.tiktok.user.module
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Name or filesystem path of the \f[I]ytdl\f[] Python module
+to extract posts from a \f[I]tiktok\f[] user profile with.
+
+See \f[I]extractor.ytdl.module\f[].
+
+
+.SS extractor.tiktok.user.tiktok-range
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Example:" 4
+"1-20"
+
+.IP "Description:" 4
+Range or playlist indices of \f[I]tiktok\f[] user posts to extract.
+
+See
+\f[I]ytdl/playlist_items\f[]
+for details.
+
+
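A combined sketch of the tiktok options above, assuming yt-dlp is available as the ytdl module; it keeps avatar and video downloads, skips separate audio tracks, and limits user profiles to their first 20 posts (all values are illustrative):

    {
        "extractor": {
            "tiktok": {
                "audio": false,
                "videos": true,
                "user": {
                    "avatar": true,
                    "module": "yt-dlp",
                    "tiktok-range": "1-20"
                }
            }
        }
    }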
.SS extractor.tumblr.avatar
.IP "Type:" 6
\f[I]bool\f[]
@@ -5517,7 +5662,7 @@ Selects how to handle exceeding the API rate limit.
.IP "Description:" 4
When receiving a "Could not authenticate you" error while logged in with
-\f[I]username & passeword\f[],
+\f[I]username & password\f[],
refresh the current login session and
try to continue from where it left off.
@@ -5716,7 +5861,7 @@ Available formats are
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"vipergirls.to"\f[]
+\f[I]"viper.click"\f[]
.IP "Description:" 4
Specifies the domain used by \f[I]vipergirls\f[] extractors.
@@ -5923,6 +6068,17 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately.
Download \f[I]livephoto\f[] files.
+.SS extractor.weibo.movies
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download \f[I]movie\f[] videos.
+
+
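For example, opting in to the new movie downloads (a sketch; "videos" is shown for contrast, as it defaults to true):

    {
        "extractor": {
            "weibo": {
                "movies": true,
                "videos": true
            }
        }
    }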
.SS extractor.weibo.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -6059,13 +6215,22 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
.SS extractor.ytdl.module
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
.IP "Default:" 9
\f[I]null\f[]
+.IP "Example:" 4
+.br
+* "yt-dlp"
+.br
+* "/home/user/.local/lib/python3.13/site-packages/youtube_dl"
+
.IP "Description:" 4
-Name of the \f[I]ytdl\f[] Python module to import.
+Name or filesystem path of the \f[I]ytdl\f[] Python module to import.
Setting this to \f[I]null\f[] will try to import \f[I]"yt_dlp"\f[]
followed by \f[I]"youtube_dl"\f[] as fallback.
@@ -6485,6 +6650,22 @@ regardless of this option.
regardless of this option.
+.SS downloader.http.sleep-429
+.IP "Type:" 6
+\f[I]Duration\f[]
+
+.IP "Default:" 9
+\f[I]extractor.*.sleep-429\f[]
+
+.IP "Description:" 4
+Number of seconds to sleep when receiving a 429 Too Many Requests
+response before \f[I]retrying\f[] the request.
+
+Note: Requires
+\f[I]retry-codes\f[]
+to include \f[I]429\f[].
+
+
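As the note above states, sleep-429 only applies when 429 is among the retried status codes; a minimal sketch:

    {
        "downloader": {
            "http": {
                "retry-codes": [429],
                "sleep-429": 60.0
            }
        }
    }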
.SS downloader.http.validate
.IP "Type:" 6
\f[I]bool\f[]
@@ -6580,13 +6761,22 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
.SS downloader.ytdl.module
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
.IP "Default:" 9
\f[I]null\f[]
+.IP "Example:" 4
+.br
+* "yt-dlp"
+.br
+* "/home/user/.local/lib/python3.13/site-packages/youtube_dl"
+
.IP "Description:" 4
-Name of the \f[I]ytdl\f[] Python module to import.
+Name or filesystem path of the \f[I]ytdl\f[] Python module to import.
Setting this to \f[I]null\f[] will try to import \f[I]"yt_dlp"\f[]
followed by \f[I]"youtube_dl"\f[] as fallback.
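Following the description above, a sketch pointing "module" at a filesystem path instead of an importable name, reusing the documented example path:

    {
        "downloader": {
            "ytdl": {
                "module": "/home/user/.local/lib/python3.13/site-packages/youtube_dl"
            }
        }
    }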
@@ -7047,17 +7237,25 @@ Only compare file sizes. Do not read and compare their content.
.SS exec.archive
.IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
.IP "Description:" 4
-File to store IDs of executed commands in,
+Database to store IDs of executed commands in,
similar to \f[I]extractor.*.archive\f[].
-\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
-akin to
-\f[I]extractor.*.archive-format\f[],
-\f[I]extractor.*.archive-prefix\f[], and
-\f[I]extractor.*.archive-pragma\f[], are supported as well.
+The following archive options are also supported:
+
+.br
+* \f[I]archive-format\f[]
+.br
+* \f[I]archive-prefix\f[]
+.br
+* \f[I]archive-pragma\f[]
+.br
+* \f[I]archive-table\f[]
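A hedged sketch of an exec postprocessor using these archive options; the command, database URI, and table name are placeholders, and the postprocessor list syntax follows gallery-dl's general configuration format:

    {
        "extractor": {
            "postprocessors": [
                {
                    "name": "exec",
                    "command": ["echo", "{_path}"],
                    "archive": "postgresql://user:pass@host/database",
                    "archive-table": "exec_history"
                }
            ]
        }
    }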
.SS exec.async
@@ -7572,17 +7770,25 @@ Do not overwrite already existing files.
.SS metadata.archive
.IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
.IP "Description:" 4
-File to store IDs of generated metadata files in,
+Database to store IDs of generated metadata files in,
similar to \f[I]extractor.*.archive\f[].
-\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
-akin to
-\f[I]extractor.*.archive-format\f[],
-\f[I]extractor.*.archive-prefix\f[], and
-\f[I]extractor.*.archive-pragma\f[], are supported as well.
+The following archive options are also supported:
+
+.br
+* \f[I]archive-format\f[]
+.br
+* \f[I]archive-prefix\f[]
+.br
+* \f[I]archive-pragma\f[]
+.br
+* \f[I]archive-table\f[]
.SS metadata.mtime
@@ -7663,17 +7869,25 @@ The resulting value must be either a UNIX timestamp or a
.SS python.archive
.IP "Type:" 6
-\f[I]Path\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]Path\f[]
.IP "Description:" 4
-File to store IDs of called Python functions in,
+Database to store IDs of called Python functions in,
similar to \f[I]extractor.*.archive\f[].
-\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
-akin to
-\f[I]extractor.*.archive-format\f[],
-\f[I]extractor.*.archive-prefix\f[], and
-\f[I]extractor.*.archive-pragma\f[], are supported as well.
+The following archive options are also supported:
+
+.br
+* \f[I]archive-format\f[]
+.br
+* \f[I]archive-prefix\f[]
+.br
+* \f[I]archive-pragma\f[]
+.br
+* \f[I]archive-table\f[]
.SS python.event
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index f3c9fdb..ed85b01 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -37,6 +37,7 @@
"archive-pragma": [],
"archive-event" : ["file"],
"archive-mode" : "file",
+ "archive-table" : null,
"cookies": null,
"cookies-select": null,
@@ -263,7 +264,8 @@
},
"furaffinity":
{
- "cookies" : null,
+ "cookies" : null,
+ "sleep-request": "1.0",
"descriptions": "text",
"external" : false,
@@ -591,6 +593,17 @@
"username": "",
"password": ""
},
+ "tiktok":
+ {
+ "audio" : true,
+ "videos": true,
+
+ "user": {
+ "avatar": true,
+ "module": null,
+ "tiktok-range": null
+ }
+ },
"tsumino":
{
"username": "",
@@ -672,7 +685,7 @@
"password": "",
"sleep-request": "0.5",
- "domain" : "vipergirls.to",
+ "domain" : "viper.click",
"like" : false
},
"vk":
@@ -713,6 +726,7 @@
"gifs" : true,
"include" : ["feed"],
"livephoto": true,
+ "movies" : false,
"retweets" : false,
"videos" : true
},
@@ -917,7 +931,8 @@
"consume-content" : false,
"enabled" : true,
"headers" : null,
- "retry-codes" : [404, 429, 430],
+ "retry-codes" : [],
+ "sleep-429" : 60.0,
"validate" : true
},
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 6db2d05..148bf37 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.28.5
+Version: 1.29.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -89,6 +89,7 @@ Optional
- PyYAML_: YAML configuration file support
- toml_: TOML configuration file support for Python<3.11
- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
+- Psycopg_: PostgreSQL archive support
Installation
@@ -131,9 +132,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
Nightly Builds
@@ -527,6 +528,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _PyYAML: https://pyyaml.org/
.. _toml: https://pypi.org/project/toml/
.. _SecretStorage: https://pypi.org/project/SecretStorage/
+.. _Psycopg: https://www.psycopg.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
.. _Chocolatey: https://chocolatey.org/install
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index c5f560b..a29d3fe 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -81,6 +81,7 @@ gallery_dl/extractor/danbooru.py
gallery_dl/extractor/desktopography.py
gallery_dl/extractor/deviantart.py
gallery_dl/extractor/directlink.py
+gallery_dl/extractor/discord.py
gallery_dl/extractor/dynastyscans.py
gallery_dl/extractor/e621.py
gallery_dl/extractor/erome.py
@@ -95,6 +96,7 @@ gallery_dl/extractor/flickr.py
gallery_dl/extractor/foolfuuka.py
gallery_dl/extractor/foolslide.py
gallery_dl/extractor/furaffinity.py
+gallery_dl/extractor/furry34.py
gallery_dl/extractor/fuskator.py
gallery_dl/extractor/gelbooru.py
gallery_dl/extractor/gelbooru_v01.py
@@ -121,6 +123,7 @@ gallery_dl/extractor/imgbb.py
gallery_dl/extractor/imgbox.py
gallery_dl/extractor/imgth.py
gallery_dl/extractor/imgur.py
+gallery_dl/extractor/imhentai.py
gallery_dl/extractor/inkbunny.py
gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
@@ -217,6 +220,7 @@ gallery_dl/extractor/szurubooru.py
gallery_dl/extractor/tapas.py
gallery_dl/extractor/tcbscans.py
gallery_dl/extractor/telegraph.py
+gallery_dl/extractor/tiktok.py
gallery_dl/extractor/tmohentai.py
gallery_dl/extractor/toyhouse.py
gallery_dl/extractor/tsumino.py
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
index 891104a..6727541 100644
--- a/gallery_dl/aes.py
+++ b/gallery_dl/aes.py
@@ -14,6 +14,13 @@ except ImportError:
from Crypto.Cipher import AES as Cryptodome_AES
except ImportError:
Cryptodome_AES = None
+except Exception as exc:
+ Cryptodome_AES = None
+ import logging
+ logging.getLogger("aes").warning(
+ "Error when trying to import 'Cryptodome' module (%s: %s)",
+ exc.__class__.__name__, exc)
+ del logging
if Cryptodome_AES:
diff --git a/gallery_dl/archive.py b/gallery_dl/archive.py
index 5f05bbf..edecb10 100644
--- a/gallery_dl/archive.py
+++ b/gallery_dl/archive.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2024 Mike Fährmann
+# Copyright 2024-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,50 +9,94 @@
"""Download Archives"""
import os
-import sqlite3
-from . import formatter
+import logging
+from . import util, formatter
+
+log = logging.getLogger("archive")
+
+
+def connect(path, prefix, format,
+ table=None, mode=None, pragma=None, kwdict=None, cache_key=None):
+ keygen = formatter.parse(prefix + format).format_map
+
+ if isinstance(path, str) and path.startswith(
+ ("postgres://", "postgresql://")):
+ if mode == "memory":
+ cls = DownloadArchivePostgresqlMemory
+ else:
+ cls = DownloadArchivePostgresql
+ else:
+ path = util.expand_path(path)
+ if kwdict is not None and "{" in path:
+ path = formatter.parse(path).format_map(kwdict)
+ if mode == "memory":
+ cls = DownloadArchiveMemory
+ else:
+ cls = DownloadArchive
+
+ if kwdict is not None and table:
+ table = formatter.parse(table).format_map(kwdict)
+
+ return cls(path, keygen, table, pragma, cache_key)
+
+
+def sanitize(name):
+ return '"' + name.replace('"', "_") + '"'
class DownloadArchive():
+ _sqlite3 = None
+
+ def __init__(self, path, keygen, table=None, pragma=None, cache_key=None):
+ if self._sqlite3 is None:
+ DownloadArchive._sqlite3 = __import__("sqlite3")
- def __init__(self, path, format_string, pragma=None,
- cache_key="_archive_key"):
try:
- con = sqlite3.connect(path, timeout=60, check_same_thread=False)
- except sqlite3.OperationalError:
+ con = self._sqlite3.connect(
+ path, timeout=60, check_same_thread=False)
+ except self._sqlite3.OperationalError:
os.makedirs(os.path.dirname(path))
- con = sqlite3.connect(path, timeout=60, check_same_thread=False)
+ con = self._sqlite3.connect(
+ path, timeout=60, check_same_thread=False)
con.isolation_level = None
- self.keygen = formatter.parse(format_string).format_map
+ self.keygen = keygen
self.connection = con
self.close = con.close
self.cursor = cursor = con.cursor()
- self._cache_key = cache_key
+ self._cache_key = cache_key or "_archive_key"
+
+ table = "archive" if table is None else sanitize(table)
+ self._stmt_select = (
+ "SELECT 1 "
+ "FROM " + table + " "
+ "WHERE entry=? "
+ "LIMIT 1")
+ self._stmt_insert = (
+ "INSERT OR IGNORE INTO " + table + " "
+ "(entry) VALUES (?)")
if pragma:
for stmt in pragma:
cursor.execute("PRAGMA " + stmt)
try:
- cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
"(entry TEXT PRIMARY KEY) WITHOUT ROWID")
- except sqlite3.OperationalError:
+ except self._sqlite3.OperationalError:
# fallback for missing WITHOUT ROWID support (#553)
- cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
"(entry TEXT PRIMARY KEY)")
def add(self, kwdict):
"""Add item described by 'kwdict' to archive"""
key = kwdict.get(self._cache_key) or self.keygen(kwdict)
- self.cursor.execute(
- "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
+ self.cursor.execute(self._stmt_insert, (key,))
def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
key = kwdict[self._cache_key] = self.keygen(kwdict)
- self.cursor.execute(
- "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
+ self.cursor.execute(self._stmt_select, (key,))
return self.cursor.fetchone()
def finalize(self):
@@ -61,9 +105,9 @@ class DownloadArchive():
class DownloadArchiveMemory(DownloadArchive):
- def __init__(self, path, format_string, pragma=None,
- cache_key="_archive_key"):
- DownloadArchive.__init__(self, path, format_string, pragma, cache_key)
+ def __init__(self, path, keygen, table=None, pragma=None, cache_key=None):
+ DownloadArchive.__init__(
+ self, path, keygen, table, pragma, cache_key)
self.keys = set()
def add(self, kwdict):
@@ -75,8 +119,7 @@ class DownloadArchiveMemory(DownloadArchive):
key = kwdict[self._cache_key] = self.keygen(kwdict)
if key in self.keys:
return True
- self.cursor.execute(
- "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
+ self.cursor.execute(self._stmt_select, (key,))
return self.cursor.fetchone()
def finalize(self):
@@ -87,12 +130,110 @@ class DownloadArchiveMemory(DownloadArchive):
with self.connection:
try:
cursor.execute("BEGIN")
- except sqlite3.OperationalError:
+ except self._sqlite3.OperationalError:
pass
- stmt = "INSERT OR IGNORE INTO archive (entry) VALUES (?)"
+ stmt = self._stmt_insert
if len(self.keys) < 100:
for key in self.keys:
cursor.execute(stmt, (key,))
else:
cursor.executemany(stmt, ((key,) for key in self.keys))
+
+
+class DownloadArchivePostgresql():
+ _psycopg = None
+
+ def __init__(self, uri, keygen, table=None, pragma=None, cache_key=None):
+ if self._psycopg is None:
+ DownloadArchivePostgresql._psycopg = __import__("psycopg")
+
+ self.connection = con = self._psycopg.connect(uri)
+ self.cursor = cursor = con.cursor()
+ self.close = con.close
+ self.keygen = keygen
+ self._cache_key = cache_key or "_archive_key"
+
+ table = "archive" if table is None else sanitize(table)
+ self._stmt_select = (
+ "SELECT true "
+ "FROM " + table + " "
+ "WHERE entry=%s "
+ "LIMIT 1")
+ self._stmt_insert = (
+ "INSERT INTO " + table + " (entry) "
+ "VALUES (%s) "
+ "ON CONFLICT DO NOTHING")
+
+ try:
+ cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
+ "(entry TEXT PRIMARY KEY)")
+ con.commit()
+ except Exception as exc:
+ log.error("%s: %s when creating '%s' table: %s",
+ con, exc.__class__.__name__, table, exc)
+ con.rollback()
+ raise
+
+ def add(self, kwdict):
+ key = kwdict.get(self._cache_key) or self.keygen(kwdict)
+ try:
+ self.cursor.execute(self._stmt_insert, (key,))
+ self.connection.commit()
+ except Exception as exc:
+ log.error("%s: %s when writing entry: %s",
+ self.connection, exc.__class__.__name__, exc)
+ self.connection.rollback()
+
+ def check(self, kwdict):
+ key = kwdict[self._cache_key] = self.keygen(kwdict)
+ try:
+ self.cursor.execute(self._stmt_select, (key,))
+ return self.cursor.fetchone()
+ except Exception as exc:
+ log.error("%s: %s when checking entry: %s",
+ self.connection, exc.__class__.__name__, exc)
+ self.connection.rollback()
+ return False
+
+ def finalize(self):
+ pass
+
+
+class DownloadArchivePostgresqlMemory(DownloadArchivePostgresql):
+
+ def __init__(self, path, keygen, table=None, pragma=None, cache_key=None):
+ DownloadArchivePostgresql.__init__(
+ self, path, keygen, table, pragma, cache_key)
+ self.keys = set()
+
+ def add(self, kwdict):
+ self.keys.add(
+ kwdict.get(self._cache_key) or
+ self.keygen(kwdict))
+
+ def check(self, kwdict):
+ key = kwdict[self._cache_key] = self.keygen(kwdict)
+ if key in self.keys:
+ return True
+ try:
+ self.cursor.execute(self._stmt_select, (key,))
+ return self.cursor.fetchone()
+ except Exception as exc:
+ log.error("%s: %s when checking entry: %s",
+ self.connection, exc.__class__.__name__, exc)
+ self.connection.rollback()
+ return False
+
+ def finalize(self):
+ if not self.keys:
+ return
+ try:
+ self.cursor.executemany(
+ self._stmt_insert,
+ ((key,) for key in self.keys))
+ self.connection.commit()
+ except Exception as exc:
+ log.error("%s: %s when writing entries: %s",
+ self.connection, exc.__class__.__name__, exc)
+ self.connection.rollback()
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 1168d83..8430884 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@
import os
from .. import config, util
+_config = config._config
class DownloaderBase():
@@ -17,8 +18,15 @@ class DownloaderBase():
scheme = ""
def __init__(self, job):
+ extractor = job.extractor
+
+ opts = self._extractor_config(extractor)
+ if opts:
+ self.opts = opts
+ self.config = self.config_opts
+
self.out = job.out
- self.session = job.extractor.session
+ self.session = extractor.session
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
self.log = job.get_logger("downloader." + self.scheme)
@@ -29,7 +37,7 @@ class DownloaderBase():
proxies = self.config("proxy", util.SENTINEL)
if proxies is util.SENTINEL:
- self.proxies = job.extractor._proxies
+ self.proxies = extractor._proxies
else:
self.proxies = util.build_proxy_map(proxies, self.log)
@@ -37,5 +45,45 @@ class DownloaderBase():
"""Interpolate downloader config value for 'key'"""
return config.interpolate(("downloader", self.scheme), key, default)
+ def config_opts(self, key, default=None, conf=_config):
+ if key in conf:
+ return conf[key]
+ value = self.opts.get(key, util.SENTINEL)
+ if value is not util.SENTINEL:
+ return value
+ return config.interpolate(("downloader", self.scheme), key, default)
+
+ def _extractor_config(self, extractor):
+ path = extractor._cfgpath
+ if not isinstance(path, list):
+ return self._extractor_opts(path[1], path[2])
+
+ opts = {}
+ for cat, sub in reversed(path):
+ popts = self._extractor_opts(cat, sub)
+ if popts:
+ opts.update(popts)
+ return opts
+
+ def _extractor_opts(self, category, subcategory):
+ cfg = config.get(("extractor",), category)
+ if not cfg:
+ return None
+
+ copts = cfg.get(self.scheme)
+ if copts:
+ if subcategory in cfg:
+ sopts = cfg[subcategory].get(self.scheme)
+ if sopts:
+ opts = copts.copy()
+ opts.update(sopts)
+ return opts
+ return copts
+
+ if subcategory in cfg:
+ return cfg[subcategory].get(self.scheme)
+
+ return None
+
def download(self, url, pathfmt):
"""Write data from 'url' into the file specified by 'pathfmt'"""
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index c8aeef8..449ffe8 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -12,7 +12,7 @@ import time
import mimetypes
from requests.exceptions import RequestException, ConnectionError, Timeout
from .common import DownloaderBase
-from .. import text, util
+from .. import text, util, output
from ssl import SSLError
@@ -38,6 +38,7 @@ class HttpDownloader(DownloaderBase):
self.verify = self.config("verify", extractor._verify)
self.mtime = self.config("mtime", True)
self.rate = self.config("rate")
+ interval_429 = self.config("sleep-429")
if not self.config("consume-content", False):
# this resets the underlying TCP connection, and therefore
@@ -79,12 +80,16 @@ class HttpDownloader(DownloaderBase):
self.receive = self._receive_rate
if self.progress < 0.0:
self.progress = 0.0
+ if interval_429 is None:
+ self.interval_429 = extractor._interval_429
+ else:
+ self.interval_429 = util.build_duration_func(interval_429)
def download(self, url, pathfmt):
try:
return self._download_impl(url, pathfmt)
except Exception:
- print()
+ output.stderr_write("\n")
raise
finally:
# remove file from incomplete downloads
@@ -93,7 +98,7 @@ class HttpDownloader(DownloaderBase):
def _download_impl(self, url, pathfmt):
response = None
- tries = 0
+ tries = code = 0
msg = ""
metadata = self.metadata
@@ -111,10 +116,17 @@ class HttpDownloader(DownloaderBase):
if response:
self.release_conn(response)
response = None
+
self.log.warning("%s (%s/%s)", msg, tries, self.retries+1)
if tries > self.retries:
return False
- time.sleep(tries)
+
+ if code == 429 and self.interval_429:
+ s = self.interval_429()
+ time.sleep(s if s > tries else tries)
+ else:
+ time.sleep(tries)
+ code = 0
tries += 1
file_header = None
@@ -257,7 +269,7 @@ class HttpDownloader(DownloaderBase):
else response.iter_content(16), b"")
except (RequestException, SSLError) as exc:
msg = str(exc)
- print()
+ output.stderr_write("\n")
continue
if self._adjust_extension(pathfmt, file_header) and \
pathfmt.exists():
@@ -291,14 +303,14 @@ class HttpDownloader(DownloaderBase):
self.receive(fp, content, size, offset)
except (RequestException, SSLError) as exc:
msg = str(exc)
- print()
+ output.stderr_write("\n")
continue
# check file size
if size and fp.tell() < size:
msg = "file size mismatch ({} < {})".format(
fp.tell(), size)
- print()
+ output.stderr_write("\n")
continue
break
@@ -317,7 +329,7 @@ class HttpDownloader(DownloaderBase):
for _ in response.iter_content(self.chunk_size):
pass
except (RequestException, SSLError) as exc:
- print()
+ output.stderr_write("\n")
self.log.debug(
"Unable to consume response body (%s: %s); "
"closing the connection anyway", exc.__class__.__name__, exc)
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 40cddec..1242098 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -48,6 +48,13 @@ class YoutubeDLDownloader(DownloaderBase):
self.log.debug("", exc_info=exc)
self.download = lambda u, p: False
return False
+
+ try:
+ ytdl_version = module.version.__version__
+ except Exception:
+ ytdl_version = ""
+ self.log.debug("Using %s version %s", module, ytdl_version)
+
self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
module, self, self.ytdl_opts)
if self.outtmpl == "default":
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index fc8d7b2..00b22d4 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -44,6 +44,7 @@ modules = [
"danbooru",
"desktopography",
"deviantart",
+ "discord",
"dynastyscans",
"e621",
"erome",
@@ -56,6 +57,7 @@ modules = [
"fapachi",
"flickr",
"furaffinity",
+ "furry34",
"fuskator",
"gelbooru",
"gelbooru_v01",
@@ -80,6 +82,7 @@ modules = [
"imgbox",
"imgth",
"imgur",
+ "imhentai",
"inkbunny",
"instagram",
"issuu",
@@ -168,6 +171,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
+ "tiktok",
"tmohentai",
"toyhouse",
"tsumino",
diff --git a/gallery_dl/extractor/bilibili.py b/gallery_dl/extractor/bilibili.py
index b9de165..597ec40 100644
--- a/gallery_dl/extractor/bilibili.py
+++ b/gallery_dl/extractor/bilibili.py
@@ -81,6 +81,27 @@ class BilibiliArticleExtractor(BilibiliExtractor):
yield Message.Url, url, text.nameext_from_url(url, article)
+class BilibiliUserArticlesFavoriteExtractor(BilibiliExtractor):
+ subcategory = "user-articles-favorite"
+ pattern = (r"(?:https?://)?space\.bilibili\.com"
+ r"/(\d+)/favlist\?fid=opus")
+ example = "https://space.bilibili.com/12345/favlist?fid=opus"
+ _warning = True
+
+ def _init(self):
+ BilibiliExtractor._init(self)
+ if self._warning:
+ if not self.cookies_check(("SESSDATA",)):
+ self.log.error("'SESSDATA' cookie required")
+ BilibiliUserArticlesFavoriteExtractor._warning = False
+
+ def items(self):
+ for article in self.api.user_favlist():
+ article["_extractor"] = BilibiliArticleExtractor
+ url = "{}/opus/{}".format(self.root, article["opus_id"])
+ yield Message.Queue, url, article
+
+
class BilibiliAPI():
def __init__(self, extractor):
self.extractor = extractor
@@ -122,3 +143,28 @@ class BilibiliAPI():
raise exception.StopExtraction(
"%s: Unable to extract INITIAL_STATE data", article_id)
self.extractor.wait(seconds=300)
+
+ def user_favlist(self):
+ endpoint = "/opus/feed/fav"
+ params = {"page": 1, "page_size": 20}
+
+ while True:
+ data = self._call(endpoint, params)["data"]
+
+ yield from data["items"]
+
+ if not data.get("has_more"):
+ break
+ params["page"] += 1
+
+ def login_user_id(self):
+ url = "https://api.bilibili.com/x/space/v2/myinfo"
+ data = self.extractor.request(url).json()
+
+ if data["code"] != 0:
+ self.extractor.log.debug("Server response: %s", data)
+            raise exception.StopExtraction("API request failed. Are you logged in?")
+ try:
+ return data["data"]["profile"]["mid"]
+ except Exception:
+ raise exception.StopExtraction("API request failed")
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index c28fad9..f3e441b 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -8,6 +8,7 @@
from .common import Extractor, Message
from .. import text, util, exception
+import itertools
BASE_PATTERN = r"(?:https?://)?boosty\.to"
@@ -53,7 +54,9 @@ class BoostyExtractor(Extractor):
self.log.warning("Not allowed to access post %s", post["id"])
continue
- files = self._process_post(post)
+ files = self._extract_files(post)
+ if self._user:
+ post["user"] = self._user
data = {
"post" : post,
"user" : post.pop("user", None),
@@ -69,15 +72,13 @@ class BoostyExtractor(Extractor):
def posts(self):
"""Yield JSON content of all relevant posts"""
- def _process_post(self, post):
+ def _extract_files(self, post):
files = []
post["content"] = content = []
post["links"] = links = []
if "createdAt" in post:
post["date"] = text.parse_timestamp(post["createdAt"])
- if self._user:
- post["user"] = self._user
for block in post["data"]:
try:
@@ -94,7 +95,7 @@ class BoostyExtractor(Extractor):
elif type == "ok_video":
if not self.videos:
self.log.debug("%s: Skipping video %s",
- post["int_id"], block["id"])
+ post["id"], block["id"])
continue
fmts = {
fmt["type"]: fmt["url"]
@@ -114,7 +115,7 @@ class BoostyExtractor(Extractor):
else:
self.log.warning(
"%s: Found no suitable video format for %s",
- post["int_id"], block["id"])
+ post["id"], block["id"])
elif type == "link":
url = block["url"]
@@ -127,9 +128,12 @@ class BoostyExtractor(Extractor):
elif type == "file":
files.append(self._update_url(post, block))
+ elif type == "smile":
+ content.append(":" + block["name"] + ":")
+
else:
self.log.debug("%s: Unsupported data type '%s'",
- post["int_id"], type)
+ post["id"], type)
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
@@ -219,6 +223,51 @@ class BoostyFollowingExtractor(BoostyExtractor):
yield Message.Queue, url, user
+class BoostyDirectMessagesExtractor(BoostyExtractor):
+ """Extractor for boosty.to direct messages"""
+ subcategory = "direct-messages"
+ directory_fmt = ("{category}", "{user[blogUrl]} ({user[id]})",
+ "Direct Messages")
+ pattern = BASE_PATTERN + r"/app/messages/?\?dialogId=(\d+)"
+ example = "https://boosty.to/app/messages?dialogId=12345"
+
+ def items(self):
+ """Yield direct messages from a given dialog ID."""
+ dialog_id = self.groups[0]
+ response = self.api.dialog(dialog_id)
+ signed_query = response.get("signedQuery")
+
+ try:
+ messages = response["messages"]["data"]
+ offset = messages[0]["id"]
+ except Exception:
+ return
+
+ try:
+ user = self.api.user(response["chatmate"]["url"])
+ except Exception:
+ user = None
+
+ messages.reverse()
+ for message in itertools.chain(
+ messages,
+ self.api.dialog_messages(dialog_id, offset=offset)
+ ):
+ message["signedQuery"] = signed_query
+ files = self._extract_files(message)
+ data = {
+ "post": message,
+ "user": user,
+ "count": len(files),
+ }
+
+ yield Message.Directory, data
+ for data["num"], file in enumerate(files, 1):
+ data["file"] = file
+ url = file["url"]
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
class BoostyAPI():
"""Interface for the Boosty API"""
root = "https://api.boosty.to"
@@ -367,3 +416,32 @@ class BoostyAPI():
if offset > data["total"]:
return
params["offset"] = offset
+
+ def dialog(self, dialog_id):
+ endpoint = "/v1/dialog/{}".format(dialog_id)
+ return self._call(endpoint)
+
+ def dialog_messages(self, dialog_id, limit=300, offset=None):
+ endpoint = "/v1/dialog/{}/message/".format(dialog_id)
+ params = {
+ "limit": limit,
+ "reverse": "true",
+ "offset": offset,
+ }
+ return self._pagination_dialog(endpoint, params)
+
+ def _pagination_dialog(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+
+ yield from data["data"]
+
+ try:
+ extra = data["extra"]
+ if extra.get("isLast"):
+ break
+ params["offset"] = offset = extra["offset"]
+ if not offset:
+ break
+ except Exception:
+ break
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 25e9fd5..201b8f4 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -10,7 +10,8 @@
from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor
-from .. import text, config, exception
+from .. import text, util, config, exception
+import binascii
import random
if config.get(("extractor", "bunkr"), "tlds"):
@@ -60,6 +61,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"""Extractor for bunkr.si albums"""
category = "bunkr"
root = "https://bunkr.si"
+ root_dl = "https://get.bunkrr.su"
+ archive_fmt = "{album_id}_{id|id_url}"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
example = "https://bunkr.si/a/ID"
@@ -68,6 +71,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
domain = self.groups[0] or self.groups[1]
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
+ self.offset = 0
+
+ def skip(self, num):
+ self.offset = num
+ return num
def request(self, url, **kwargs):
kwargs["encoding"] = "utf-8"
@@ -132,6 +140,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
}
def _extract_files(self, items):
+ if self.offset:
+ items = util.advance(items, self.offset)
+
for item in items:
try:
url = text.unescape(text.extr(item, ' href="', '"'))
@@ -154,26 +165,43 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
self.log.debug("", exc_info=exc)
def _extract_file(self, webpage_url):
- response = self.request(webpage_url)
- page = response.text
- file_url = (text.extr(page, '<source src="', '"') or
- text.extr(page, '<img src="', '"'))
+ page = self.request(webpage_url).text
+ data_id = text.extr(page, 'data-file-id="', '"')
+ referer = self.root_dl + "/file/" + data_id
+
+ url = self.root_dl + "/api/vs"
+ headers = {"Referer": referer}
+ data = self.request(
+ url, method="POST", headers=headers, json={"id": data_id}).json()
+
+ if data.get("encrypted"):
+ file_url = self._decrypt_url(data["url"], data["timestamp"])
+ else:
+ file_url = data["url"]
+
file_name = (text.extr(page, 'property="og:title" content="', '"') or
text.extr(page, "<title>", " | Bunkr<"))
-
- if not file_url:
- webpage_url = text.unescape(text.rextract(
- page, ' href="', '"', page.rindex("Download"))[0])
- response = self.request(webpage_url)
- file_url = text.rextract(response.text, ' href="', '"')[0]
+ fallback = text.extr(page, 'property="og:url" content="', '"')
return {
- "file" : text.unescape(file_url),
+ "file" : file_url,
"name" : text.unescape(file_name),
- "_http_headers" : {"Referer": response.url},
+ "id_url" : data_id,
+ "_fallback" : (fallback,) if fallback else (),
+ "_http_headers" : {"Referer": referer},
"_http_validate": self._validate,
}
+ def _decrypt_url(self, encrypted_b64, timestamp):
+ encrypted_bytes = binascii.a2b_base64(encrypted_b64)
+ key = "SECRET_KEY_{}".format(timestamp // 3600).encode()
+ div = len(key)
+
+ return bytes([
+ encrypted_bytes[i] ^ key[i % div]
+ for i in range(len(encrypted_bytes))
+ ]).decode()
+
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
self.log.warning("File server in maintenance mode")
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index aedcea4..de22a7b 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -57,7 +57,8 @@ class CheveretoImageExtractor(CheveretoExtractor):
image = {
"id" : self.path.rpartition(".")[2],
- "url" : extr('<meta property="og:image" content="', '"'),
+ "url" : (extr('<meta property="og:image" content="', '"') or
+ extr('url: "', '"')),
"album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
"user" : extr('username: "', '"'),
}
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 13fd88a..d58db6f 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -915,7 +915,7 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
options=ssl_options or None, ciphers=ssl_ciphers)
if not requests.__version__ < "2.32":
# https://github.com/psf/requests/pull/6731
- ssl_context.load_default_certs()
+ ssl_context.load_verify_locations(requests.certs.where())
ssl_context.check_hostname = False
else:
ssl_context = None
diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py
new file mode 100644
index 0000000..6a5fcc9
--- /dev/null
+++ b/gallery_dl/extractor/discord.py
@@ -0,0 +1,399 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://discord.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+BASE_PATTERN = r"(?:https?://)?discord\.com"
+
+
+class DiscordExtractor(Extractor):
+ """Base class for Discord extractors"""
+ category = "discord"
+ root = "https://discord.com"
+ directory_fmt = ("{category}", "{server_id}_{server}",
+ "{channel_id}_{channel}")
+ filename_fmt = "{message_id}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "{message_id}_{num}"
+
+ cdn_fmt = "https://cdn.discordapp.com/{}/{}/{}.png?size=4096"
+
+ server_metadata = {}
+ server_channels_metadata = {}
+
+ def _init(self):
+ self.token = self.config("token")
+ self.enabled_embeds = self.config("embeds", ["image", "gifv", "video"])
+ self.enabled_threads = self.config("threads", True)
+ self.api = DiscordAPI(self)
+
+ def extract_message_text(self, message):
+ text_content = [message["content"]]
+
+ for embed in message["embeds"]:
+ if embed["type"] == "rich":
+ try:
+ text_content.append(embed["author"]["name"])
+ except Exception:
+ pass
+ text_content.append(embed.get("title", ""))
+ text_content.append(embed.get("description", ""))
+
+ for field in embed.get("fields", []):
+ text_content.append(field.get("name", ""))
+ text_content.append(field.get("value", ""))
+
+ text_content.append(embed.get("footer", {}).get("text", ""))
+
+ if message.get("poll"):
+ text_content.append(message["poll"]["question"]["text"])
+ for answer in message["poll"]["answers"]:
+ text_content.append(answer["poll_media"]["text"])
+
+ return "\n".join(t for t in text_content if t)
+
+ def extract_message(self, message):
+ # https://discord.com/developers/docs/resources/message#message-object-message-types
+ if message["type"] in (0, 19, 21):
+ message_metadata = {}
+ message_metadata.update(self.server_metadata)
+ message_metadata.update(
+ self.server_channels_metadata[message["channel_id"]])
+ message_metadata.update({
+ "author": message["author"]["username"],
+ "author_id": message["author"]["id"],
+ "author_files": [],
+ "message": self.extract_message_text(message),
+ "message_id": message["id"],
+ "date": text.parse_datetime(
+ message["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z"
+ ),
+ "files": []
+ })
+
+ for icon_type, icon_path in (
+ ("avatar", "avatars"),
+ ("banner", "banners")
+ ):
+ if message["author"].get(icon_type):
+ message_metadata["author_files"].append({
+ "url": self.cdn_fmt.format(
+ icon_path,
+ message_metadata["author_id"],
+ message["author"][icon_type]
+ ),
+ "filename": icon_type,
+ "extension": "png",
+ })
+
+ for attachment in message["attachments"]:
+ message_metadata["files"].append({
+ "url": attachment["url"],
+ "type": "attachment",
+ })
+
+ for embed in message["embeds"]:
+ if embed["type"] in self.enabled_embeds:
+ for field in ("video", "image", "thumbnail"):
+ if field not in embed:
+ continue
+ url = embed[field].get("proxy_url")
+ if url is not None:
+ message_metadata["files"].append({
+ "url": url,
+ "type": "embed",
+ })
+ break
+
+ for num, file in enumerate(message_metadata["files"], start=1):
+ text.nameext_from_url(file["url"], file)
+ file["num"] = num
+
+ yield Message.Directory, message_metadata
+
+ for file in message_metadata["files"]:
+ message_metadata_file = message_metadata.copy()
+ message_metadata_file.update(file)
+ yield Message.Url, file["url"], message_metadata_file
+
+ def extract_channel_text(self, channel_id):
+ for message in self.api.get_channel_messages(channel_id):
+ yield from self.extract_message(message)
+
+ def extract_channel_threads(self, channel_id):
+ for thread in self.api.get_channel_threads(channel_id):
+ id = self.parse_channel(thread)["channel_id"]
+ yield from self.extract_channel_text(id)
+
+ def extract_channel(self, channel_id, safe=False):
+ try:
+ if channel_id not in self.server_channels_metadata:
+ self.parse_channel(self.api.get_channel(channel_id))
+
+ channel_type = (
+ self.server_channels_metadata[channel_id]["channel_type"]
+ )
+
+ # https://discord.com/developers/docs/resources/channel#channel-object-channel-types
+ if channel_type in (0, 5):
+ yield from self.extract_channel_text(channel_id)
+ if self.enabled_threads:
+ yield from self.extract_channel_threads(channel_id)
+ elif channel_type in (1, 3, 10, 11, 12):
+ yield from self.extract_channel_text(channel_id)
+ elif channel_type in (15, 16):
+ yield from self.extract_channel_threads(channel_id)
+ elif channel_type in (4,):
+ for channel in self.server_channels_metadata.copy().values():
+ if channel["parent_id"] == channel_id:
+ yield from self.extract_channel(
+ channel["channel_id"], safe=True)
+ elif not safe:
+ raise exception.StopExtraction(
+ "This channel type is not supported."
+ )
+ except exception.HttpError as exc:
+ if not (exc.status == 403 and safe):
+ raise
+
+ def parse_channel(self, channel):
+ parent_id = channel.get("parent_id")
+ channel_metadata = {
+ "channel": channel.get("name", ""),
+ "channel_id": channel.get("id"),
+ "channel_type": channel.get("type"),
+ "channel_topic": channel.get("topic", ""),
+ "parent_id": parent_id,
+ "is_thread": "thread_metadata" in channel
+ }
+
+ if parent_id in self.server_channels_metadata:
+ parent_metadata = self.server_channels_metadata[parent_id]
+ channel_metadata.update({
+ "parent": parent_metadata["channel"],
+ "parent_type": parent_metadata["channel_type"]
+ })
+
+ if channel_metadata["channel_type"] in (1, 3):
+ channel_metadata.update({
+ "channel": "DMs",
+ "recipients": (
+ [user["username"] for user in channel["recipients"]]
+ ),
+ "recipients_id": (
+ [user["id"] for user in channel["recipients"]]
+ )
+ })
+
+ channel_id = channel_metadata["channel_id"]
+
+ self.server_channels_metadata[channel_id] = channel_metadata
+ return channel_metadata
+
+ def parse_server(self, server):
+ self.server_metadata = {
+ "server": server["name"],
+ "server_id": server["id"],
+ "server_files": [],
+ "owner_id": server["owner_id"]
+ }
+
+ for icon_type, icon_path in (
+ ("icon", "icons"),
+ ("banner", "banners"),
+ ("splash", "splashes"),
+ ("discovery_splash", "discovery-splashes")
+ ):
+ if server.get(icon_type):
+ self.server_metadata["server_files"].append({
+ "url": self.cdn_fmt.format(
+ icon_path,
+ self.server_metadata["server_id"],
+ server[icon_type]
+ ),
+ "filename": icon_type,
+ "extension": "png",
+ })
+
+ return self.server_metadata
+
+ def build_server_and_channels(self, server_id):
+ server = self.api.get_server(server_id)
+ self.parse_server(server)
+
+ for channel in self.api.get_server_channels(server_id):
+ self.parse_channel(channel)
+
+
+class DiscordChannelExtractor(DiscordExtractor):
+ subcategory = "channel"
+ pattern = BASE_PATTERN + r"/channels/(\d+)/(?:\d+/threads/)?(\d+)/?$"
+ example = "https://discord.com/channels/1234567890/9876543210"
+
+ def items(self):
+ server_id, channel_id = self.groups
+
+ self.build_server_and_channels(server_id)
+
+ return self.extract_channel(channel_id)
+
+
+class DiscordMessageExtractor(DiscordExtractor):
+ subcategory = "message"
+ pattern = BASE_PATTERN + r"/channels/(\d+)/(\d+)/(\d+)/?$"
+ example = "https://discord.com/channels/1234567890/9876543210/2468013579"
+
+ def items(self):
+ server_id, channel_id, message_id = self.groups
+
+ self.build_server_and_channels(server_id)
+
+ if channel_id not in self.server_channels_metadata:
+ self.parse_channel(self.api.get_channel(channel_id))
+
+ return self.extract_message(
+ self.api.get_message(channel_id, message_id))
+
+
+class DiscordServerExtractor(DiscordExtractor):
+ subcategory = "server"
+ pattern = BASE_PATTERN + r"/channels/(\d+)/?$"
+ example = "https://discord.com/channels/1234567890"
+
+ def items(self):
+ server_id = self.groups[0]
+
+ self.build_server_and_channels(server_id)
+
+ for channel in self.server_channels_metadata.copy().values():
+ if channel["channel_type"] in (0, 5, 15, 16):
+ yield from self.extract_channel(
+ channel["channel_id"], safe=True)
+
+
+class DiscordDirectMessagesExtractor(DiscordExtractor):
+ subcategory = "direct-messages"
+ directory_fmt = ("{category}", "Direct Messages",
+ "{channel_id}_{recipients:J,}")
+ pattern = BASE_PATTERN + r"/channels/@me/(\d+)/?$"
+ example = "https://discord.com/channels/@me/1234567890"
+
+ def items(self):
+ return self.extract_channel(self.groups[0])
+
+
+class DiscordDirectMessageExtractor(DiscordExtractor):
+ subcategory = "direct-message"
+ directory_fmt = ("{category}", "Direct Messages",
+ "{channel_id}_{recipients:J,}")
+ pattern = BASE_PATTERN + r"/channels/@me/(\d+)/(\d+)/?$"
+ example = "https://discord.com/channels/@me/1234567890/9876543210"
+
+ def items(self):
+ channel_id, message_id = self.groups
+
+ self.parse_channel(self.api.get_channel(channel_id))
+
+ return self.extract_message(
+ self.api.get_message(channel_id, message_id))
+
+
+class DiscordAPI():
+ """Interface for the Discord API v10
+
+ https://discord.com/developers/docs/reference
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api/v10"
+ self.headers = {"Authorization": extractor.token}
+
+ def get_server(self, server_id):
+ """Get server information"""
+ return self._call("/guilds/" + server_id)
+
+ def get_server_channels(self, server_id):
+ """Get server channels"""
+ return self._call("/guilds/" + server_id + "/channels")
+
+ def get_channel(self, channel_id):
+ """Get channel information"""
+ return self._call("/channels/" + channel_id)
+
+ def get_channel_threads(self, channel_id):
+ """Get channel threads"""
+ THREADS_BATCH = 25
+
+ def _method(offset):
+ return self._call("/channels/" + channel_id + "/threads/search", {
+ "sort_by": "last_message_time",
+ "sort_order": "desc",
+ "limit": THREADS_BATCH,
+ "offset": + offset,
+ })["threads"]
+
+ return self._pagination(_method, THREADS_BATCH)
+
+ def get_channel_messages(self, channel_id):
+ """Get channel messages"""
+ MESSAGES_BATCH = 100
+
+ before = None
+
+ def _method(_):
+ nonlocal before
+ messages = self._call("/channels/" + channel_id + "/messages", {
+ "limit": MESSAGES_BATCH,
+ "before": before
+ })
+ before = messages[-1]["id"]
+ return messages
+
+ return self._pagination(_method, MESSAGES_BATCH)
+
+ def get_message(self, channel_id, message_id):
+ """Get message information"""
+ return self._call("/channels/" + channel_id + "/messages", {
+ "limit": 1,
+ "around": message_id
+ })[0]
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+ try:
+ response = self.extractor.request(
+ url, params=params, headers=self.headers)
+ except exception.HttpError as exc:
+ if exc.status == 401:
+ self._raise_invalid_token()
+ raise
+ return response.json()
+
+ def _pagination(self, method, batch):
+ offset = 0
+ while True:
+ data = method(offset)
+ yield from data
+ if len(data) < batch:
+ return
+ offset += len(data)
+
+ @staticmethod
+ def _raise_invalid_token():
+ raise exception.AuthenticationError("""Invalid or missing token.
+Please provide a valid token following these instructions:
+
+1) Open Discord in your browser (https://discord.com/app);
+2) Open your browser's Developer Tools (F12) and switch to the Network panel;
+3) Reload the page and select any request going to https://discord.com/api/...;
+4) In the "Headers" tab, look for an entry beginning with "Authorization: ";
+5) Right-click the entry and click "Copy Value";
+6) Paste the token in your configuration file under "extractor.discord.token",
+or run this command with the -o "token=[your token]" argument.""")
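
Both `get_channel_threads` and `get_channel_messages` feed `_pagination`, which keeps requesting pages until a batch comes back short. The same loop against a fake data source (the fake call and its data are illustrative, not part of Discord's API):

    def paginate(method, batch):
        # request pages of size `batch` until one comes back short
        offset = 0
        while True:
            data = method(offset)
            yield from data
            if len(data) < batch:
                return
            offset += len(data)

    ITEMS = list(range(103))          # stand-in for server-side results

    def fake_call(offset, limit=25):
        return ITEMS[offset:offset + limit]

    assert list(paginate(fake_call, 25)) == ITEMS
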
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index e6d136f..55549de 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -44,6 +44,8 @@ class EromeExtractor(Extractor):
pos = page.index('<div class="user-profile', pos)
user, pos = text.extract(
page, 'href="https://www.erome.com/', '"', pos)
+ tags, pos = text.extract(
+ page, '<p class="mt-10"', '</p>', pos)
urls = []
date = None
@@ -59,11 +61,13 @@ class EromeExtractor(Extractor):
date = text.parse_timestamp(ts)
data = {
- "album_id" : album_id,
- "title" : text.unescape(title),
- "user" : text.unquote(user),
- "count" : len(urls),
- "date" : date,
+ "album_id": album_id,
+ "title" : text.unescape(title),
+ "user" : text.unquote(user),
+ "count" : len(urls),
+ "date" : date,
+ "tags" : [t.replace("+", " ")
+ for t in text.extract_iter(tags, "?q=", '"')],
"_http_headers": {"Referer": url},
}
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 44c4542..5f90afc 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -81,8 +81,8 @@ BASE_PATTERN = FoolfuukaExtractor.update({
"pattern": r"(?:www\.)?archiveofsins\.com",
},
"b4k": {
- "root": "https://arch.b4k.co",
- "pattern": r"arch\.b4k\.co",
+ "root": "https://arch.b4k.dev",
+ "pattern": r"arch\.b4k\.(?:dev|co)",
},
"desuarchive": {
"root": "https://desuarchive.org",
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index d253582..1466390 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -23,6 +23,7 @@ class FuraffinityExtractor(Extractor):
cookies_domain = ".furaffinity.net"
cookies_names = ("a", "b")
root = "https://www.furaffinity.net"
+ request_interval = 1.0
_warning = True
def __init__(self, match):
diff --git a/gallery_dl/extractor/furry34.py b/gallery_dl/extractor/furry34.py
new file mode 100644
index 0000000..e0c7fdb
--- /dev/null
+++ b/gallery_dl/extractor/furry34.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://furry34.com/"""
+
+from .booru import BooruExtractor
+from .. import text
+import collections
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?furry34\.com"
+
+
+class Furry34Extractor(BooruExtractor):
+ category = "furry34"
+ root = "https://furry34.com"
+ root_cdn = "https://furry34com.b-cdn.net"
+ filename_fmt = "{category}_{id}.{extension}"
+ per_page = 30
+
+ TAG_TYPES = {
+ None: "general",
+ 1 : "general",
+ 2 : "copyright",
+ 4 : "character",
+ 8 : "artist",
+ }
+ FORMATS = (
+ ("100", "mov.mp4"),
+ ("101", "mov720.mp4"),
+ ("102", "mov480.mp4"),
+ ("10" , "pic.jpg"),
+ )
+
+ def _file_url(self, post):
+ files = post["files"]
+ for fmt, extension in self.FORMATS:
+ if fmt in files:
+ break
+ else:
+ fmt = next(iter(files))
+
+ post_id = post["id"]
+ root = self.root_cdn if files[fmt][0] else self.root
+ post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
+ root, post_id // 1000, post_id, post_id, extension)
+ post["format_id"] = fmt
+ post["format"] = extension.partition(".")[0]
+
+ return url
+
+ def _prepare(self, post):
+ post.pop("files", None)
+ post["date"] = text.parse_datetime(
+ post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["filename"], _, post["format"] = post["filename"].rpartition(".")
+ if "tags" in post:
+ post["tags"] = [t["value"] for t in post["tags"]]
+
+ def _tags(self, post, _):
+ if "tags" not in post:
+ post.update(self._fetch_post(post["id"]))
+
+ tags = collections.defaultdict(list)
+ for tag in post["tags"]:
+ tags[tag["type"] or 1].append(tag["value"])
+ types = self.TAG_TYPES
+ for type, values in tags.items():
+ post["tags_" + types[type]] = values
+
+ def _fetch_post(self, post_id):
+ url = "{}/api/v2/post/{}".format(self.root, post_id)
+ return self.request(url).json()
+
+ def _pagination(self, endpoint, params=None):
+ url = "{}/api{}".format(self.root, endpoint)
+
+ if params is None:
+ params = {}
+ params["sortBy"] = 0
+ params["take"] = self.per_page
+ threshold = self.per_page
+
+ while True:
+ data = self.request(url, method="POST", json=params).json()
+
+ yield from data["items"]
+
+ if len(data["items"]) < threshold:
+ return
+ params["cursor"] = data.get("cursor")
+
+
+class Furry34PostExtractor(Furry34Extractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/post/(\d+)"
+ example = "https://furry34.com/post/12345"
+
+ def posts(self):
+ return (self._fetch_post(self.groups[0]),)
+
+
+class Furry34PlaylistExtractor(Furry34Extractor):
+ subcategory = "playlist"
+ directory_fmt = ("{category}", "{playlist_id}")
+ archive_fmt = "p_{playlist_id}_{id}"
+ pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+ example = "https://furry34.com/playlists/view/12345"
+
+ def metadata(self):
+ return {"playlist_id": self.groups[0]}
+
+ def posts(self):
+ endpoint = "/v2/post/search/playlist/" + self.groups[0]
+ return self._pagination(endpoint)
+
+
+class Furry34TagExtractor(Furry34Extractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/(?:([^/?#]+))?(?:/?\?([^#]+))?(?:$|#)"
+ example = "https://furry34.com/TAG"
+
+ def _init(self):
+ tag, query = self.groups
+ params = text.parse_query(query)
+
+ self.tags = tags = []
+ if tag:
+ tags.extend(text.unquote(text.unquote(tag)).split("|"))
+ if "tags" in params:
+ tags.extend(params["tags"].split("|"))
+
+ type = params.get("type")
+ if type == "video":
+ self.type = 1
+ elif type == "image":
+ self.type = 0
+ else:
+ self.type = None
+
+ def metadata(self):
+ return {"search_tags": " ".join(self.tags)}
+
+ def posts(self):
+ endpoint = "/v2/post/search/root"
+ params = {"includeTags": [t.replace("_", " ") for t in self.tags]}
+ if self.type is not None:
+ params["type"] = self.type
+ return self._pagination(endpoint, params)
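
`_file_url` above uses a `for ... else` over the preference-ordered `FORMATS` tuple: the first available format wins, and only when none match does it fall back to whatever key the API returned. The selection logic in isolation (sample `files` dicts are invented):

    FORMATS = (
        ("100", "mov.mp4"),
        ("101", "mov720.mp4"),
        ("102", "mov480.mp4"),
        ("10",  "pic.jpg"),
    )

    def pick_format(files):
        for fmt, extension in FORMATS:
            if fmt in files:
                break
        else:                      # no known format in this post
            fmt = next(iter(files))
        return fmt

    assert pick_format({"10": [0], "101": [0]}) == "101"
    assert pick_format({"999": [0]}) == "999"
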
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 370cd43..4b04732 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -37,6 +37,7 @@ class GenericExtractor(Extractor):
example = "generic:https://www.nongnu.org/lzip/"
def __init__(self, match):
+ self.subcategory = match.group('domain')
Extractor.__init__(self, match)
# Strip the "g(eneric):" prefix
@@ -54,7 +55,6 @@ class GenericExtractor(Extractor):
self.scheme = 'https://'
self.url = text.ensure_http_scheme(self.url, self.scheme)
- self.subcategory = match.group('domain')
self.path = match.group('path')
# Used to resolve relative image urls
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 481fb1e..20f8ea4 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -142,7 +142,8 @@ class ImgurGalleryExtractor(ImgurExtractor):
class ImgurUserExtractor(ImgurExtractor):
"""Extractor for all images posted by a user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$"
+ pattern = (BASE_PATTERN + r"/user/(?!me(?:/|$|\?|#))"
+ r"([^/?#]+)(?:/posts|/submitted)?/?$")
example = "https://imgur.com/user/USER"
def items(self):
@@ -174,6 +175,23 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
self.key, self.folder_id))
+class ImgurMeExtractor(ImgurExtractor):
+ """Extractor for your personal uploads"""
+ subcategory = "me"
+ pattern = BASE_PATTERN + r"/user/me(?:/posts)?(/hidden)?"
+ example = "https://imgur.com/user/me"
+
+ def items(self):
+ if not self.cookies_check(("accesstoken",)):
+ self.log.error("'accesstoken' cookie required")
+
+ if self.groups[0]:
+ posts = self.api.accounts_me_hiddenalbums()
+ else:
+ posts = self.api.accounts_me_allposts()
+ return self._items_queue(posts)
+
+
class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddits's imgur links"""
subcategory = "subreddit"
@@ -215,6 +233,10 @@ class ImgurAPI():
self.client_id = extractor.config("client-id") or "546c25a59c58ad7"
self.headers = {"Authorization": "Client-ID " + self.client_id}
+ def account_submissions(self, account):
+ endpoint = "/3/account/{}/submissions".format(account)
+ return self._pagination(endpoint)
+
def account_favorites(self, account):
endpoint = "/3/account/{}/gallery_favorites".format(account)
return self._pagination(endpoint)
@@ -224,15 +246,29 @@ class ImgurAPI():
account, folder_id)
return self._pagination_v2(endpoint)
+ def accounts_me_allposts(self):
+ endpoint = "/post/v1/accounts/me/all_posts"
+ params = {
+ "include": "media,tags,account",
+ "page" : 1,
+ "sort" : "-created_at",
+ }
+ return self._pagination_v2(endpoint, params)
+
+ def accounts_me_hiddenalbums(self):
+ endpoint = "/post/v1/accounts/me/hidden_albums"
+ params = {
+ "include": "media,tags,account",
+ "page" : 1,
+ "sort" : "-created_at",
+ }
+ return self._pagination_v2(endpoint, params)
+
def gallery_search(self, query):
endpoint = "/3/gallery/search"
params = {"q": query}
return self._pagination(endpoint, params)
- def account_submissions(self, account):
- endpoint = "/3/account/{}/submissions".format(account)
- return self._pagination(endpoint)
-
def gallery_subreddit(self, subreddit):
endpoint = "/3/gallery/r/{}".format(subreddit)
return self._pagination(endpoint)
@@ -284,12 +320,16 @@ class ImgurAPI():
if params is None:
params = {}
params["client_id"] = self.client_id
- params["page"] = 0
- params["sort"] = "newest"
+ if "page" not in params:
+ params["page"] = 0
+ if "sort" not in params:
+ params["sort"] = "newest"
headers = {"Origin": "https://imgur.com"}
while True:
- data = self._call(endpoint, params, headers)["data"]
+ data = self._call(endpoint, params, headers)
+ if "data" in data:
+ data = data["data"]
if not data:
return
yield from data
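
The `_pagination_v2` changes make the old hard-coded defaults non-destructive, so the new `/post/v1/accounts/me/...` endpoints can supply their own `page`/`sort`, and the `"data"` envelope is unwrapped only when the response actually has one. The same behavior reduced to two helpers:

    def apply_defaults(params):
        # fill page/sort only when the caller has not set them
        params.setdefault("page", 0)
        params.setdefault("sort", "newest")
        return params

    def unwrap(payload):
        # some endpoints wrap results in a "data" envelope, others do not
        return payload["data"] if "data" in payload else payload

    assert apply_defaults({"page": 1, "sort": "-created_at"})["page"] == 1
    assert unwrap({"data": [1, 2]}) == [1, 2]
    assert unwrap({"id": 3}) == {"id": 3}
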
diff --git a/gallery_dl/extractor/imhentai.py b/gallery_dl/extractor/imhentai.py
new file mode 100644
index 0000000..0439f5b
--- /dev/null
+++ b/gallery_dl/extractor/imhentai.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://imhentai.xxx/ and mirror sites"""
+
+from .common import GalleryExtractor, BaseExtractor, Message
+from .. import text, util
+
+
+class ImhentaiExtractor(BaseExtractor):
+ basecategory = "IMHentai"
+
+ def _pagination(self, url):
+ prev = None
+ base = self.root + "/gallery/"
+ data = {"_extractor": ImhentaiGalleryExtractor}
+
+ while True:
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ while True:
+ gallery_id = extr('<a href="/gallery/', '"')
+ if gallery_id == prev:
+ continue
+ if not gallery_id:
+ break
+ yield Message.Queue, base + gallery_id, data
+ prev = gallery_id
+
+ href = text.rextract(page, "class='page-link' href='", "'")[0]
+ if not href or href == "#":
+ return
+ if href[0] == "/":
+ if href[1] == "/":
+ href = "https:" + href
+ else:
+ href = self.root + href
+ url = href
+
+
+BASE_PATTERN = ImhentaiExtractor.update({
+ "imhentai": {
+ "root": "https://imhentai.xxx",
+ "pattern": r"(?:www\.)?imhentai\.xxx",
+ },
+ "hentaiera": {
+ "root": "https://hentaiera.com",
+ "pattern": r"(?:www\.)?hentaiera\.com",
+ },
+ "hentairox": {
+ "root": "https://hentairox.com",
+ "pattern": r"(?:www\.)?hentairox\.com",
+ },
+})
+
+
+class ImhentaiGalleryExtractor(ImhentaiExtractor, GalleryExtractor):
+ """Extractor for imhentai galleries"""
+ pattern = BASE_PATTERN + r"/(?:gallery|view)/(\d+)"
+ example = "https://imhentai.xxx/gallery/12345/"
+
+ def __init__(self, match):
+ ImhentaiExtractor.__init__(self, match)
+ self.gallery_id = self.groups[-1]
+ self.gallery_url = "{}/gallery/{}/".format(self.root, self.gallery_id)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+
+ data = {
+ "gallery_id": text.parse_int(self.gallery_id),
+ "title" : text.unescape(extr("<h1>", "<")),
+ "title_alt" : text.unescape(extr('class="subtitle">', "<")),
+ "parody" : self._split(extr(">Parodies", "</li>")),
+ "character" : self._split(extr(">Characters", "</li>")),
+ "tags" : self._split(extr(">Tags", "</li>")),
+ "artist" : self._split(extr(">Artists", "</li>")),
+ "group" : self._split(extr(">Groups", "</li>")),
+ "language" : self._split(extr(">Languages", "</li>")),
+ "type" : extr("href='/category/", "/"),
+ }
+
+ if data["language"]:
+ data["lang"] = util.language_to_code(data["language"][0])
+
+ return data
+
+ def _split(self, html):
+ results = []
+ for tag in text.extract_iter(html, ">", "</a>"):
+ tag = tag.partition(" <span class='badge'>")[0]
+ if "<" in tag:
+ tag = text.remove_html(tag)
+ results.append(tag)
+ return results
+
+ def images(self, page):
+ data = util.json_loads(text.extr(page, "$.parseJSON('", "'"))
+ base = text.extr(page, 'data-src="', '"').rpartition("/")[0] + "/"
+ exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"}
+
+ results = []
+ for i in map(str, range(1, len(data)+1)):
+ ext, width, height = data[i].split(",")
+ url = base + i + "." + exts[ext]
+ results.append((url, {
+ "width" : text.parse_int(width),
+ "height": text.parse_int(height),
+ }))
+ return results
+
+
+class ImhentaiTagExtractor(ImhentaiExtractor):
+ """Extractor for imhentai tag searches"""
+ subcategory = "tag"
+ pattern = (BASE_PATTERN + r"(/(?:"
+ r"artist|category|character|group|language|parody|tag"
+ r")/([^/?#]+))")
+ example = "https://imhentai.xxx/tag/TAG/"
+
+ def items(self):
+ url = self.root + self.groups[-2] + "/"
+ return self._pagination(url)
+
+
+class ImhentaiSearchExtractor(ImhentaiExtractor):
+ """Extractor for imhentai search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
+ example = "https://imhentai.xxx/search/?key=QUERY"
+
+ def items(self):
+ url = self.root + "/search/?" + self.groups[-1]
+ return self._pagination(url)
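
The gallery pages embed a JSON object mapping 1-based page numbers to `"<ext-code>,<width>,<height>"` strings, which `images()` above decodes via a one-letter extension table. The same decoding against sample data of that shape (the CDN base URL is made up):

    EXTS = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"}

    def build_pages(data, base):
        pages = []
        for i in map(str, range(1, len(data) + 1)):
            ext, width, height = data[i].split(",")
            pages.append((base + i + "." + EXTS[ext],
                          int(width), int(height)))
        return pages

    sample = {"1": "j,1280,1810", "2": "w,1280,1810"}
    assert build_pages(sample, "https://cdn.example.org/001/")[1] == \
        ("https://cdn.example.org/001/2.webp", 1280, 1810)
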
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index b900113..65717b4 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -30,8 +30,8 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
def metadata(self, page):
pos = page.rindex('id="initial-data"')
- data = util.json_loads(text.rextract(
- page, '<script data-json="', '"', pos)[0].replace("&quot;", '"'))
+ data = util.json_loads(text.unescape(text.rextract(
+ page, '<script data-json="', '"', pos)[0]))
doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime(
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 7f941bb..5c91eb9 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -24,10 +24,6 @@ class ItakuExtractor(Extractor):
archive_fmt = "{id}"
request_interval = (0.5, 1.5)
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.item = match.group(1)
-
def _init(self):
self.api = ItakuAPI(self)
self.videos = self.config("videos", True)
@@ -62,11 +58,11 @@ class ItakuExtractor(Extractor):
class ItakuGalleryExtractor(ItakuExtractor):
"""Extractor for posts from an itaku user gallery"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery"
+ pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery(?:/(\d+))?"
example = "https://itaku.ee/profile/USER/gallery"
def posts(self):
- return self.api.galleries_images(self.item)
+ return self.api.galleries_images(*self.groups)
class ItakuImageExtractor(ItakuExtractor):
@@ -75,7 +71,7 @@ class ItakuImageExtractor(ItakuExtractor):
example = "https://itaku.ee/images/12345"
def posts(self):
- return (self.api.image(self.item),)
+ return (self.api.image(self.groups[0]),)
class ItakuSearchExtractor(ItakuExtractor):
@@ -84,7 +80,7 @@ class ItakuSearchExtractor(ItakuExtractor):
example = "https://itaku.ee/home/images?tags=SEARCH"
def posts(self):
- params = text.parse_query_list(self.item)
+ params = text.parse_query_list(self.groups[0])
return self.api.search_images(params)
@@ -138,7 +134,7 @@ class ItakuAPI():
params = {
"cursor" : None,
"owner" : self.user(username)["owner"],
- "section" : section,
+ "sections" : section,
"date_range": "",
"maturity_rating": ("SFW", "Questionable", "NSFW"),
"ordering" : "-date_added",
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 8ffa14b..648f7df 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -190,8 +190,8 @@ class NewgroundsExtractor(Extractor):
extr = text.extract_from(page)
data = extract_data(extr, post_url)
- data["_comment"] = extr(
- 'id="author_comments"', '</div>').partition(">")[2]
+ data["comment_html"] = data["_comment"] = extr(
+ 'id="author_comments"', '</div>').partition(">")[2].strip()
data["comment"] = text.unescape(text.remove_html(
data["_comment"]
.replace("<p><br></p>", "\n\n").replace("<br>", "\n"), "", ""))
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index e7540f8..815a214 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -83,8 +83,9 @@ class OAuthBase(Extractor):
browser = None
if browser and browser.open(url):
- name = getattr(browser, "name", None) or "Browser"
- self.log.info("Opening URL in %s:", name.capitalize())
+ name = getattr(browser, "name", None)
+ if name:
+ self.log.info("Opening URL with %s:", name.capitalize())
else:
self.log.info("Please open this URL in your browser:")
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 866e93a..f5a33d5 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -169,6 +169,12 @@ class PatreonExtractor(Extractor):
attr["date"] = text.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ try:
+ attr["campaign"] = (included["campaign"][
+ relationships["campaign"]["data"]["id"]])
+ except Exception:
+ attr["campaign"] = None
+
tags = relationships.get("user_defined_tags")
attr["tags"] = [
tag["id"].replace("user_defined;", "")
@@ -324,7 +330,8 @@ class PatreonCreatorExtractor(PatreonExtractor):
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))"
- r"(?:c/)?([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")
+ r"(?:profile/creators|(?:c/)?([^/?#]+)(?:/posts)?)"
+ r"/?(?:\?([^#]+))?")
example = "https://www.patreon.com/USER"
def posts(self):
@@ -345,7 +352,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
return self._pagination(url)
def _get_campaign_id(self, creator, query):
- if creator.startswith("id:"):
+ if creator and creator.startswith("id:"):
return creator[3:]
campaign_id = query.get("c") or query.get("campaign_id")
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 1b67272..201d4d6 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -10,7 +10,6 @@
from .booru import BooruExtractor
from .. import text, exception
-import operator
class PhilomenaExtractor(BooruExtractor):
@@ -24,17 +23,22 @@ class PhilomenaExtractor(BooruExtractor):
def _init(self):
self.api = PhilomenaAPI(self)
- if not self.config("svg", True):
- self._file_url = operator.itemgetter("view_url")
+ self.svg = self.config("svg", True)
def _file_url(self, post):
- if post["format"] == "svg":
- return post["view_url"].rpartition(".")[0] + ".svg"
- return post["view_url"]
+ try:
+ url = post["representations"]["full"]
+ except Exception:
+ url = post["view_url"]
+
+ if self.svg and post["format"] == "svg":
+ return url.rpartition(".")[0] + ".svg"
+ return url
@staticmethod
def _prepare(post):
- post["date"] = text.parse_datetime(post["created_at"])
+ post["date"] = text.parse_datetime(
+ post["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
BASE_PATTERN = PhilomenaExtractor.update({
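
The `_prepare` change stops relying on `parse_datetime`'s default format and instead truncates `created_at` to its first 19 characters, dropping fractional seconds and the zone suffix before parsing. For instance:

    from datetime import datetime

    ts = "2025-01-02T03:04:05.678Z"
    # ts[:19] == "2025-01-02T03:04:05"
    assert (datetime.strptime(ts[:19], "%Y-%m-%dT%H:%M:%S")
            == datetime(2025, 1, 2, 3, 4, 5))
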
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 7fe8869..8a4905d 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -71,9 +71,12 @@ class PixivExtractor(Extractor):
if self.meta_user:
work.update(self.api.user_detail(work["user"]["id"]))
if self.meta_comments:
- if work["total_comments"]:
- work["comments"] = list(
- self.api.illust_comments(work["id"]))
+ if work["total_comments"] and not work.get("_ajax"):
+ try:
+ work["comments"] = list(
+ self.api.illust_comments(work["id"]))
+ except Exception:
+ work["comments"] = ()
else:
work["comments"] = ()
if self.meta_bookmark and work["is_bookmarked"]:
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 89eafc8..f36b1f5 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -259,6 +259,8 @@ class RedditSubredditExtractor(RedditExtractor):
self.subreddit, sub, params = match.groups()
self.params = text.parse_query(params)
if sub:
+ if sub == "search" and "restrict_sr" not in self.params:
+ self.params["restrict_sr"] = "1"
self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 5e3a958..b5cdb9c 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -194,7 +194,6 @@ class SankakuAPI():
self.extractor = extractor
self.headers = {
"Accept" : "application/vnd.sankaku.api+json;v=2",
- "Platform" : "web-app",
"Api-Version": None,
"Origin" : extractor.root,
}
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 8668330..6c43941 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -51,6 +51,23 @@ class SubscribestarExtractor(Extractor):
def posts(self):
"""Yield HTML content of all relevant posts"""
+ def request(self, url, **kwargs):
+ while True:
+ response = Extractor.request(self, url, **kwargs)
+
+ if response.history and "/verify_subscriber" in response.url:
+ raise exception.StopExtraction(
+ "HTTP redirect to %s", response.url)
+
+ content = response.content
+ if len(content) < 250 and b">redirected<" in content:
+ url = text.unescape(text.extr(
+ content, b'href="', b'"').decode())
+ self.log.debug("HTML redirect message for %s", url)
+ continue
+
+ return response
+
def login(self):
if self.cookies_check(self.cookies_names):
return
@@ -189,10 +206,11 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
extr = text.extract_from(html)
return {
"post_id" : text.parse_int(extr('data-id="', '"')),
- "author_name": text.unescape(extr('href="/', '"')),
- "author_id" : text.parse_int(extr('data-user-id="', '"')),
- "author_nick": text.unescape(extr('alt="', '"')),
"date" : self._parse_datetime(extr(
- '<span class="star_link-types">', '<')),
+ '<div class="section-title_date">', '<')),
"content" : extr('<body>', '</body>').strip(),
+ "author_name": text.unescape(extr(
+ 'class="star_link" href="/', '"')),
+ "author_id" : text.parse_int(extr('data-user-id="', '"')),
+ "author_nick": text.unescape(extr('alt="', '"')),
}
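
The overridden `request()` handles SubscribeStar's tiny HTML answer pages, whose only payload is a `>redirected<` link that has to be followed manually. The detection step on its own (the sample page is invented):

    def html_redirect_target(content):
        # return the target of a tiny ">redirected<" page, or None otherwise
        if len(content) < 250 and b">redirected<" in content:
            start = content.index(b'href="') + 6
            return content[start:content.index(b'"', start)].decode()
        return None

    page = b'<a href="https://www.subscribestar.com/posts/1">redirected</a>'
    assert html_redirect_target(page) == "https://www.subscribestar.com/posts/1"
    assert html_redirect_target(b"<html></html>") is None
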
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
new file mode 100644
index 0000000..f129b1c
--- /dev/null
+++ b/gallery_dl/extractor/tiktok.py
@@ -0,0 +1,253 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.tiktok.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, ytdl, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktokv?\.com"
+
+
+class TiktokExtractor(Extractor):
+ """Base class for TikTok extractors"""
+ category = "tiktok"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = (
+ "{id}{num:?_//>02} {title[b:150]}{img_id:? [/]/}.{extension}")
+ archive_fmt = "{id}_{num}_{img_id}"
+ root = "https://www.tiktok.com"
+ cookies_domain = ".tiktok.com"
+
+ def _init(self):
+ self.audio = self.config("audio", True)
+ self.video = self.config("videos", True)
+ if not self.config("avatar", True):
+ self.avatar = util.false
+
+ def items(self):
+ # We assume that all of the URLs served by urls() come from the same
+ # author.
+ downloaded_avatar = not self.avatar()
+
+ for tiktok_url in self.urls():
+ tiktok_url = self._sanitize_url(tiktok_url)
+ data = self._extract_rehydration_data(tiktok_url)
+ if "webapp.video-detail" not in data:
+ # Only /video/ links result in the video-detail dict we need.
+ # Try again using that form of link.
+ tiktok_url = self._sanitize_url(
+ data["seo.abtest"]["canonical"])
+ data = self._extract_rehydration_data(tiktok_url)
+ video_detail = data["webapp.video-detail"]
+
+ if not self._check_status_code(video_detail, tiktok_url):
+ continue
+
+ post = video_detail["itemInfo"]["itemStruct"]
+ author = post["author"]
+ post["user"] = user = author["uniqueId"]
+ post["date"] = text.parse_timestamp(post["createTime"])
+ original_title = title = post["desc"]
+
+ if not downloaded_avatar:
+ avatar_url = author["avatarLarger"]
+ avatar = self._generate_avatar(
+ avatar_url, post, user, author["id"])
+ yield Message.Directory, avatar
+ yield Message.Url, avatar_url, avatar
+ downloaded_avatar = True
+
+ yield Message.Directory, post
+ ytdl_media = False
+
+ if "imagePost" in post:
+ if not original_title:
+ title = "TikTok photo #{}".format(post["id"])
+ img_list = post["imagePost"]["images"]
+ for i, img in enumerate(img_list, 1):
+ url = img["imageURL"]["urlList"][0]
+ text.nameext_from_url(url, post)
+ post.update({
+ "type" : "image",
+ "image" : img,
+ "title" : title,
+ "num" : i,
+ "img_id": post["filename"].partition("~")[0],
+ "width" : img["imageWidth"],
+ "height": img["imageHeight"],
+ })
+ yield Message.Url, url, post
+
+ if self.audio and "music" in post:
+ ytdl_media = "audio"
+
+ elif self.video and "video" in post:
+ ytdl_media = "video"
+
+ else:
+ self.log.info("%s: Skipping post", tiktok_url)
+
+ if ytdl_media:
+ if not original_title:
+ title = "TikTok {} #{}".format(ytdl_media, post["id"])
+ post.update({
+ "type" : ytdl_media,
+ "image" : None,
+ "filename" : "",
+ "extension" : "mp3" if ytdl_media == "audio" else "mp4",
+ "title" : title,
+ "num" : 0,
+ "img_id" : "",
+ "width" : 0,
+ "height" : 0,
+ })
+ yield Message.Url, "ytdl:" + tiktok_url, post
+
+ # If we couldn't download the avatar because the given user has no
+ # posts, we'll need to make a separate request for the user's page
+ # and download the avatar that way.
+ if not downloaded_avatar:
+ user_name = self.avatar()
+ profile_url = "https://www.tiktok.com/@{}".format(user_name)
+ data = self._extract_rehydration_data(profile_url)
+ data = data["webapp.user-detail"]["userInfo"]["user"]
+ data["user"] = user_name
+ avatar_url = data["avatarLarger"]
+ avatar = self._generate_avatar(
+ avatar_url, data, user_name, data["id"])
+ yield Message.Directory, avatar
+ yield Message.Url, avatar_url, avatar
+
+ def avatar(self):
+ return False
+
+ def _generate_avatar(self, avatar_url, data, user_name, user_id):
+ avatar = text.nameext_from_url(avatar_url, data.copy())
+ avatar.update({
+ "type" : "avatar",
+ "title" : "@" + user_name,
+ "id" : user_id,
+ "img_id": avatar["filename"].partition("~")[0],
+ "num" : 0,
+ })
+ return avatar
+
+ def _sanitize_url(self, url):
+ return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1))
+
+ def _extract_rehydration_data(self, url):
+ html = self.request(url).text
+ data = text.extr(
+ html, '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
+ 'type="application/json">', '</script>')
+ return util.json_loads(data)["__DEFAULT_SCOPE__"]
+
+ def _check_status_code(self, detail, url):
+ status = detail.get("statusCode")
+ if not status:
+ return True
+
+ if status == 10222:
+ self.log.error("%s: Login required to access this post", url)
+ elif status == 10204:
+ self.log.error("%s: Requested post not available", url)
+ elif status == 10231:
+ self.log.error("%s: Region locked - Try downloading with a"
+ "VPN/proxy connection", url)
+ else:
+ self.log.error(
+ "%s: Received unknown error code %s ('%s')",
+ url, status, detail.get("statusMsg") or "")
+ return False
+
+
+class TiktokPostExtractor(TiktokExtractor):
+ """Extract a single video or photo TikTok link"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)"
+ example = "https://www.tiktok.com/@USER/photo/1234567890"
+
+ def urls(self):
+ user, post_id = self.groups
+ url = "{}/@{}/video/{}".format(self.root, user or "", post_id)
+ return (url,)
+
+
+class TiktokVmpostExtractor(TiktokExtractor):
+ """Extract a single video or photo TikTok VM link"""
+ subcategory = "vmpost"
+ pattern = (r"(?:https?://)?(?:"
+ r"(?:v[mt]\.)?tiktok\.com|(?:www\.)?tiktok\.com/t"
+ r")/(?!@)([^/?#]+)")
+ example = "https://vm.tiktok.com/1a2B3c4E5"
+
+ def items(self):
+ url = text.ensure_http_scheme(self.url)
+ headers = {"User-Agent": "facebookexternalhit/1.1"}
+
+ response = self.request(url, headers=headers, method="HEAD",
+ allow_redirects=False, notfound="post")
+
+ url = response.headers.get("Location")
+ if not url or len(url) <= 28:
+ # https://www.tiktok.com/?_r=1
+ raise exception.NotFoundError("post")
+
+ data = {"_extractor": TiktokPostExtractor}
+ yield Message.Queue, url.partition("?")[0], data
+
+
+class TiktokUserExtractor(TiktokExtractor):
+ """Extract a TikTok user's profile"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
+ example = "https://www.tiktok.com/@USER"
+
+ def urls(self):
+ """Attempt to use yt-dlp/youtube-dl to extract links from a
+ user's page"""
+
+ try:
+ module = ytdl.import_module(self.config("module"))
+ except (ImportError, SyntaxError) as exc:
+ self.log.error("Cannot import module '%s'",
+ getattr(exc, "name", ""))
+ self.log.debug("", exc_info=exc)
+ raise exception.ExtractionError("yt-dlp or youtube-dl is required "
+ "for this feature!")
+ extr_opts = {
+ "extract_flat" : True,
+ "ignore_no_formats_error": True,
+ }
+ user_opts = {
+ "retries" : self._retries,
+ "socket_timeout" : self._timeout,
+ "nocheckcertificate" : not self._verify,
+ "playlist_items" : str(self.config("tiktok-range", "")),
+ }
+ if self._proxies:
+ user_opts["proxy"] = self._proxies.get("http")
+
+ ytdl_instance = ytdl.construct_YoutubeDL(
+ module, self, user_opts, extr_opts)
+
+ # transfer cookies to ytdl
+ if self.cookies:
+ set_cookie = ytdl_instance.cookiejar.set_cookie
+ for cookie in self.cookies:
+ set_cookie(cookie)
+
+ with ytdl_instance as ydl:
+ info_dict = ydl._YoutubeDL__extract_info(
+ "{}/@{}".format(self.root, self.groups[0]),
+ ydl.get_info_extractor("TikTokUser"),
+ False, {}, True)
+ # This should include video and photo posts in /video/ URL form.
+ return [video["url"] for video in info_dict["entries"]]
+
+ def avatar(self):
+ return self.groups[0]
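
All page parsing in the new TikTok extractor funnels through `_extract_rehydration_data`, which slices one JSON `<script>` element out of the HTML. A minimal stand-alone version (the sample HTML is invented but follows the shape the code expects):

    import json

    MARKER = ('<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
              'type="application/json">')

    def rehydration_data(html):
        # TikTok serves its page state as one JSON <script> element
        start = html.index(MARKER) + len(MARKER)
        return json.loads(html[start:html.index("</script>", start)])[
            "__DEFAULT_SCOPE__"]

    html = (MARKER + '{"__DEFAULT_SCOPE__": {"seo.abtest":'
            ' {"canonical": "https://www.tiktok.com/@u/video/1"}}}</script>')
    assert "seo.abtest" in rehydration_data(html)
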
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index a725a2c..3b0ea36 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -12,7 +12,7 @@ from .booru import BooruExtractor
from .. import text, exception
import operator
-BASE_PATTERN = r"(?:https?://)?twibooru\.org"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?twibooru\.org"
class TwibooruExtractor(BooruExtractor):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 840e846..c391bad 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -234,6 +234,13 @@ class TwitterExtractor(Extractor):
for fmt in self._size_fallback:
yield base + fmt
+ def _extract_components(self, tweet, data, files):
+ for component_id in data["components"]:
+ com = data["component_objects"][component_id]
+ for conv in com["data"]["conversation_preview"]:
+ for url in conv.get("mediaUrls") or ():
+ files.append({"url": url})
+
def _extract_card(self, tweet, files):
card = tweet["card"]
if "legacy" in card:
@@ -272,7 +279,11 @@ class TwitterExtractor(Extractor):
return
elif name == "unified_card":
data = util.json_loads(bvals["unified_card"]["string_value"])
- self._extract_media(tweet, data["media_entities"].values(), files)
+ if "media_entities" in data:
+ self._extract_media(
+ tweet, data["media_entities"].values(), files)
+ if "component_objects" in data:
+ self._extract_components(tweet, data, files)
return
if self.cards == "ytdl":
@@ -1065,7 +1076,7 @@ class TwitterAPI():
else:
csrf_token = None
if not csrf_token:
- csrf_token = util.generate_token(80)
+ csrf_token = util.generate_token()
cookies.set("ct0", csrf_token, domain=cookies_domain)
auth_token = cookies.get("auth_token", domain=cookies_domain)
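
The new `_extract_components` walks a unified_card payload: component ids, then each component's `conversation_preview` entries, collecting their `mediaUrls`. A dry run against a minimal payload shaped like the code expects (field names are taken from the hunk above; the sample values are invented):

    def extract_component_urls(data):
        urls = []
        for component_id in data["components"]:
            com = data["component_objects"][component_id]
            for conv in com["data"]["conversation_preview"]:
                urls.extend(conv.get("mediaUrls") or ())
        return urls

    card = {
        "components": ["c1"],
        "component_objects": {"c1": {"data": {"conversation_preview": [
            {"mediaUrls": ["https://pbs.twimg.com/media/abc.jpg"]},
            {},                      # entries without media are skipped
        ]}}},
    }
    assert extract_component_urls(card) == \
        ["https://pbs.twimg.com/media/abc.jpg"]
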
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 5cde0d6..af3f32d 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -29,7 +29,17 @@ class VipergirlsExtractor(Extractor):
def _init(self):
domain = self.config("domain")
if domain:
- self.root = text.ensure_http_scheme(domain)
+ pos = domain.find("://")
+ if pos >= 0:
+ self.root = domain.rstrip("/")
+ self.cookies_domain = "." + domain[pos+1:].strip("/")
+ else:
+ domain = domain.strip("/")
+ self.root = "https://" + domain
+ self.cookies_domain = "." + domain
+ else:
+ self.root = "https://viper.click"
+ self.cookies_domain = ".viper.click"
def items(self):
self.login()
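
The vipergirls change derives both `root` and `cookies_domain` from a single `domain` option, which may arrive with or without a scheme. A condensed equivalent of that branching (the original reaches the same result via `pos+1` plus `strip("/")`; sample values are examples):

    def split_domain(domain, default="viper.click"):
        # normalize a user-supplied domain into (root URL, cookie domain)
        if not domain:
            domain = default
        pos = domain.find("://")
        if pos >= 0:
            root = domain.rstrip("/")
            host = domain[pos + 3:].strip("/")
        else:
            host = domain.strip("/")
            root = "https://" + host
        return root, "." + host

    assert split_domain("vipergirls.to") == \
        ("https://vipergirls.to", ".vipergirls.to")
    assert split_domain(None) == ("https://viper.click", ".viper.click")
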
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 1c0c172..a53409c 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -38,7 +38,7 @@ class VscoExtractor(Extractor):
if img["is_video"]:
if not videos:
continue
- url = "https://" + img["video_url"]
+ url = text.ensure_http_scheme(img["video_url"])
else:
base = img["responsive_url"].partition("/")[2]
cdn, _, path = base.partition("/")
@@ -63,6 +63,10 @@ class VscoExtractor(Extractor):
"height": img["height"],
"description": img.get("description") or "",
})
+ if data["extension"] == "m3u8":
+ url = "ytdl:" + url
+ data["_ytdl_manifest"] = "hls"
+ data["extension"] = "mp4"
yield Message.Url, url, data
def images(self):
@@ -294,12 +298,33 @@ class VscoImageExtractor(VscoExtractor):
pattern = USER_PATTERN + r"/media/([0-9a-fA-F]+)"
example = "https://vsco.co/USER/media/0123456789abcdef"
- def __init__(self, match):
- VscoExtractor.__init__(self, match)
- self.media_id = match.group(2)
-
def images(self):
- url = "{}/{}/media/{}".format(self.root, self.user, self.media_id)
+ url = "{}/{}/media/{}".format(self.root, self.user, self.groups[1])
data = self._extract_preload_state(url)
media = data["medias"]["byId"].popitem()[1]["media"]
return (self._transform_media(media),)
+
+
+class VscoVideoExtractor(VscoExtractor):
+ """Extractor for vsco.co videos links"""
+ subcategory = "video"
+ pattern = USER_PATTERN + r"/video/([^/?#]+)"
+ example = "https://vsco.co/USER/video/012345678-9abc-def0"
+
+ def images(self):
+ url = "{}/{}/video/{}".format(self.root, self.user, self.groups[1])
+ data = self._extract_preload_state(url)
+ media = data["medias"]["byId"].popitem()[1]["media"]
+
+ return ({
+ "_id" : media["id"],
+ "is_video" : True,
+ "grid_name" : "",
+ "upload_date" : media["createdDate"],
+ "responsive_url": media["posterUrl"],
+ "video_url" : "ytdl:" + media.get("playbackUrl"),
+ "image_meta" : None,
+ "width" : media["width"],
+ "height" : media["height"],
+ "description" : media["description"],
+ },)
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
index fc1badb..cacefd6 100644
--- a/gallery_dl/extractor/weebcentral.py
+++ b/gallery_dl/extractor/weebcentral.py
@@ -50,14 +50,16 @@ class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
manga_id = extr("'series_id': '", "'")
-
- data = self._extract_manga_data(manga_id)
- data["chapter_id"] = self.groups[1]
- data["chapter_type"] = extr("'chapter_type': '", "'")
-
+ chapter_type = extr("'chapter_type': '", "'")
chapter, sep, minor = extr("'number': '", "'").partition(".")
- data["chapter"] = text.parse_int(chapter)
- data["chapter_minor"] = sep + minor
+
+ data = {
+ "chapter": text.parse_int(chapter),
+ "chapter_id": self.groups[1],
+ "chapter_type": chapter_type,
+ "chapter_minor": sep + minor,
+ }
+ data.update(self._extract_manga_data(manga_id))
return data
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 9885d79..3ed5a06 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -33,6 +33,7 @@ class WeiboExtractor(Extractor):
self.livephoto = self.config("livephoto", True)
self.retweets = self.config("retweets", False)
self.videos = self.config("videos", True)
+ self.movies = self.config("movies", False)
self.gifs = self.config("gifs", True)
self.gifs_video = (self.gifs == "video")
@@ -134,7 +135,10 @@ class WeiboExtractor(Extractor):
if "page_info" in status:
info = status["page_info"]
if "media_info" in info and self.videos:
- append(self._extract_video(info["media_info"]))
+ if info.get("type") != "5" or self.movies:
+ append(self._extract_video(info["media_info"]))
+ else:
+ self.log.debug("%s: Ignoring 'movie' video", status["id"])
def _extract_video(self, info):
try:
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 2914927..bea35e3 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -551,28 +551,24 @@ class DownloadJob(Job):
archive_path = cfg("archive")
if archive_path:
- archive_path = util.expand_path(archive_path)
-
+ archive_table = cfg("archive-table")
archive_prefix = cfg("archive-prefix")
if archive_prefix is None:
- archive_prefix = extr.category
+ archive_prefix = extr.category if archive_table is None else ""
archive_format = cfg("archive-format")
if archive_format is None:
archive_format = extr.archive_fmt
try:
- if "{" in archive_path:
- archive_path = formatter.parse(
- archive_path).format_map(kwdict)
- if cfg("archive-mode") == "memory":
- archive_cls = archive.DownloadArchiveMemory
- else:
- archive_cls = archive.DownloadArchive
- self.archive = archive_cls(
+ self.archive = archive.connect(
archive_path,
- archive_prefix + archive_format,
+ archive_prefix,
+ archive_format,
+ archive_table,
+ cfg("archive-mode"),
cfg("archive-pragma"),
+ kwdict,
)
except Exception as exc:
extr.log.warning(
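
job.py now hands archive setup to a single `archive.connect()` factory instead of expanding the path and choosing a class inline (the postprocessor/common.py hunk further down makes the same move). A hypothetical sketch of what such a factory consolidates, inferred only from the removed code and the arguments visible at these call sites; the real `gallery_dl.archive` module may differ:

    import os

    class FileArchive:
        # stand-in for the real sqlite3-backed class
        def __init__(self, path, fmt, table=None, pragma=None):
            self.path, self.fmt = path, fmt

    class MemoryArchive(FileArchive):
        # stand-in for the write-buffering "memory" mode variant
        pass

    def connect(path, prefix, fmt, table, mode, pragma, kwdict):
        path = os.path.expandvars(os.path.expanduser(path))
        if "{" in path:
            path = path.format_map(kwdict)   # per-extractor archive files
        cls = MemoryArchive if mode == "memory" else FileArchive
        return cls(path, prefix + fmt, table, pragma)

    a = connect("{category}.db", "imgur", "{id}", None, "file", None,
                {"category": "imgur"})
    assert a.fmt == "imgur{id}"
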
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 222679a..3c03271 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -179,11 +179,15 @@ class PrintAction(argparse.Action):
if not format_string:
return
- if "{" not in format_string and \
- " " not in format_string and \
- format_string[0] != "\f":
- format_string = "{" + format_string + "}"
- if format_string[-1] != "\n":
+ if format_string.startswith("\\f"):
+ format_string = "\f" + format_string[2:]
+
+ if format_string[0] == "\f":
+ if format_string[1] == "F" and format_string[-1] != "\n":
+ format_string += "\n"
+ elif "{" not in format_string and " " not in format_string:
+ format_string = "{" + format_string + "}\n"
+ elif format_string[-1] != "\n":
format_string += "\n"
namespace.postprocessors.append({
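
The reworked `PrintAction` first converts a literal two-character `\f` (as it arrives from a shell) into a real form-feed, then wraps bare field names in `{...}` and guarantees a trailing newline. The branching as a plain function, mirroring the hunk above:

    def normalize(format_string):
        if format_string.startswith("\\f"):   # literal backslash-f from a shell
            format_string = "\f" + format_string[2:]
        if format_string[0] == "\f":
            if format_string[1] == "F" and format_string[-1] != "\n":
                format_string += "\n"         # \fF: expression mode
        elif "{" not in format_string and " " not in format_string:
            format_string = "{" + format_string + "}\n"   # bare field name
        elif format_string[-1] != "\n":
            format_string += "\n"
        return format_string

    assert normalize("title") == "{title}\n"
    assert normalize("{title} {num}") == "{title} {num}\n"
    assert normalize("\\fF foo.bar") == "\fF foo.bar\n"
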
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index a9143a6..3099547 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2023 Mike Fährmann
+# Copyright 2018-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
"""Common classes and constants used by postprocessor modules."""
-from .. import util, formatter, archive
+from .. import archive
class PostProcessor():
@@ -25,11 +25,11 @@ class PostProcessor():
archive_path = options.get("archive")
if archive_path:
extr = job.extractor
- archive_path = util.expand_path(archive_path)
+ archive_table = options.get("archive-table")
archive_prefix = options.get("archive-prefix")
if archive_prefix is None:
- archive_prefix = extr.category
+ archive_prefix = extr.category if archive_table is None else ""
archive_format = options.get("archive-format")
if archive_format is None:
@@ -38,13 +38,14 @@ class PostProcessor():
archive_format = prefix + extr.archive_fmt
try:
- if "{" in archive_path:
- archive_path = formatter.parse(archive_path).format_map(
- job.pathfmt.kwdict)
- self.archive = archive.DownloadArchive(
+ self.archive = archive.connect(
archive_path,
- archive_prefix + archive_format,
+ archive_prefix,
+ archive_format,
+ archive_table,
+ "file",
options.get("archive-pragma"),
+ job.pathfmt.kwdict,
"_archive_" + self.name,
)
except Exception as exc:
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index 3bb63c8..c6bc54d 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -9,7 +9,7 @@
"""Compare versions of the same file and replace/enumerate them on mismatch"""
from .common import PostProcessor
-from .. import text, util, exception
+from .. import text, util, output, exception
import os
@@ -83,7 +83,7 @@ class ComparePP(PostProcessor):
self._equal_cnt += 1
if self._equal_cnt >= self._equal_max:
util.remove_file(pathfmt.temppath)
- print()
+ output.stderr_write("\n")
raise self._equal_exc()
pathfmt.delete = True
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index fec4ab0..3a32b39 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -9,7 +9,7 @@
"""Convert Pixiv Ugoira to WebM"""
from .common import PostProcessor
-from .. import util
+from .. import util, output
import subprocess
import tempfile
import zipfile
@@ -226,13 +226,13 @@ class UgoiraPP(PostProcessor):
if self._finalize:
self._finalize(pathfmt, tempdir)
except OSError as exc:
- print()
+ output.stderr_write("\n")
self.log.error("Unable to invoke FFmpeg (%s: %s)",
exc.__class__.__name__, exc)
self.log.debug("", exc_info=exc)
pathfmt.realpath = pathfmt.temppath
except Exception as exc:
- print()
+ output.stderr_write("\n")
self.log.error("%s: %s", exc.__class__.__name__, exc)
self.log.debug("", exc_info=exc)
pathfmt.realpath = pathfmt.temppath
@@ -296,7 +296,7 @@ class UgoiraPP(PostProcessor):
out = None if self.output else subprocess.DEVNULL
retcode = util.Popen(args, stdout=out, stderr=out).wait()
if retcode:
- print()
+ output.stderr_write("\n")
self.log.error("Non-zero exit status when running %s (%s)",
args, retcode)
raise ValueError()
diff --git a/gallery_dl/update.py b/gallery_dl/update.py
index b068e37..6650ec4 100644
--- a/gallery_dl/update.py
+++ b/gallery_dl/update.py
@@ -12,7 +12,7 @@ import sys
from .extractor.common import Extractor, Message
from .job import DownloadJob
-from . import util, version, exception
+from . import util, version, output, exception
REPOS = {
"stable" : "mikf/gallery-dl",
@@ -23,14 +23,14 @@ REPOS = {
BINARIES_STABLE = {
"windows" : "gallery-dl.exe",
- "windows_x86": "gallery-dl.exe",
"windows_x64": "gallery-dl.exe",
+ "windows_x86": "gallery-dl_x86.exe",
"linux" : "gallery-dl.bin",
}
BINARIES_DEV = {
"windows" : "gallery-dl_windows.exe",
- "windows_x86": "gallery-dl_windows_x86.exe",
"windows_x64": "gallery-dl_windows.exe",
+ "windows_x86": "gallery-dl_windows_x86.exe",
"linux" : "gallery-dl_linux",
"macos" : "gallery-dl_macos",
}
@@ -143,13 +143,13 @@ class UpdateJob(DownloadJob):
def _warning(self, msg, *args):
if self._newline:
self._newline = False
- print()
+ output.stderr_write("\n")
self.extractor.log.warning(msg, *args)
def _error(self, msg, *args):
if self._newline:
self._newline = False
- print()
+ output.stderr_write("\n")
self.status |= 1
self.extractor.log.error(msg, *args)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 2302088..7034c0c 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -770,7 +770,7 @@ def import_file(path):
finally:
del sys.path[0]
else:
- return __import__(name)
+ return __import__(name.replace("-", "_"))
def build_duration_func(duration, min=0.0):
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d252bed..0c75005 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.28.5"
+__version__ = "1.29.0"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index 32545e2..319e781 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -20,7 +20,7 @@ def import_module(module_name):
return __import__("yt_dlp")
except (ImportError, SyntaxError):
return __import__("youtube_dl")
- return __import__(module_name.replace("-", "_"))
+ return util.import_file(module_name)
def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 35cccc4..5a9a20b 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -20,7 +20,6 @@ import tempfile
import threading
import http.server
-
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import downloader, extractor, output, config, path # noqa E402
from gallery_dl.downloader.http import MIME_TYPES, SIGNATURE_CHECKS # noqa E402
@@ -55,6 +54,9 @@ class TestDownloaderModule(unittest.TestCase):
else:
del sys.modules["youtube_dl"]
+ def setUp(self):
+ downloader._cache.clear()
+
def tearDown(self):
downloader._cache.clear()
@@ -107,6 +109,64 @@ class TestDownloaderModule(unittest.TestCase):
self.assertEqual(import_module.call_count, 1)
+class TestDownloaderConfig(unittest.TestCase):
+
+ def setUp(self):
+ config.clear()
+
+ def tearDown(self):
+ config.clear()
+
+ def test_default_http(self):
+ job = FakeJob()
+ extr = job.extractor
+ dl = downloader.find("http")(job)
+
+ self.assertEqual(dl.adjust_extension, True)
+ self.assertEqual(dl.chunk_size, 32768)
+ self.assertEqual(dl.metadata, None)
+ self.assertEqual(dl.progress, 3.0)
+ self.assertEqual(dl.validate, True)
+ self.assertEqual(dl.headers, None)
+ self.assertEqual(dl.minsize, None)
+ self.assertEqual(dl.maxsize, None)
+ self.assertEqual(dl.mtime, True)
+ self.assertEqual(dl.rate, None)
+ self.assertEqual(dl.part, True)
+ self.assertEqual(dl.partdir, None)
+
+ self.assertIs(dl.interval_429, extr._interval_429)
+ self.assertIs(dl.retry_codes, extr._retry_codes)
+ self.assertIs(dl.retries, extr._retries)
+ self.assertIs(dl.timeout, extr._timeout)
+ self.assertIs(dl.proxies, extr._proxies)
+ self.assertIs(dl.verify, extr._verify)
+
+ def test_config_http(self):
+ config.set((), "rate", 42)
+ config.set((), "mtime", False)
+ config.set((), "headers", {"foo": "bar"})
+ config.set(("downloader",), "retries", -1)
+ config.set(("downloader", "http"), "filesize-min", "10k")
+ config.set(("extractor", "generic"), "verify", False)
+ config.set(("extractor", "generic", "example.org"), "timeout", 10)
+ config.set(("extractor", "generic", "http"), "part", False)
+ config.set(
+ ("extractor", "generic", "example.org", "http"), "headers", {})
+
+ job = FakeJob()
+ dl = downloader.find("http")(job)
+
+ self.assertEqual(dl.headers, {"foo": "bar"})
+ self.assertEqual(dl.minsize, 10240)
+ self.assertEqual(dl.retries, float("inf"))
+ self.assertEqual(dl.timeout, 10)
+ self.assertEqual(dl.verify, False)
+ self.assertEqual(dl.mtime, False)
+ self.assertEqual(dl.rate, 42)
+ self.assertEqual(dl.part, False)
+
+
class TestDownloaderBase(unittest.TestCase):
@classmethod