author    Unit 193 <unit193@unit193.net>  2024-09-07 18:33:19 -0400
committer Unit 193 <unit193@unit193.net>  2024-09-07 18:33:19 -0400
commit    1f3ffe32342852fd9ea9e7704022488f3a1222bd (patch)
tree      cb255a091b73e96840de0f6f44b36dff1acab4b9
parent    b5e56c51e491b41f9eb6a895459c185788a377e5 (diff)

New upstream version 1.27.4  (upstream/1.27.4)
-rw-r--r--  CHANGELOG.md | 67
-rw-r--r--  PKG-INFO | 6
-rw-r--r--  README.rst | 4
-rw-r--r--  data/completion/_gallery-dl | 10
-rw-r--r--  data/completion/gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl.fish | 10
-rw-r--r--  data/man/gallery-dl.1 | 22
-rw-r--r--  data/man/gallery-dl.conf.5 | 299
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 2
-rw-r--r--  gallery_dl/__init__.py | 7
-rw-r--r--  gallery_dl/cookies.py | 9
-rw-r--r--  gallery_dl/downloader/ytdl.py | 5
-rw-r--r--  gallery_dl/extractor/batoto.py | 27
-rw-r--r--  gallery_dl/extractor/bunkr.py | 50
-rw-r--r--  gallery_dl/extractor/cyberdrop.py | 14
-rw-r--r--  gallery_dl/extractor/deviantart.py | 11
-rw-r--r--  gallery_dl/extractor/e621.py | 24
-rw-r--r--  gallery_dl/extractor/exhentai.py | 2
-rw-r--r--  gallery_dl/extractor/flickr.py | 38
-rw-r--r--  gallery_dl/extractor/furaffinity.py | 5
-rw-r--r--  gallery_dl/extractor/generic.py | 8
-rw-r--r--  gallery_dl/extractor/gofile.py | 3
-rw-r--r--  gallery_dl/extractor/hitomi.py | 1
-rw-r--r--  gallery_dl/extractor/instagram.py | 29
-rw-r--r--  gallery_dl/extractor/koharu.py | 25
-rw-r--r--  gallery_dl/extractor/lolisafe.py | 2
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 10
-rw-r--r--  gallery_dl/extractor/pixiv.py | 90
-rw-r--r--  gallery_dl/extractor/sankaku.py | 5
-rw-r--r--  gallery_dl/extractor/sexcom.py | 19
-rw-r--r--  gallery_dl/extractor/szurubooru.py | 8
-rw-r--r--  gallery_dl/extractor/toyhouse.py | 3
-rw-r--r--  gallery_dl/extractor/tumblr.py | 3
-rw-r--r--  gallery_dl/extractor/twitter.py | 29
-rw-r--r--  gallery_dl/extractor/wikimedia.py | 124
-rw-r--r--  gallery_dl/extractor/ytdl.py | 17
-rw-r--r--  gallery_dl/formatter.py | 18
-rw-r--r--  gallery_dl/job.py | 9
-rw-r--r--  gallery_dl/option.py | 38
-rw-r--r--  gallery_dl/path.py | 28
-rw-r--r--  gallery_dl/postprocessor/__init__.py | 2
-rw-r--r--  gallery_dl/postprocessor/hash.py | 71
-rw-r--r--  gallery_dl/postprocessor/metadata.py | 34
-rw-r--r--  gallery_dl/postprocessor/rename.py | 91
-rw-r--r--  gallery_dl/postprocessor/ugoira.py | 169
-rw-r--r--  gallery_dl/util.py | 57
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  gallery_dl/ytdl.py | 2
-rw-r--r--  test/test_downloader.py | 6
-rw-r--r--  test/test_formatter.py | 6
-rw-r--r--  test/test_postprocessor.py | 136
-rw-r--r--  test/test_util.py | 46
-rw-r--r--  test/test_ytdl.py | 2
54 files changed, 1382 insertions, 331 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70a33e6..7397593 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,20 +1,57 @@
-## 1.27.3 - 2024-08-10
+## 1.27.4 - 2024-09-06
### Extractors
#### Additions
-- [bunkr] support `bunkr.ci` and `bunkrrr.org` ([#5970](https://github.com/mikf/gallery-dl/issues/5970))
-- [furaffinity] add `submissions` extractor ([#5954](https://github.com/mikf/gallery-dl/issues/5954))
-- [hentaicosplays] support `hentai-cosplay-xxx.com` ([#5959](https://github.com/mikf/gallery-dl/issues/5959))
+- [sexcom] add `likes` extractor ([#6149](https://github.com/mikf/gallery-dl/issues/6149))
+- [wikimedia] add `wiki` extractor ([#6050](https://github.com/mikf/gallery-dl/issues/6050))
#### Fixes
-- [behance] fix `KeyError: 'fields'` ([#5965](https://github.com/mikf/gallery-dl/issues/5965))
-- [behance] fix video extraction ([#5965](https://github.com/mikf/gallery-dl/issues/5965))
-- [cien] extract all files when authenticated ([#5934](https://github.com/mikf/gallery-dl/issues/5934))
-- [deviantart] fix `KeyError - 'category'` ([#5960](https://github.com/mikf/gallery-dl/issues/5960), [#5961](https://github.com/mikf/gallery-dl/issues/5961), [#5969](https://github.com/mikf/gallery-dl/issues/5969), [#5971](https://github.com/mikf/gallery-dl/issues/5971), [#5976](https://github.com/mikf/gallery-dl/issues/5976), [#5978](https://github.com/mikf/gallery-dl/issues/5978))
-- [fanbox] update pagination logic ([#5949](https://github.com/mikf/gallery-dl/issues/5949), [#5951](https://github.com/mikf/gallery-dl/issues/5951), [#5956](https://github.com/mikf/gallery-dl/issues/5956))
-- [hotleak] fix AttributeError ([#5950](https://github.com/mikf/gallery-dl/issues/5950))
-- [instagram] restore GraphQL API functionality ([#5920](https://github.com/mikf/gallery-dl/issues/5920))
-- [twitter] update `x-csrf-token` header during login ([#5945](https://github.com/mikf/gallery-dl/issues/5945))
+- [bunkr] fix file downloads ([#6037](https://github.com/mikf/gallery-dl/issues/6037))
+- [cyberdrop] fix extraction
+- [deviantart] fix `"pagination": "manual"` for cursor-based endpoints ([#6079](https://github.com/mikf/gallery-dl/issues/6079))
+- [deviantart] fix `"original": "images"` ([#6124](https://github.com/mikf/gallery-dl/issues/6124))
+- [exhentai] fix `limits` option ([#6090](https://github.com/mikf/gallery-dl/issues/6090))
+- [flickr] make `exif` and `context` metadata extraction non-fatal ([#6002](https://github.com/mikf/gallery-dl/issues/6002), [#6077](https://github.com/mikf/gallery-dl/issues/6077))
+- [flickr] make `album` metadata extraction non-fatal ([#3441](https://github.com/mikf/gallery-dl/issues/3441))
+- [furaffinity] fix `favorite` pagination ([#6151](https://github.com/mikf/gallery-dl/issues/6151))
+- [gofile] fix `KeyError: 'childrenIds'` ([#5993](https://github.com/mikf/gallery-dl/issues/5993))
+- [newgrounds] fix warning for age-restricted posts ([#6005](https://github.com/mikf/gallery-dl/issues/6005))
+- [toyhouse] fix extraction of image URLs
+- [tumblr] fix `401 Unauthorized` for likes when using api-key ([#5994](https://github.com/mikf/gallery-dl/issues/5994))
+- [twitter] fix pinned Tweet extraction ([#6102](https://github.com/mikf/gallery-dl/issues/6102))
+- [ytdl] fix processing playlists of playlists ([#6127](https://github.com/mikf/gallery-dl/issues/6127))
#### Improvements
-- [bunkr] fail downloads for `maintenance` files ([#5952](https://github.com/mikf/gallery-dl/issues/5952))
-- [zerochan] improve tag redirect handling, add `redirects` option ([#5891](https://github.com/mikf/gallery-dl/issues/5891))
+- [bcbnsfw] use `*` as query when retrieving all posts ([#6135](https://github.com/mikf/gallery-dl/issues/6135))
+- [bunkr] support `bunkr:` URL prefix ([#6017](https://github.com/mikf/gallery-dl/issues/6017))
+- [e621] cache pool metadata API calls ([#6001](https://github.com/mikf/gallery-dl/issues/6001))
+- [generic] better directory names ([#6104](https://github.com/mikf/gallery-dl/issues/6104))
+- [koharu] improve format selection ([#6088](https://github.com/mikf/gallery-dl/issues/6088))
+- [pixiv] implement downloading "original" ugoira frames ([#6056](https://github.com/mikf/gallery-dl/issues/6056))
+- [pixiv] use mobile API for `series` ([#5983](https://github.com/mikf/gallery-dl/issues/5983))
+#### Metadata
+- [batoto] improve chapter info regex ([#5988](https://github.com/mikf/gallery-dl/issues/5988), [#5997](https://github.com/mikf/gallery-dl/issues/5997))
+- [batoto] extract `chapter_url` metadata ([#5562](https://github.com/mikf/gallery-dl/issues/5562))
+- [batoto] improve `title` extraction ([#5988](https://github.com/mikf/gallery-dl/issues/5988))
+- [hitomi] extract `extension_original` metadata ([#6049](https://github.com/mikf/gallery-dl/issues/6049))
+- [instagram] add `post_date` metadata field ([#6081](https://github.com/mikf/gallery-dl/issues/6081), [#6091](https://github.com/mikf/gallery-dl/issues/6091))
+- [sankaku] restore old `tags` format ([#6043](https://github.com/mikf/gallery-dl/issues/6043))
+- [twitter] extract `type` metadata ([#6111](https://github.com/mikf/gallery-dl/issues/6111))
+#### Options
+- [bunkr] add `tlds` option to match URLs with all possible TLDs ([#5875](https://github.com/mikf/gallery-dl/issues/5875), [#6017](https://github.com/mikf/gallery-dl/issues/6017))
+- [instagram] add `max-posts` option ([#6054](https://github.com/mikf/gallery-dl/issues/6054))
+- [instagram] add `info` as a possible `include` value
+- [instagram] allow disabling `cursor` output
+- [twitter] add `info` as a possible `include` value ([#6114](https://github.com/mikf/gallery-dl/issues/6114))
+- [twitter] allow disabling `cursor` output ([#5990](https://github.com/mikf/gallery-dl/issues/5990))
### Post Processors
-- [metadata] add `base-directory` option ([#5262](https://github.com/mikf/gallery-dl/issues/5262), [#5728](https://github.com/mikf/gallery-dl/issues/5728))
+- [hash] add `hash` post processor to compute file hash metadata ([#6099](https://github.com/mikf/gallery-dl/issues/6099))
+- [metadata] add `include` and `exclude` options ([#6058](https://github.com/mikf/gallery-dl/issues/6058))
+- [metadata] fix using `..` in directories on Windows ([#5942](https://github.com/mikf/gallery-dl/issues/5942), [#6094](https://github.com/mikf/gallery-dl/issues/6094))
+- [rename] add `rename` post processor to rename previously downloaded files ([#5846](https://github.com/mikf/gallery-dl/issues/5846), [#6044](https://github.com/mikf/gallery-dl/issues/6044))
+- [ugoira] support converting "original" frames ([#6056](https://github.com/mikf/gallery-dl/issues/6056))
+- [ugoira] add `skip` option ([#6056](https://github.com/mikf/gallery-dl/issues/6056))
+### Miscellaneous
+- [cookies:firefox] extract only cookies without container by default ([#5957](https://github.com/mikf/gallery-dl/issues/5957))
+- [formatter] implement `A` format specifier ([#6036](https://github.com/mikf/gallery-dl/issues/6036))
+- [tests] fix bug when running tests in a certain order
+- [util] extend `CustomNone` with arithmetic operators ([#6007](https://github.com/mikf/gallery-dl/issues/6007), [#6009](https://github.com/mikf/gallery-dl/issues/6009))
+- add `--rename` and `--rename-to` command-line options ([#5846](https://github.com/mikf/gallery-dl/issues/5846), [#6044](https://github.com/mikf/gallery-dl/issues/6044))
+- add `input-files` config option ([#6059](https://github.com/mikf/gallery-dl/issues/6059))
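
For reference, the new `input-files` option from the last entry above accepts the same values as repeated `--input-file` arguments; the `__init__.py` hunk further below simply appends each entry to `args.input_files`. A minimal sketch using gallery-dl's Python config API (paths are illustrative):

    from gallery_dl import config

    # entries behave like repeated --input-file arguments; plain strings
    # are wrapped as (path, None) by the __init__.py code shown below
    config.set((), "input-files", ["~/urls.txt", "$HOME/input"])
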
diff --git a/PKG-INFO b/PKG-INFO
index 5d32a2a..35dd6ce 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.3
+Version: 1.27.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -114,9 +114,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 263d33f..ab85c75 100644
--- a/README.rst
+++ b/README.rst
@@ -74,9 +74,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 3308e98..1353fa8 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -13,9 +13,7 @@ _arguments -s -S \
{-X,--extractors}'[Load external extractors from PATH]':'<path>' \
--user-agent'[User-Agent request header]':'<ua>' \
--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
-{-U,--update}'[Update to the latest version]' \
---update-to'[Switch to a dfferent release channel (stable or dev) or upgrade/downgrade to a specific version]':'<channel[@tag]>' \
---update-check'[Check if a newer version is available]' \
+{-U,--update-check}'[Check if a newer version is available]' \
{-i,--input-file}'[Download URLs found in FILE ('\''-'\'' for stdin). More than one --input-file can be specified]':'<file>':_files \
{-I,--input-file-comment}'[Download URLs found in FILE. Comment them out after they were downloaded successfully.]':'<file>':_files \
{-x,--input-file-delete}'[Download URLs found in FILE. Delete them after they were downloaded successfully.]':'<file>':_files \
@@ -65,7 +63,7 @@ _arguments -s -S \
--netrc'[Enable .netrc authentication data]' \
{-C,--cookies}'[File to load additional cookies from]':'<file>':_files \
--cookies-export'[Export session cookies to FILE]':'<file>':_files \
---cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with '\''/'\'', keyring name prefixed with '\''+'\'', profile prefixed with '\'':'\'', and container prefixed with '\''::'\'' ('\''none'\'' for no container)]':'<browser[/domain][+keyring][:profile][::container]>' \
+--cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with '\''/'\'', keyring name prefixed with '\''+'\'', profile prefixed with '\'':'\'', and container prefixed with '\''::'\'' ('\''none'\'' for no container (default), '\''all'\'' for all containers)]':'<browser[/domain][+keyring][:profile][::container]>' \
{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
--filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'<size>' \
@@ -84,7 +82,9 @@ _arguments -s -S \
--zip'[Store downloaded files in a ZIP archive]' \
--cbz'[Store downloaded files in a CBZ archive]' \
--mtime'[Set file modification times according to metadata selected by NAME. Examples: '\''date'\'' or '\''status\[date\]'\'']':'<name>' \
---ugoira'[Convert Pixiv Ugoira to FORMAT using FFmpeg. Supported formats are '\''webm'\'', '\''mp4'\'', '\''gif'\'', '\''vp8'\'', '\''vp9'\'', '\''vp9-lossless'\'', '\''copy'\''.]':'<format>' \
+--rename'[Rename previously downloaded files from FORMAT to the current filename format]':'<format>' \
+--rename-to'[Rename previously downloaded files from the current filename format to FORMAT]':'<format>' \
+--ugoira'[Convert Pixiv Ugoira to FMT using FFmpeg. Supported formats are '\''webm'\'', '\''mp4'\'', '\''gif'\'', '\''vp8'\'', '\''vp9'\'', '\''vp9-lossless'\'', '\''copy'\''.]':'<fmt>' \
--exec'[Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
--exec-after'[Execute CMD after all files were downloaded. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"]':'<cmd>' && rc=0
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 0d933fa..32d9705 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update --update-to --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 7243998..971ba68 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -7,9 +7,7 @@ complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'D' -l 'director
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'X' -l 'extractors' -d 'Load external extractors from PATH'
complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header'
complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)'
-complete -c gallery-dl -s 'U' -l 'update' -d 'Update to the latest version'
-complete -c gallery-dl -x -l 'update-to' -d 'Switch to a dfferent release channel (stable or dev) or upgrade/downgrade to a specific version'
-complete -c gallery-dl -l 'update-check' -d 'Check if a newer version is available'
+complete -c gallery-dl -s 'U' -l 'update-check' -d 'Check if a newer version is available'
complete -c gallery-dl -r -F -s 'i' -l 'input-file' -d 'Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified'
complete -c gallery-dl -r -F -s 'I' -l 'input-file-comment' -d 'Download URLs found in FILE. Comment them out after they were downloaded successfully.'
complete -c gallery-dl -r -F -s 'x' -l 'input-file-delete' -d 'Download URLs found in FILE. Delete them after they were downloaded successfully.'
@@ -60,7 +58,7 @@ complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the giv
complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data'
complete -c gallery-dl -r -F -s 'C' -l 'cookies' -d 'File to load additional cookies from'
complete -c gallery-dl -r -F -l 'cookies-export' -d 'Export session cookies to FILE'
-complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)'
+complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container (default), "all" for all containers)'
complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped'
complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped'
complete -c gallery-dl -x -l 'filesize-min' -d 'Do not download files smaller than SIZE (e.g. 500k or 2.5M)'
@@ -81,7 +79,9 @@ complete -c gallery-dl -l 'zip' -d 'Store downloaded files in a ZIP archive'
complete -c gallery-dl -l 'cbz' -d 'Store downloaded files in a CBZ archive'
complete -c gallery-dl -x -l 'mtime' -d 'Set file modification times according to metadata selected by NAME. Examples: "date" or "status[date]"'
complete -c gallery-dl -l 'mtime-from-date' -d '==SUPPRESS=='
-complete -c gallery-dl -x -l 'ugoira' -d 'Convert Pixiv Ugoira to FORMAT using FFmpeg. Supported formats are "webm", "mp4", "gif", "vp8", "vp9", "vp9-lossless", "copy".'
+complete -c gallery-dl -x -l 'rename' -d 'Rename previously downloaded files from FORMAT to the current filename format'
+complete -c gallery-dl -x -l 'rename-to' -d 'Rename previously downloaded files from the current filename format to FORMAT'
+complete -c gallery-dl -x -l 'ugoira' -d 'Convert Pixiv Ugoira to FMT using FFmpeg. Supported formats are "webm", "mp4", "gif", "vp8", "vp9", "vp9-lossless", "copy".'
complete -c gallery-dl -l 'ugoira-conv' -d '==SUPPRESS=='
complete -c gallery-dl -l 'ugoira-conv-lossless' -d '==SUPPRESS=='
complete -c gallery-dl -l 'ugoira-conv-copy' -d '==SUPPRESS=='
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 97af9f9..591daae 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-08-10" "1.27.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-09-06" "1.27.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -41,13 +41,7 @@ User-Agent request header
.B "\-\-clear\-cache" \f[I]MODULE\f[]
Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)
.TP
-.B "\-U, \-\-update"
-Update to the latest version
-.TP
-.B "\-\-update\-to" \f[I]CHANNEL[@TAG]\f[]
-Switch to a dfferent release channel (stable or dev) or upgrade/downgrade to a specific version
-.TP
-.B "\-\-update\-check"
+.B "\-U, \-\-update\-check"
Check if a newer version is available
.TP
.B "\-i, \-\-input\-file" \f[I]FILE\f[]
@@ -198,7 +192,7 @@ File to load additional cookies from
Export session cookies to FILE
.TP
.B "\-\-cookies\-from\-browser" \f[I]BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]\f[]
-Name of the browser to load cookies from, with optional domain prefixed with '/', keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container)
+Name of the browser to load cookies from, with optional domain prefixed with '/', keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container (default), 'all' for all containers)
.TP
.B "\-A, \-\-abort" \f[I]N\f[]
Stop current extractor run after N consecutive file downloads were skipped
@@ -254,8 +248,14 @@ Store downloaded files in a CBZ archive
.B "\-\-mtime" \f[I]NAME\f[]
Set file modification times according to metadata selected by NAME. Examples: 'date' or 'status[date]'
.TP
-.B "\-\-ugoira" \f[I]FORMAT\f[]
-Convert Pixiv Ugoira to FORMAT using FFmpeg. Supported formats are 'webm', 'mp4', 'gif', 'vp8', 'vp9', 'vp9-lossless', 'copy'.
+.B "\-\-rename" \f[I]FORMAT\f[]
+Rename previously downloaded files from FORMAT to the current filename format
+.TP
+.B "\-\-rename\-to" \f[I]FORMAT\f[]
+Rename previously downloaded files from the current filename format to FORMAT
+.TP
+.B "\-\-ugoira" \f[I]FMT\f[]
+Convert Pixiv Ugoira to FMT using FFmpeg. Supported formats are 'webm', 'mp4', 'gif', 'vp8', 'vp9', 'vp9-lossless', 'copy'.
.TP
.B "\-\-exec" \f[I]CMD\f[]
Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 49c3ec3..e0d75ac 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-08-10" "1.27.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-09-06" "1.27.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1697,6 +1697,22 @@ Sets the maximum depth of returned reply posts.
Process reposts.
+.SS extractor.bunkr.tlds
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls which \f[I]bunkr\f[] TLDs to accept.
+
+.br
+* \f[I]true\f[]: Match URLs with *all* possible TLDs (e.g. \f[I]bunkr.xyz\f[] or \f[I]bunkrrr.duck\f[])
+.br
+* \f[I]false\f[]: Match only URLs with known TLDs
+
+
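A minimal sketch of enabling this option through the Python config API. Note that `bunkr.py` (see its diff further below) evaluates `tlds` at module import time when choosing its URL pattern, so the option has to be set, or the config file loaded, before the extractor module is first imported:

    from gallery_dl import config

    # accept any bunkr TLD; must happen before gallery_dl.extractor.bunkr
    # is imported, since BASE_PATTERN is chosen at import time
    config.set(("extractor", "bunkr"), "tlds", True)
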
.SS extractor.cien.files
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -2798,6 +2814,31 @@ Selects which API endpoints to use.
* \f[I]"graphql"\f[]: GraphQL API - lower-resolution media
+.SS extractor.instagram.cursor
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Example:" 4
+"3414259811154179155_25025320"
+
+.IP "Description:" 4
+Controls from which position to start the extraction process.
+
+.br
+* \f[I]true\f[]: Start from the beginning.
+Log the most recent \f[I]cursor\f[] value when interrupted before reaching the end.
+.br
+* \f[I]false\f[]: Start from the beginning.
+.br
+* any \f[I]string\f[]: Start from the position defined by this value.
+
+
.SS extractor.instagram.include
.IP "Type:" 6
.br
@@ -2824,11 +2865,23 @@ Possible values are
\f[I]"tagged"\f[],
\f[I]"stories"\f[],
\f[I]"highlights"\f[],
+\f[I]"info"\f[],
\f[I]"avatar"\f[].
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.instagram.max-posts
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Limit the number of posts to download.
+
+
.SS extractor.instagram.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -3092,17 +3145,22 @@ to be downloaded as individual image files.
.SS extractor.koharu.format
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
-\f[I]"original"\f[]
+\f[I]["0", "1600", "1280", "980", "780"]\f[]
.IP "Description:" 4
-Name of the image format to download.
+Name(s) of the image format to download.
-Available formats are
+When more than one format is given, the first available one is selected.
+
+Possible formats are
.br
-\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[]/\f[I]"original"\f[]
+\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[] (original)
.br
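
The options documented in this hunk combine naturally; a sketch with illustrative values (the koharu list follows the fallback order described above, so the first available format wins):

    from gallery_dl import config

    config.set(("extractor", "instagram"), "max-posts", 100)  # stop after 100 posts
    config.set(("extractor", "instagram"), "cursor", False)   # no cursor logging
    config.set(("extractor", "koharu"), "format", ["0", "1600"])  # prefer original
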
@@ -4650,6 +4708,33 @@ Controls how to handle Cross Site Request Forgery (CSRF) tokens.
* \f[I]"cookies"\f[]: Use token given by the \f[I]ct0\f[] cookie if present.
+.SS extractor.twitter.cursor
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Example:" 4
+"1/DAABCgABGVKi5lE___oKAAIYbfYNcxrQLggAAwAAAAIAAA"
+
+.IP "Description:" 4
+Controls from which position to start the extraction process.
+
+.br
+* \f[I]true\f[]: Start from the beginning.
+Log the most recent \f[I]cursor\f[] value when interrupted before reaching the end.
+.br
+* \f[I]false\f[]: Start from the beginning.
+.br
+* any \f[I]string\f[]: Start from the position defined by this value.
+
+Note: A \f[I]cursor\f[] value from one timeline cannot be used with another.
+
+
.SS extractor.twitter.expand
.IP "Type:" 6
\f[I]bool\f[]
@@ -4702,6 +4787,7 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
+\f[I]"info"\f[],
\f[I]"avatar"\f[],
\f[I]"background"\f[],
\f[I]"timeline"\f[],
@@ -5241,6 +5327,19 @@ will be taken from the original posts, not the retweeted posts.
Download video files.
+.SS extractor.wikimedia.limit
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]50\f[]
+
+.IP "Description:" 4
+Number of results to return in a single API query.
+
+The value must be between 10 and 500.
+
+
.SS extractor.ytdl.cmdline-args
.IP "Type:" 6
.br
@@ -6372,6 +6471,97 @@ The event(s) for which \f[I]exec.command\f[] is run.
See \f[I]metadata.event\f[] for a list of available events.
+.SS hash.chunk-size
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]32768\f[]
+
+.IP "Description:" 4
+Number of bytes read per chunk during file hash computation.
+
+
+.SS hash.event
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"file"\f[]
+
+.IP "Description:" 4
+The event(s) for which \f[I]file hashes\f[] are computed.
+
+See \f[I]metadata.event\f[] for a list of available events.
+
+
+.SS hash.filename
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Rebuild \f[I]filenames\f[] after computing
+\f[I]hash digests\f[] and adding them to the metadata dict.
+
+
+.SS hash.hashes
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[] (field name -> hash algorithm)
+
+.IP "Default:" 9
+\f[I]"md5,sha1"\f[]
+
+.IP "Example:" 4
+.. code:: json
+
+"sha256:hash_sha,sha3_512:hash_sha3"
+
+.. code:: json
+
+{
+"hash_sha" : "sha256",
+"hash_sha3": "sha3_512"
+}
+
+
+.IP "Description:" 4
+Hash digests to compute.
+
+For a list of available hash algorithms, run
+
+.. code::
+
+python -c "import hashlib; print('\\n'.join(hashlib.algorithms_available))"
+
+or see \f[I]python/hashlib\f[].
+
+.br
+* If this is a \f[I]string\f[],
+it is parsed as a comma-separated list of algorithm-fieldname pairs:
+
+.. code::
+
+[<hash algorithm> ":"] <field name> ["," ...]
+
+When \f[I]<hash algorithm>\f[] is omitted,
+\f[I]<field name>\f[] is used as algorithm name.
+
+.br
+* If this is an \f[I]object\f[],
+it is a \f[I]<field name>\f[] to \f[I]<algorithm name>\f[] mapping
+for hash digests to compute.
+
+
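Putting the string grammar above into practice, a sketch of a post-processor entry using this option (field names are illustrative):

    from gallery_dl import config

    config.set(("extractor",), "postprocessors", [{
        "name"    : "hash",
        # "sha256:hash_sha" stores a SHA-256 digest in metadata field
        # "hash_sha"; a bare "md5" would use the field name as algorithm
        "hashes"  : "sha256:hash_sha,sha3_512:hash_sha3",
        "filename": True,  # rebuild filenames after adding the digests
    }])
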
.SS metadata.mode
.IP "Type:" 6
\f[I]string\f[]
@@ -6538,6 +6728,32 @@ e.g. a Tweet on Twitter or a post on Patreon.
After downloading all files of a post
+.SS metadata.include
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+["id", "width", "height", "description"]
+
+.IP "Description:" 4
+Include only the given top-level keys when writing JSON data.
+
+Note: Missing or undefined fields will be silently ignored.
+
+
+.SS metadata.exclude
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+["blocked", "watching", "status"]
+
+.IP "Description:" 4
+Exclude all given keys from written JSON data.
+
+Note: Cannot be used with \f[I]metadata.include\f[].
+
+
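A sketch of the new `metadata.include` option in a post-processor entry, using the example keys from above (remember that `include` and `exclude` cannot be combined):

    from gallery_dl import config

    config.set(("extractor",), "postprocessors", [{
        "name"   : "metadata",
        "include": ["id", "width", "height", "description"],
    }])
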
.SS metadata.fields
.IP "Type:" 6
.br
@@ -6847,6 +7063,37 @@ and gets called with the current metadata dict as argument.
or the \f[I]Path\f[] to a .py file,
+.SS rename.from
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+The \f[I]format string\f[] for filenames to rename.
+
+When no value is given, \f[I]extractor.*.filename\f[] is used.
+
+
+.SS rename.to
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+The \f[I]format string\f[] for target filenames.
+
+When no value is given, \f[I]extractor.*.filename\f[] is used.
+
+
+.SS rename.skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Do not rename a file when another file with the target name already exists.
+
+
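A sketch of the new `rename` post processor, matching the defaults described above: when `to` is omitted, target names fall back to the extractor's current filename format (the `from` pattern here is illustrative):

    from gallery_dl import config

    config.set(("extractor",), "postprocessors", [{
        "name": "rename",
        "from": "{id}.{extension}",  # pattern the old files were saved under
        "skip": True,                # leave files alone when the target exists
    }])
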
.SS ugoira.extension
.IP "Type:" 6
\f[I]string\f[]
@@ -7026,6 +7273,30 @@ Allow repeating the last frame when necessary
to prevent it from only being displayed for a very short amount of time.
+.SS ugoira.skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Do not convert frames if target file already exists.
+
+
+.SS zip.compression
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"store"\f[]
+
+.IP "Description:" 4
+Compression method to use when writing the archive.
+
+Possible values are \f[I]"store"\f[], \f[I]"zip"\f[], \f[I]"bzip2"\f[], \f[I]"lzma"\f[].
+
+
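And a sketch of the new `zip.compression` option, with values as listed above:

    from gallery_dl import config

    config.set(("extractor",), "postprocessors", [{
        "name"       : "zip",
        "compression": "lzma",  # instead of the default uncompressed "store"
    }])
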
.SS zip.extension
.IP "Type:" 6
\f[I]string\f[]
@@ -7191,6 +7462,17 @@ For example, setting this option to \f[I]"#"\f[] would allow a replacement
operation to be \f[I]Rold#new#\f[] instead of the default \f[I]Rold/new/\f[]
+.SS input-files
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]Path\f[]
+
+.IP "Example:" 4
+["~/urls.txt", "$HOME/input"]
+
+.IP "Description:" 4
+Additional input files.
+
+
.SS signals-ignore
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -7575,17 +7857,20 @@ Compare versions of the same file and replace/enumerate them on mismatch
.br
\f[I]exec\f[]
Execute external commands
+\f[I]hash\f[]
+Compute file hash digests
\f[I]metadata\f[]
Write metadata to separate files
\f[I]mtime\f[]
Set file modification time according to its metadata
\f[I]python\f[]
Call Python functions
+\f[I]rename\f[]
+Rename previously downloaded files
\f[I]ugoira\f[]
Convert Pixiv Ugoira to WebM using \f[I]ffmpeg\f[]
\f[I]zip\f[]
Store files in a ZIP archive
-\f[I]ytdl\f[]
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 5d32a2a..35dd6ce 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.3
+Version: 1.27.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -114,9 +114,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index de5738a..854bfaa 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -239,9 +239,11 @@ gallery_dl/postprocessor/classify.py
gallery_dl/postprocessor/common.py
gallery_dl/postprocessor/compare.py
gallery_dl/postprocessor/exec.py
+gallery_dl/postprocessor/hash.py
gallery_dl/postprocessor/metadata.py
gallery_dl/postprocessor/mtime.py
gallery_dl/postprocessor/python.py
+gallery_dl/postprocessor/rename.py
gallery_dl/postprocessor/ugoira.py
gallery_dl/postprocessor/zip.py
test/test_cache.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 4b39c15..663fe99 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -238,6 +238,13 @@ def main():
return config.open_extern()
else:
+ input_files = config.get((), "input-files")
+ if input_files:
+ for input_file in input_files:
+ if isinstance(input_file, str):
+ input_file = (input_file, None)
+ args.input_files.append(input_file)
+
if not args.urls and not args.input_files:
parser.error(
"The following arguments are required: URL\n"
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index f017929..deb7c7b 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -179,11 +179,14 @@ def _firefox_cookies_database(profile=None, container=None):
"{}".format(search_root))
_log_debug("Extracting cookies from %s", path)
- if container == "none":
+ if not container or container == "none":
container_id = False
_log_debug("Only loading cookies not belonging to any container")
- elif container:
+ elif container == "all":
+ container_id = None
+
+ else:
containers_path = os.path.join(
os.path.dirname(path), "containers.json")
@@ -207,8 +210,6 @@ def _firefox_cookies_database(profile=None, container=None):
container))
_log_debug("Only loading cookies from container '%s' (ID %s)",
container, container_id)
- else:
- container_id = None
return path, container_id
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 87e7756..b3bec21 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -42,8 +42,9 @@ class YoutubeDLDownloader(DownloaderBase):
if not ytdl_instance:
try:
module = ytdl.import_module(self.config("module"))
- except ImportError as exc:
- self.log.error("Cannot import module '%s'", exc.name)
+ except (ImportError, SyntaxError) as exc:
+ self.log.error("Cannot import module '%s'",
+ getattr(exc, "name", ""))
self.log.debug("", exc_info=True)
self.download = lambda u, p: False
return False
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 2adb142..786acd9 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -51,28 +51,29 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
if not manga:
manga = extr('link-hover">', "<")
info = text.remove_html(extr('link-hover">', "</"))
+ info = text.unescape(info)
match = re.match(
- r"(?:Volume\s+(\d+) )?"
- r"\w+\s+(\d+)(.*)", info)
+ r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
+ r"(?:Chapter|Episode)\s*(\d+)([\w.]*)", info)
if match:
volume, chapter, minor = match.groups()
- title = text.remove_html(extr(
- "selected>", "</option")).partition(" : ")[2]
else:
volume = chapter = 0
minor = ""
- title = info
return {
- "manga" : text.unescape(manga),
- "manga_id" : text.parse_int(manga_id),
- "title" : text.unescape(title),
- "volume" : text.parse_int(volume),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": minor,
- "chapter_id" : text.parse_int(self.chapter_id),
- "date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
+ "manga" : text.unescape(manga),
+ "manga_id" : text.parse_int(manga_id),
+ "chapter_url" : extr(self.chapter_id + "-ch_", '"'),
+ "title" : text.unescape(text.remove_html(extr(
+ "selected>", "</option")).partition(" : ")[2]),
+ "volume" : text.parse_int(volume),
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor" : minor,
+ "chapter_string": info,
+ "chapter_id" : text.parse_int(self.chapter_id),
+ "date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
}
def images(self, page):
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 240bbd3..780bdf1 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,15 +6,24 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.sk/"""
+"""Extractors for https://bunkr.si/"""
from .lolisafe import LolisafeAlbumExtractor
-from .. import text
-
-BASE_PATTERN = (
- r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))"
-)
+from .. import text, config
+
+
+if config.get(("extractor", "bunkr"), "tlds"):
+ BASE_PATTERN = (
+ r"(?:bunkr:(?:https?://)?([^/?#]+)|"
+ r"(?:https?://)?(?:app\.)?(bunkr+\.\w+))"
+ )
+else:
+ BASE_PATTERN = (
+ r"(?:bunkr:(?:https?://)?([^/?#]+)|"
+ r"(?:https?://)?(?:app\.)?(bunkr+"
+ r"\.(?:s[kiu]|[cf]i|ru|la|is|to|a[cx]"
+ r"|black|cat|media|red|site|ws|org)))"
+ )
LEGACY_DOMAINS = {
"bunkr.ru",
@@ -28,15 +37,15 @@ LEGACY_DOMAINS = {
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.sk albums"""
+ """Extractor for bunkr.si albums"""
category = "bunkr"
- root = "https://bunkr.sk"
+ root = "https://bunkr.si"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://bunkr.sk/a/ID"
+ example = "https://bunkr.si/a/ID"
def __init__(self, match):
LolisafeAlbumExtractor.__init__(self, match)
- domain = match.group(match.lastindex-1)
+ domain = self.groups[0] or self.groups[1]
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
@@ -69,11 +78,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def _extract_file(self, url):
page = self.request(url).text
- return (
- text.extr(page, '<source src="', '"') or
- text.extr(page, '<img src="', '"') or
- text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
- )
+ url = (text.extr(page, '<source src="', '"') or
+ text.extr(page, '<img src="', '"'))
+
+ if not url:
+ url_download = text.rextract(
+ page, ' href="', '"', page.rindex("Download"))[0]
+ page = self.request(text.unescape(url_download)).text
+ url = text.unescape(text.rextract(page, ' href="', '"')[0])
+
+ return url
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
@@ -83,11 +97,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
class BunkrMediaExtractor(BunkrAlbumExtractor):
- """Extractor for bunkr.sk media links"""
+ """Extractor for bunkr.si media links"""
subcategory = "media"
directory_fmt = ("{category}",)
pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
- example = "https://bunkr.sk/v/FILENAME"
+ example = "https://bunkr.si/v/FILENAME"
def fetch_album(self, album_id):
try:
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index d864960..a514696 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -14,6 +14,7 @@ from .. import text
class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
category = "cyberdrop"
root = "https://cyberdrop.me"
+ root_api = "https://api.cyberdrop.me"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)"
example = "https://cyberdrop.me/a/ID"
@@ -55,5 +56,14 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
def _extract_files(self, file_ids):
for file_id in file_ids:
- url = "{}/api/f/{}".format(self.root, file_id)
- yield self.request(url).json()
+ try:
+ url = "{}/api/file/info/{}".format(self.root_api, file_id)
+ file = self.request(url).json()
+ auth = self.request(file["auth_url"]).json()
+ file["url"] = auth["url"]
+ except Exception as exc:
+ self.log.warning("%s (%s: %s)",
+ file_id, exc.__class__.__name__, exc)
+ continue
+
+ yield file
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index f3ea4e7..ea70b58 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -69,11 +69,12 @@ class DeviantartExtractor(Extractor):
self.quality = ",q_{}".format(self.quality)
self.quality_sub = re.compile(r",q_\d+").sub
- if self.original != "image":
- self._update_content = self._update_content_default
- else:
- self._update_content = self._update_content_image
+ if isinstance(self.original, str) and \
+ self.original.lower().startswith("image"):
self.original = True
+ self._update_content = self._update_content_image
+ else:
+ self._update_content = self._update_content_default
journals = self.config("journals", "html")
if journals == "html":
@@ -1462,6 +1463,8 @@ class DeviantartOAuthAPI():
return
if "next_cursor" in data:
+ if not data["next_cursor"]:
+ return
params["offset"] = None
params["cursor"] = data["next_cursor"]
elif data["next_offset"] is not None:
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index af963bc..553ec22 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -10,6 +10,7 @@
from .common import Message
from . import danbooru
+from ..cache import memcache
from .. import text, util
@@ -44,16 +45,11 @@ class E621Extractor(danbooru.DanbooruExtractor):
self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
if notes and post.get("has_notes"):
- url = "{}/notes.json?search[post_id]={}".format(
- self.root, post["id"])
- post["notes"] = self.request(url).json()
+ post["notes"] = self._get_notes(post["id"])
if pools and post["pools"]:
- url = "{}/pools.json?search[id]={}".format(
- self.root, ",".join(map(str, post["pools"])))
- post["pools"] = _pools = self.request(url).json()
- for pool in _pools:
- pool["name"] = pool["name"].replace("_", " ")
+ post["pools"] = self._get_pools(
+ ",".join(map(str, post["pools"])))
post["filename"] = file["md5"]
post["extension"] = file["ext"]
@@ -64,6 +60,18 @@ class E621Extractor(danbooru.DanbooruExtractor):
yield Message.Directory, post
yield Message.Url, file["url"], post
+ def _get_notes(self, id):
+ return self.request(
+ "{}/notes.json?search[post_id]={}".format(self.root, id)).json()
+
+ @memcache(keyarg=1)
+ def _get_pools(self, ids):
+ pools = self.request(
+ "{}/pools.json?search[id]={}".format(self.root, ids)).json()
+ for pool in pools:
+ pool["name"] = pool["name"].replace("_", " ")
+ return pools
+
BASE_PATTERN = E621Extractor.update({
"e621": {
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 1b4f995..01af7a4 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -430,7 +430,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
}
page = self.request(url, cookies=cookies).text
- current = text.extr(page, "<strong>", "</strong>")
+ current = text.extr(page, "<strong>", "</strong>").replace(",", "")
self.log.debug("Image Limits: %s/%s", current, self.limits)
self._remaining = self.limits - text.parse_int(current)
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index c94a110..1b4971c 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -75,11 +75,8 @@ class FlickrImageExtractor(FlickrExtractor):
def items(self):
photo = self.api.photos_getInfo(self.item_id)
- if self.api.exif:
- photo.update(self.api.photos_getExif(self.item_id))
- if self.api.contexts:
- photo.update(self.api.photos_getAllContexts(self.item_id))
+ self.api._extract_metadata(photo)
if photo["media"] == "video" and self.api.videos:
self.api._extract_video(photo)
else:
@@ -135,8 +132,13 @@ class FlickrAlbumExtractor(FlickrExtractor):
def metadata(self):
data = FlickrExtractor.metadata(self)
- data["album"] = self.api.photosets_getInfo(
- self.album_id, self.user["nsid"])
+ try:
+ data["album"] = self.api.photosets_getInfo(
+ self.album_id, self.user["nsid"])
+ except Exception:
+ data["album"] = {}
+ self.log.warning("%s: Unable to retrieve album metadata",
+ self.album_id)
return data
def photos(self):
@@ -407,6 +409,8 @@ class FlickrAPI(oauth.OAuth1API):
self.log.debug("Server response: %s", data)
if data["code"] == 1:
raise exception.NotFoundError(self.extractor.subcategory)
+ elif data["code"] == 2:
+ raise exception.AuthorizationError(msg)
elif data["code"] == 98:
raise exception.AuthenticationError(msg)
elif data["code"] == 99:
@@ -453,10 +457,7 @@ class FlickrAPI(oauth.OAuth1API):
photo["date"] = text.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split()
- if self.exif:
- photo.update(self.photos_getExif(photo["id"]))
- if self.contexts:
- photo.update(self.photos_getAllContexts(photo["id"]))
+ self._extract_metadata(photo)
photo["id"] = text.parse_int(photo["id"])
if "owner" in photo:
@@ -512,6 +513,23 @@ class FlickrAPI(oauth.OAuth1API):
photo["width"] = photo["height"] = 0
return photo
+ def _extract_metadata(self, photo):
+ if self.exif:
+ try:
+ photo.update(self.photos_getExif(photo["id"]))
+ except Exception as exc:
+ self.log.warning(
+ "Unable to retrieve 'exif' data for %s (%s: %s)",
+ photo["id"], exc.__class__.__name__, exc)
+
+ if self.contexts:
+ try:
+ photo.update(self.photos_getAllContexts(photo["id"]))
+ except Exception as exc:
+ self.log.warning(
+ "Unable to retrieve 'contexts' data for %s (%s: %s)",
+ photo["id"], exc.__class__.__name__, exc)
+
@staticmethod
def _clean_info(info):
info["title"] = info["title"]["_content"]
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 3055426..d253582 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -179,6 +179,11 @@ class FuraffinityExtractor(Extractor):
break
self._favorite_id = text.parse_int(extr('data-fav-id="', '"'))
yield post_id
+
+ pos = page.find('type="submit">Next</button>')
+ if pos >= 0:
+ path = text.rextract(page, '<form action="', '"', pos)[0]
+ continue
path = text.extr(page, 'right" href="', '"')
def _pagination_search(self, query):
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 16d4340..a6c1d5a 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -15,7 +15,7 @@ import re
class GenericExtractor(Extractor):
"""Extractor for images in a generic web page."""
category = "generic"
- directory_fmt = ("{category}", "{pageurl}")
+ directory_fmt = ("{category}", "{subcategory}", "{path}")
archive_fmt = "{imageurl}"
# By default, the generic extractor is disabled
@@ -52,7 +52,10 @@ class GenericExtractor(Extractor):
self.scheme = match.group('scheme')
else:
self.scheme = 'https://'
- self.url = self.scheme + self.url
+ self.url = text.ensure_http_scheme(self.url, self.scheme)
+
+ self.subcategory = match.group('domain')
+ self.path = match.group('path')
# Used to resolve relative image urls
self.root = self.scheme + match.group('domain')
@@ -87,6 +90,7 @@ class GenericExtractor(Extractor):
def metadata(self, page):
"""Extract generic webpage metadata, return them in a dict."""
data = {}
+ data['path'] = self.path.replace("/", "")
data['pageurl'] = self.url
data['title'] = text.extr(page, '<title>', "</title>")
data['description'] = text.extr(
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index f0eb4e9..52b4ae6 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -47,8 +47,7 @@ class GofileFolderExtractor(Extractor):
raise exception.AuthorizationError("Password required")
num = 0
- for content_id in folder["childrenIds"]:
- content = contents[content_id]
+ for content in contents.values():
content["folder"] = folder
if content["type"] == "file":
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 9b74700..18df9df 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -89,6 +89,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
path = ext = "webp"
ihash = image["hash"]
idata = text.nameext_from_url(image["name"])
+ idata["extension_original"] = idata["extension"]
if ext:
idata["extension"] = ext
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index c05fe72..422c865 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -12,6 +12,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
+import itertools
import binascii
import json
import re
@@ -57,12 +58,17 @@ class InstagramExtractor(Extractor):
data = self.metadata()
videos = self.config("videos", True)
previews = self.config("previews", False)
+ max_posts = self.config("max-posts")
video_headers = {"User-Agent": "Mozilla/5.0"}
order = self.config("order-files")
reverse = order[0] in ("r", "d") if order else False
- for post in self.posts():
+ posts = self.posts()
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
if "__typename" in post:
post = self._parse_post_graphql(post)
@@ -159,15 +165,19 @@ class InstagramExtractor(Extractor):
if "title" in post:
data["highlight_title"] = post["title"]
if "created_at" in post:
- data["date"] = text.parse_timestamp(post.get("created_at"))
+ data["post_date"] = data["date"] = text.parse_timestamp(
+ post.get("created_at"))
else: # regular image/video post
+ date = text.parse_timestamp(post.get("taken_at"))
data = {
"post_id" : post["pk"],
"post_shortcode": post["code"],
+ "post_url": "{}/p/{}/".format(self.root, post["code"]),
+ "post_date": date,
+ "date": date,
"likes": post.get("like_count", 0),
"pinned": post.get("timeline_pinned_user_ids", ()),
- "date": text.parse_timestamp(post.get("taken_at")),
"liked": post.get("has_liked", False),
}
@@ -206,7 +216,6 @@ class InstagramExtractor(Extractor):
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
- data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"])
data["_files"] = files = []
for num, item in enumerate(items, 1):
@@ -269,7 +278,6 @@ class InstagramExtractor(Extractor):
owner = post["owner"]
data = {
"typename" : typename,
- "date" : text.parse_timestamp(post["taken_at_timestamp"]),
"likes" : post["edge_media_preview_like"]["count"],
"liked" : post.get("viewer_has_liked", False),
"pinned" : pinned,
@@ -279,11 +287,13 @@ class InstagramExtractor(Extractor):
"post_id" : post["id"],
"post_shortcode": post["shortcode"],
"post_url" : "{}/p/{}/".format(self.root, post["shortcode"]),
+ "post_date" : text.parse_timestamp(post["taken_at_timestamp"]),
"description": text.parse_unicode_escapes("\n".join(
edge["node"]["text"]
for edge in post["edge_media_to_caption"]["edges"]
)),
}
+ data["date"] = data["post_date"]
tags = self._find_tags(data["description"])
if tags:
@@ -313,6 +323,7 @@ class InstagramExtractor(Extractor):
media = {
"num": num,
"media_id" : node["id"],
+ "date" : data["date"],
"shortcode" : (node.get("shortcode") or
shortcode_from_id(node["id"])),
"display_url": node["display_url"],
@@ -328,6 +339,7 @@ class InstagramExtractor(Extractor):
dimensions = post["dimensions"]
media = {
"media_id" : post["id"],
+ "date" : data["date"],
"shortcode" : post["shortcode"],
"display_url": post["display_url"],
"video_url" : post.get("video_url"),
@@ -378,7 +390,11 @@ class InstagramExtractor(Extractor):
"full_name": user["full_name"]})
def _init_cursor(self):
- return self.config("cursor") or None
+ cursor = self.config("cursor", True)
+ if not cursor:
+ self._update_cursor = util.identity
+ elif isinstance(cursor, str):
+ return cursor
def _update_cursor(self, cursor):
self.log.debug("Cursor: %s", cursor)
@@ -418,6 +434,7 @@ class InstagramUserExtractor(InstagramExtractor):
base = "{}/{}/".format(self.root, self.item)
stories = "{}/stories/{}/".format(self.root, self.item)
return self._dispatch_extractors((
+ (InstagramInfoExtractor , base + "info/"),
(InstagramAvatarExtractor , base + "avatar/"),
(InstagramStoriesExtractor , stories),
(InstagramHighlightsExtractor, base + "highlights/"),
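
The `max-posts` handling added above relies on `itertools.islice`, which caps a potentially unbounded post generator after N items without exhausting it; a self-contained sketch:

    import itertools

    def posts():  # stand-in for an endless post generator
        num = 0
        while True:
            num += 1
            yield num

    print(list(itertools.islice(posts(), 3)))  # [1, 2, 3]
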
diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py
index 979b1a2..cacf504 100644
--- a/gallery_dl/extractor/koharu.py
+++ b/gallery_dl/extractor/koharu.py
@@ -161,16 +161,29 @@ class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
return results
def _select_format(self, formats):
- if not self.fmt or self.fmt == "original":
- fmtid = "0"
+ fmt = self.fmt
+
+ if not fmt or fmt == "best":
+ fmtids = ("0", "1600", "1280", "980", "780")
+ elif isinstance(fmt, str):
+ fmtids = fmt.split(",")
+ elif isinstance(fmt, list):
+ fmtids = fmt
else:
- fmtid = str(self.fmt)
+ fmtids = (str(self.fmt),)
- try:
- fmt = formats[fmtid]
- except KeyError:
+ for fmtid in fmtids:
+ try:
+ fmt = formats[fmtid]
+ if fmt["id"]:
+ break
+ except KeyError:
+ self.log.debug("%s: Format %s is not available",
+ self.groups[0], fmtid)
+ else:
raise exception.NotFoundError("format")
+ self.log.debug("%s: Selected format %s", self.groups[0], fmtid)
fmt["w"] = fmtid
return fmt
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 3d7d685..117b88b 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -34,7 +34,7 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
def __init__(self, match):
LolisafeExtractor.__init__(self, match)
- self.album_id = match.group(match.lastindex)
+ self.album_id = self.groups[-1]
def _init(self):
domain = self.config("domain")
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index ecd6619..5fc0ce5 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -171,15 +171,17 @@ class NewgroundsExtractor(Extractor):
if self.flash:
url += "/format/flash"
- with self.request(url, fatal=False) as response:
- if response.status_code >= 400:
- return {}
- page = response.text
+ response = self.request(url, fatal=False)
+ page = response.text
pos = page.find('id="adults_only"')
if pos >= 0:
msg = text.extract(page, 'class="highlight">', '<', pos)[0]
self.log.warning('"%s"', msg)
+ return {}
+
+ if response.status_code >= 400:
+ return {}
extr = text.extract_from(page)
data = extract_data(extr, post_url)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index d732894..3479b88 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -94,12 +94,39 @@ class PixivExtractor(Extractor):
work.get("id"), exc.message)
continue
- url = ugoira["zip_urls"]["medium"].replace(
- "_ugoira600x600", "_ugoira1920x1080")
- work["frames"] = ugoira["frames"]
+ url = ugoira["zip_urls"]["medium"]
+ work["frames"] = frames = ugoira["frames"]
work["date_url"] = self._date_from_url(url)
work["_http_adjust_extension"] = False
- yield Message.Url, url, text.nameext_from_url(url, work)
+
+ if self.load_ugoira == "original":
+ base, sep, _ = url.rpartition("_ugoira")
+ base = base.replace(
+ "/img-zip-ugoira/", "/img-original/", 1) + sep
+
+ for ext in ("jpg", "png", "gif"):
+ try:
+                        url = "{}0.{}".format(base, ext)
+ self.request(url, method="HEAD")
+ break
+ except exception.HttpError:
+ pass
+ else:
+ self.log.warning(
+ "Unable to find Ugoira frame URLs (%s)",
+ work.get("id"))
+ continue
+
+ for num, frame in enumerate(frames):
+                    url = "{}{}.{}".format(base, num, ext)
+ work["num"] = work["_ugoira_frame_index"] = num
+ work["suffix"] = "_p{:02}".format(num)
+ text.nameext_from_url(url, work)
+ yield Message.Url, url, work
+
+ else:
+ url = url.replace("_ugoira600x600", "_ugoira1920x1080")
+ yield Message.Url, url, text.nameext_from_url(url, work)
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
@@ -551,9 +578,6 @@ class PixivSeriesExtractor(PixivExtractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}",
"{series[id]} {series[title]}")
filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
- cookies_domain = ".pixiv.net"
- browser = "firefox"
- tls12 = False
pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
example = "https://www.pixiv.net/user/12345/series/12345"
@@ -562,34 +586,18 @@ class PixivSeriesExtractor(PixivExtractor):
self.user_id, self.series_id = match.groups()
def works(self):
- url = self.root + "/ajax/series/" + self.series_id
- params = {"p": 1}
- headers = {
- "Accept": "application/json",
- "Referer": "{}/user/{}/series/{}".format(
- self.root, self.user_id, self.series_id),
- "Alt-Used": "www.pixiv.net",
- }
+ series = None
- while True:
- data = self.request(url, params=params, headers=headers).json()
- body = data["body"]
- page = body["page"]
-
- series = body["extraData"]["meta"]
- series["id"] = self.series_id
- series["total"] = page["total"]
- series["title"] = text.extr(series["title"], '"', '"')
-
- for info in page["series"]:
- work = self.api.illust_detail(info["workId"])
- work["num_series"] = info["order"]
- work["series"] = series
- yield work
-
- if len(page["series"]) < 10:
- return
- params["p"] += 1
+ for work in self.api.illust_series(self.series_id):
+ if series is None:
+ series = self.api.data
+ series["total"] = num_series = series.pop("series_work_count")
+ else:
+ num_series -= 1
+
+ work["num_series"] = num_series
+ work["series"] = series
+ yield work
class PixivNovelExtractor(PixivExtractor):
@@ -916,6 +924,11 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._pagination("/v2/illust/related", params)
+ def illust_series(self, series_id, offset=0):
+ params = {"illust_series_id": series_id, "offset": offset}
+ return self._pagination("/v1/illust/series", params,
+ key_data="illust_series_detail")
+
def novel_bookmark_detail(self, novel_id):
params = {"novel_id": novel_id}
return self._call(
@@ -1013,10 +1026,15 @@ class PixivAppAPI():
raise exception.StopExtraction("API request failed: %s", error)
- def _pagination(self, endpoint, params, key="illusts"):
+ def _pagination(self, endpoint, params,
+ key_items="illusts", key_data=None):
while True:
data = self._call(endpoint, params)
- yield from data[key]
+
+ if key_data:
+ self.data = data.get(key_data)
+ key_data = None
+ yield from data[key_items]
if not data["next_url"]:
return
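The `original` branch rewrites the ugoira ZIP URL into per-frame original-image URLs before probing the extension with a HEAD request. The string surgery in isolation (URL illustrative):

    url = ("https://i.pximg.net/img-zip-ugoira/img/"
           "2024/01/01/00/00/00/12345_ugoira600x600.zip")

    base, sep, _ = url.rpartition("_ugoira")
    base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep

    # frame URLs are then probed as {base}0.jpg / .png / .gif
    print("{}0.{}".format(base, "jpg"))
    # .../img-original/img/2024/01/01/00/00/00/12345_ugoira0.jpg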
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index ad3efa7..7db8172 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -66,7 +66,8 @@ class SankakuExtractor(BooruExtractor):
def _prepare(self, post):
post["created_at"] = post["created_at"]["s"]
post["date"] = text.parse_timestamp(post["created_at"])
- post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
+ post["tags"] = [tag["name"].lower().replace(" ", "_")
+ for tag in post["tags"] if tag["name"]]
post["tag_string"] = " ".join(post["tags"])
post["_http_validate"] = self._check_expired
@@ -79,7 +80,7 @@ class SankakuExtractor(BooruExtractor):
for tag in post["tags"]:
name = tag["name"]
if name:
- tags[types[tag["type"]]].append(name)
+ tags[types[tag["type"]]].append(name.lower().replace(" ", "_"))
for key, value in tags.items():
post["tags_" + key] = value
post["tag_string_" + key] = " ".join(value)
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 80f2aea..7708b5c 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -152,6 +152,25 @@ class SexcomPinsExtractor(SexcomExtractor):
return self._pagination(url)
+class SexcomLikesExtractor(SexcomExtractor):
+ """Extractor for a user's liked pins on www.sex.com"""
+ subcategory = "likes"
+ directory_fmt = ("{category}", "{user}", "Likes")
+ pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/likes/"
+ example = "https://www.sex.com/user/USER/likes/"
+
+ def __init__(self, match):
+ SexcomExtractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def metadata(self):
+ return {"user": text.unquote(self.user)}
+
+ def pins(self):
+ url = "{}/user/{}/likes/".format(self.root, self.user)
+ return self._pagination(url)
+
+
class SexcomBoardExtractor(SexcomExtractor):
"""Extractor for pins from a board on www.sex.com"""
subcategory = "board"
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index bba1ece..b6917cc 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -86,6 +86,7 @@ BASE_PATTERN = SzurubooruExtractor.update({
"bcbnsfw": {
"root": "https://booru.bcbnsfw.space",
"pattern": r"booru\.bcbnsfw\.space",
+ "query-all": "*",
},
"snootbooru": {
"root": "https://snootbooru.com",
@@ -110,7 +111,12 @@ class SzurubooruTagExtractor(SzurubooruExtractor):
return {"search_tags": self.query}
def posts(self):
- return self._pagination("/posts/", {"query": self.query})
+ if self.query.strip():
+ query = self.query
+ else:
+ query = self.config_instance("query-all")
+
+ return self._pagination("/posts/", {"query": query})
class SzurubooruPostExtractor(SzurubooruExtractor):
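An empty tag search now falls back to the per-instance `query-all` value ("*" for booru.bcbnsfw.space above); the decision in isolation:

    def build_query(query, query_all="*"):
        # query_all stands in for self.config_instance("query-all")
        return query if query.strip() else query_all

    print(build_query(""))       # *
    print(build_query("tag_a"))  # tag_a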
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index 64fa951..44d87ee 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -123,4 +123,5 @@ class ToyhouseImageExtractor(ToyhouseExtractor):
def posts(self):
url = "{}/~images/{}".format(self.root, self.user)
- return (self._parse_post(self.request(url).text, '<img src="'),)
+ return (self._parse_post(
+ self.request(url).text, '<img class="mw-100" src="'),)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index ff29c04..73455d2 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -400,6 +400,9 @@ class TumblrAPI(oauth.OAuth1API):
"""Retrieve liked posts"""
endpoint = "/v2/blog/{}/likes".format(blog)
params = {"limit": "50", "before": self.before}
+ if self.api_key:
+ params["api_key"] = self.api_key
+
while True:
posts = self._call(endpoint, params)["liked_posts"]
if not posts:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ea57d76..d4ec343 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -155,6 +155,7 @@ class TwitterExtractor(Extractor):
if not self.unavailable:
continue
+ mtype = media.get("type")
descr = media.get("ext_alt_text")
width = media["original_info"].get("width", 0)
height = media["original_info"].get("height", 0)
@@ -164,6 +165,7 @@ class TwitterExtractor(Extractor):
files.append({
"url": "ytdl:{}/i/web/status/{}".format(
self.root, tweet["id_str"]),
+ "type" : mtype,
"width" : width,
"height" : height,
"extension" : None,
@@ -177,6 +179,7 @@ class TwitterExtractor(Extractor):
)
files.append({
"url" : variant["url"],
+ "type" : mtype,
"width" : width,
"height" : height,
"bitrate" : variant.get("bitrate", 0),
@@ -193,6 +196,7 @@ class TwitterExtractor(Extractor):
base = url.rpartition("=")[0] + "="
files.append(text.nameext_from_url(url, {
"url" : base + self._size_image,
+ "type" : mtype,
"width" : width,
"height" : height,
"_fallback" : self._image_fallback(base),
@@ -504,7 +508,11 @@ class TwitterExtractor(Extractor):
}
def _init_cursor(self):
- return self.config("cursor") or None
+ cursor = self.config("cursor", True)
+ if not cursor:
+ self._update_cursor = util.identity
+ elif isinstance(cursor, str):
+ return cursor
def _update_cursor(self, cursor):
self.log.debug("Cursor: %s", cursor)
@@ -560,6 +568,7 @@ class TwitterUserExtractor(TwitterExtractor):
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
+ (TwitterInfoExtractor , base + "info"),
(TwitterAvatarExtractor , base + "photo"),
(TwitterBackgroundExtractor, base + "header_photo"),
(TwitterTimelineExtractor , base + "timeline"),
@@ -590,9 +599,16 @@ class TwitterTimelineExtractor(TwitterExtractor):
return cursor
def tweets(self):
- self._cursor = cursor = self.config("cursor") or None
reset = False
+ cursor = self.config("cursor", True)
+ if not cursor:
+ self._update_cursor = util.identity
+ elif isinstance(cursor, str):
+ self._cursor = cursor
+ else:
+ cursor = None
+
if cursor:
state = cursor.partition("/")[0]
state, _, tweet_id = state.partition("_")
@@ -1612,6 +1628,9 @@ class TwitterAPI():
entries = instr["entries"]
elif instr_type == "TimelineAddToModule":
entries = instr["moduleItems"]
+ elif instr_type == "TimelinePinEntry":
+ if pinned_tweet:
+ pinned_tweet = instr["entry"]
elif instr_type == "TimelineReplaceEntry":
entry = instr["entry"]
if entry["entryId"].startswith("cursor-bottom-"):
@@ -1650,9 +1669,11 @@ class TwitterAPI():
tweet = None
if pinned_tweet:
- pinned_tweet = False
- if instructions[-1]["type"] == "TimelinePinEntry":
+ if isinstance(pinned_tweet, dict):
+ tweets.append(pinned_tweet)
+ elif instructions[-1]["type"] == "TimelinePinEntry":
tweets.append(instructions[-1]["entry"])
+ pinned_tweet = False
for entry in entries:
esw = entry["entryId"].startswith
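A saved timeline cursor string encodes a state, an optional tweet ID, and (presumably) the raw API cursor after the slash; how such a value decomposes, with the format inferred from the partition calls above and the value itself hypothetical:

    cursor = "tweets_1800000000000000000/DAABCgABGgg"
    state, _, api_cursor = cursor.partition("/")
    state, _, tweet_id = state.partition("_")
    print(state, tweet_id, api_cursor)
    # tweets 1800000000000000000 DAABCgABGgg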
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 9370cfb..7a62e01 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -17,13 +17,11 @@ class WikimediaExtractor(BaseExtractor):
"""Base class for wikimedia extractors"""
basecategory = "wikimedia"
filename_fmt = "{filename} ({sha1[:8]}).{extension}"
- directory_fmt = ("{category}", "{page}")
archive_fmt = "{sha1}"
request_interval = (1.0, 2.0)
def __init__(self, match):
BaseExtractor.__init__(self, match)
- path = match.group(match.lastindex)
if self.category == "wikimedia":
self.category = self.root.split(".")[-2]
@@ -31,31 +29,7 @@ class WikimediaExtractor(BaseExtractor):
self.category = "{}-{}".format(
self.category, self.root.partition(".")[0].rpartition("/")[2])
- if path.startswith("wiki/"):
- path = path[5:]
-
- pre, sep, _ = path.partition(":")
- prefix = pre.lower() if sep else None
-
- self.title = path = text.unquote(path)
- if prefix:
- self.subcategory = prefix
-
- if prefix == "category":
- self.params = {
- "generator": "categorymembers",
- "gcmtitle" : path,
- "gcmtype" : "file",
- }
- elif prefix == "file":
- self.params = {
- "titles" : path,
- }
- else:
- self.params = {
- "generator": "images",
- "titles" : path,
- }
+ self.per_page = self.config("limit", 50)
def _init(self):
api_path = self.config_instance("api-path")
@@ -67,6 +41,22 @@ class WikimediaExtractor(BaseExtractor):
else:
self.api_url = self.root + "/api.php"
+ @staticmethod
+ def prepare(image):
+        """Adjust the content of an image object"""
+ image["metadata"] = {
+ m["name"]: m["value"]
+ for m in image["metadata"] or ()}
+ image["commonmetadata"] = {
+ m["name"]: m["value"]
+ for m in image["commonmetadata"] or ()}
+
+ filename = image["canonicaltitle"]
+ image["filename"], _, image["extension"] = \
+ filename.partition(":")[2].rpartition(".")
+ image["date"] = text.parse_datetime(
+ image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
+
def items(self):
for info in self._pagination(self.params):
try:
@@ -75,20 +65,7 @@ class WikimediaExtractor(BaseExtractor):
self.log.debug("Missing 'imageinfo' for %s", info)
continue
- image["metadata"] = {
- m["name"]: m["value"]
- for m in image["metadata"] or ()}
- image["commonmetadata"] = {
- m["name"]: m["value"]
- for m in image["commonmetadata"] or ()}
-
- filename = image["canonicaltitle"]
- image["filename"], _, image["extension"] = \
- filename.partition(":")[2].rpartition(".")
- image["date"] = text.parse_datetime(
- image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
- image["page"] = self.title
-
+ self.prepare(image)
yield Message.Directory, image
yield Message.Url, image["url"], image
@@ -110,6 +87,17 @@ class WikimediaExtractor(BaseExtractor):
while True:
data = self.request(url, params=params).json()
+ # ref: https://www.mediawiki.org/wiki/API:Errors_and_warnings
+ error = data.get("error")
+ if error:
+ self.log.error("%s: %s", error["code"], error["info"])
+ return
+            # MediaWiki will emit warnings for non-fatal mistakes, such as an
+            # invalid parameter, instead of raising an error
+ warnings = data.get("warnings")
+ if warnings:
+ self.log.debug("MediaWiki returned warnings: %s", warnings)
+
try:
pages = data["query"]["pages"]
except KeyError:
@@ -181,5 +169,59 @@ BASE_PATTERN = WikimediaExtractor.update({
class WikimediaArticleExtractor(WikimediaExtractor):
"""Extractor for wikimedia articles"""
subcategory = "article"
+ directory_fmt = ("{category}", "{page}")
pattern = BASE_PATTERN + r"/(?!static/)([^?#]+)"
example = "https://en.wikipedia.org/wiki/TITLE"
+
+ def __init__(self, match):
+ WikimediaExtractor.__init__(self, match)
+
+ path = match.group(match.lastindex)
+ if path.startswith("wiki/"):
+ path = path[5:]
+
+ pre, sep, _ = path.partition(":")
+ prefix = pre.lower() if sep else None
+
+ self.title = path = text.unquote(path)
+ if prefix:
+ self.subcategory = prefix
+
+ if prefix == "category":
+ self.params = {
+ "generator": "categorymembers",
+ "gcmtitle" : path,
+ "gcmtype" : "file",
+ "gcmlimit" : self.per_page,
+ }
+ elif prefix == "file":
+ self.params = {
+ "titles" : path,
+ }
+ else:
+ self.params = {
+ "generator": "images",
+ "gimlimit" : self.per_page,
+ "titles" : path,
+ }
+
+ def prepare(self, image):
+ WikimediaExtractor.prepare(image)
+ image["page"] = self.title
+
+
+class WikimediaWikiExtractor(WikimediaExtractor):
+ """Extractor for all files on a MediaWiki instance"""
+ subcategory = "wiki"
+ pattern = BASE_PATTERN + r"/?$"
+ example = "https://en.wikipedia.org/"
+
+ def __init__(self, match):
+ WikimediaExtractor.__init__(self, match)
+
+ # ref: https://www.mediawiki.org/wiki/API:Allpages
+ self.params = {
+ "generator" : "allpages",
+ "gapnamespace": 6, # "File" namespace
+ "gaplimit" : self.per_page,
+ }
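The new `wiki` extractor pages through every file on an instance via MediaWiki's `allpages` generator. A hedged sketch of the equivalent raw API request, using plain `requests` against a public endpoint; the generator parameters come from the code above, while the `imageinfo` props are assumed for illustration:

    import requests

    params = {
        "action"      : "query",
        "format"      : "json",
        "generator"   : "allpages",
        "gapnamespace": 6,    # "File" namespace
        "gaplimit"    : 50,
        "prop"        : "imageinfo",
        "iiprop"      : "url|timestamp|sha1",
    }
    data = requests.get("https://en.wikipedia.org/w/api.php",
                        params=params).json()
    for page in data["query"]["pages"].values():
        print(page["title"])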
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index cb3c74c..168845e 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -116,21 +116,20 @@ class YoutubeDLExtractor(Extractor):
for entry in entries:
if not entry:
continue
- elif entry.get("_type") in ("url", "url_transparent"):
+
+ if entry.get("_type") in ("url", "url_transparent"):
try:
- info_dict = ytdl_instance.extract_info(
+ entry = ytdl_instance.extract_info(
entry["url"], False,
ie_key=entry.get("ie_key"))
except ytdl_module.utils.YoutubeDLError:
continue
-
- if not info_dict:
+ if not entry:
continue
- elif "entries" in info_dict:
- yield from self._process_entries(
- ytdl_module, ytdl_instance, info_dict["entries"])
- else:
- yield info_dict
+
+ if "entries" in entry:
+ yield from self._process_entries(
+ ytdl_module, ytdl_instance, entry["entries"])
else:
yield entry
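The restructured loop reads as a generic recursive playlist flattener; a sketch, with `resolve` standing in for `ytdl_instance.extract_info`:

    def flatten(entries, resolve):
        """Expand nested ytdl playlist results into flat info dicts"""
        for entry in entries:
            if not entry:
                continue
            if entry.get("_type") in ("url", "url_transparent"):
                entry = resolve(entry)   # may return None, as handled above
                if not entry:
                    continue
            if "entries" in entry:
                yield from flatten(entry["entries"], resolve)
            else:
                yield entry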
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index ec1c926..f197e5d 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -325,6 +325,23 @@ def _parse_slice(format_spec, default):
return apply_slice
+def _parse_arithmetic(format_spec, default):
+ op, _, format_spec = format_spec.partition(_SEPARATOR)
+ fmt = _build_format_func(format_spec, default)
+
+ value = int(op[2:])
+ op = op[1]
+
+ if op == "+":
+ return lambda obj: fmt(obj + value)
+ if op == "-":
+ return lambda obj: fmt(obj - value)
+ if op == "*":
+ return lambda obj: fmt(obj * value)
+
+ return fmt
+
+
def _parse_conversion(format_spec, default):
conversions, _, format_spec = format_spec.partition(_SEPARATOR)
convs = [_CONVERSIONS[c] for c in conversions[1:]]
@@ -480,6 +497,7 @@ _CONVERSIONS = {
_FORMAT_SPECIFIERS = {
"?": _parse_optional,
"[": _parse_slice,
+ "A": _parse_arithmetic,
"C": _parse_conversion,
"D": _parse_datetime,
"J": _parse_join,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 0e0916d..c995767 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -322,6 +322,12 @@ class DownloadJob(Job):
for callback in hooks["prepare-after"]:
callback(pathfmt)
+ if kwdict.pop("_file_recheck", False) and pathfmt.exists():
+ if archive and self._archive_write_skip:
+ archive.add(kwdict)
+ self.handle_skip()
+ return
+
if self.sleep:
self.extractor.sleep(self.sleep(), "download")
@@ -474,10 +480,11 @@ class DownloadJob(Job):
def handle_skip(self):
pathfmt = self.pathfmt
- self.out.skip(pathfmt.path)
if "skip" in self.hooks:
for callback in self.hooks["skip"]:
callback(pathfmt)
+ self.out.skip(pathfmt.path)
+
if self._skipexc:
if not self._skipftr or self._skipftr(pathfmt.kwdict):
self._skipcnt += 1
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 155cbd9..0189c0e 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -74,6 +74,21 @@ class MtimeAction(argparse.Action):
})
+class RenameAction(argparse.Action):
+ """Configure rename post processors"""
+ def __call__(self, parser, namespace, value, option_string=None):
+ if self.const:
+ namespace.postprocessors.append({
+ "name": "rename",
+ "to" : value,
+ })
+ else:
+ namespace.postprocessors.append({
+ "name": "rename",
+ "from": value,
+ })
+
+
class UgoiraAction(argparse.Action):
"""Configure ugoira post processors"""
def __call__(self, parser, namespace, value, option_string=None):
@@ -128,7 +143,7 @@ class UgoiraAction(argparse.Action):
pp["name"] = "ugoira"
pp["whitelist"] = ("pixiv", "danbooru")
- namespace.options.append(((), "ugoira", True))
+ namespace.options.append((("extractor",), "ugoira", True))
namespace.postprocessors.append(pp)
@@ -207,7 +222,7 @@ def build_parser():
)
update = parser.add_argument_group("Update Options")
- if util.EXECUTABLE or 1:
+ if util.EXECUTABLE:
update.add_argument(
"-U", "--update",
dest="update", action="store_const", const="latest",
@@ -526,7 +541,8 @@ def build_parser():
"domain prefixed with '/', "
"keyring name prefixed with '+', "
"profile prefixed with ':', and "
- "container prefixed with '::' ('none' for no container)"),
+ "container prefixed with '::' "
+ "('none' for no container (default), 'all' for all containers)"),
)
selection = parser.add_argument_group("Selection Options")
@@ -661,9 +677,21 @@ def build_parser():
help=argparse.SUPPRESS,
)
postprocessor.add_argument(
+ "--rename",
+ dest="postprocessors", metavar="FORMAT", action=RenameAction, const=0,
+ help=("Rename previously downloaded files from FORMAT "
+ "to the current filename format"),
+ )
+ postprocessor.add_argument(
+ "--rename-to",
+ dest="postprocessors", metavar="FORMAT", action=RenameAction, const=1,
+ help=("Rename previously downloaded files from the current filename "
+ "format to FORMAT"),
+ )
+ postprocessor.add_argument(
"--ugoira",
- dest="postprocessors", metavar="FORMAT", action=UgoiraAction,
- help=("Convert Pixiv Ugoira to FORMAT using FFmpeg. "
+ dest="postprocessors", metavar="FMT", action=UgoiraAction,
+ help=("Convert Pixiv Ugoira to FMT using FFmpeg. "
"Supported formats are 'webm', 'mp4', 'gif', "
"'vp8', 'vp9', 'vp9-lossless', 'copy'."),
)
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 7892776..d408a41 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -184,29 +184,31 @@ class PathFormat():
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
self.kwdict = kwdict
- sep = os.sep
segments = self.build_directory(kwdict)
if segments:
self.directory = directory = self.basedirectory + self.clean_path(
- sep.join(segments) + sep)
+ os.sep.join(segments) + os.sep)
else:
self.directory = directory = self.basedirectory
if WINDOWS and self.extended:
- # Enable longer-than-260-character paths
- directory = os.path.abspath(directory)
- if directory.startswith("\\\\"):
- directory = "\\\\?\\UNC\\" + directory[2:]
- else:
- directory = "\\\\?\\" + directory
-
- # abspath() in Python 3.7+ removes trailing path separators (#402)
- if directory[-1] != sep:
- directory += sep
-
+ directory = self._extended_path(directory)
self.realdirectory = directory
+ def _extended_path(self, path):
+ # Enable longer-than-260-character paths
+ path = os.path.abspath(path)
+ if not path.startswith("\\\\"):
+ path = "\\\\?\\" + path
+ elif not path.startswith("\\\\?\\"):
+ path = "\\\\?\\UNC\\" + path[2:]
+
+ # abspath() in Python 3.7+ removes trailing path separators (#402)
+ if path[-1] != os.sep:
+ return path + os.sep
+ return path
+
def set_filename(self, kwdict):
"""Set general filename data"""
self.kwdict = kwdict
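`_extended_path` lifts the 260-character Windows path limit by prefixing `\\?\` (or `\\?\UNC\` for network shares). The helper in isolation, with the separator hardcoded for illustration:

    import os

    def extended_path(path, sep="\\"):
        path = os.path.abspath(path)
        if not path.startswith("\\\\"):
            path = "\\\\?\\" + path           # local drive
        elif not path.startswith("\\\\?\\"):
            path = "\\\\?\\UNC\\" + path[2:]  # UNC share
        # abspath() strips trailing separators (#402); restore one
        if path[-1] != sep:
            return path + sep
        return path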
diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py
index 4690554..7837b06 100644
--- a/gallery_dl/postprocessor/__init__.py
+++ b/gallery_dl/postprocessor/__init__.py
@@ -12,9 +12,11 @@ modules = [
"classify",
"compare",
"exec",
+ "hash",
"metadata",
"mtime",
"python",
+ "rename",
"ugoira",
"zip",
]
diff --git a/gallery_dl/postprocessor/hash.py b/gallery_dl/postprocessor/hash.py
new file mode 100644
index 0000000..92a7477
--- /dev/null
+++ b/gallery_dl/postprocessor/hash.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Compute file hash digests"""
+
+from .common import PostProcessor
+import hashlib
+
+
+class HashPP(PostProcessor):
+
+ def __init__(self, job, options):
+ PostProcessor.__init__(self, job)
+
+ self.chunk_size = options.get("chunk-size", 32768)
+ self.filename = options.get("filename")
+
+ hashes = options.get("hashes")
+ if isinstance(hashes, dict):
+ self.hashes = list(hashes.items())
+ elif isinstance(hashes, str):
+ self.hashes = []
+ for h in hashes.split(","):
+ name, sep, key = h.partition(":")
+ self.hashes.append((key if sep else name, name))
+ elif hashes:
+ self.hashes = hashes
+ else:
+ self.hashes = (("md5", "md5"), ("sha1", "sha1"))
+
+ events = options.get("event")
+ if events is None:
+ events = ("file",)
+ elif isinstance(events, str):
+ events = events.split(",")
+ job.register_hooks({event: self.run for event in events}, options)
+
+ def run(self, pathfmt):
+ hashes = [
+ (key, hashlib.new(name))
+ for key, name in self.hashes
+ ]
+
+ size = self.chunk_size
+ with self._open(pathfmt) as fp:
+ while True:
+ data = fp.read(size)
+ if not data:
+ break
+ for _, h in hashes:
+ h.update(data)
+
+ for key, h in hashes:
+ pathfmt.kwdict[key] = h.hexdigest()
+
+ if self.filename:
+ pathfmt.build_path()
+
+ def _open(self, pathfmt):
+ try:
+ return open(pathfmt.temppath, "rb")
+ except OSError:
+ return open(pathfmt.realpath, "rb")
+
+
+__postprocessor__ = HashPP
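Given the options parsing in `__init__` above, a post processor entry might look like the following (keys taken from the code; the metadata key `hash_sha256` is made up). A `"name:key"` pair stores the digest of algorithm `name` under kwdict key `key`:

    postprocessor = {
        "name"  : "hash",
        "hashes": "md5,sha256:hash_sha256",
        "event" : "file",
    }
    # afterwards pathfmt.kwdict["md5"] and ["hash_sha256"] hold the
    # hex digests; setting "filename": true additionally rebuilds the
    # path so the digests can appear in the filename format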
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index e89b170..3ef9fbc 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -103,10 +103,10 @@ class MetadataPP(PostProcessor):
job.register_hooks({event: self.run for event in events}, options)
self._init_archive(job, options, "_MD_")
+ self.filter = self._make_filter(options)
self.mtime = options.get("mtime")
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
- self.private = options.get("private", False)
self.skip = options.get("skip", False)
def run(self, pathfmt):
@@ -114,7 +114,10 @@ class MetadataPP(PostProcessor):
if archive and archive.check(pathfmt.kwdict):
return
- directory = self._directory(pathfmt)
+ if util.WINDOWS and pathfmt.extended:
+ directory = pathfmt._extended_path(self._directory(pathfmt))
+ else:
+ directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
if self.skip and os.path.exists(path):
@@ -231,10 +234,33 @@ class MetadataPP(PostProcessor):
fp.write("\n".join(tags) + "\n")
def _write_json(self, fp, kwdict):
- if not self.private:
- kwdict = util.filter_dict(kwdict)
+ if self.filter:
+ kwdict = self.filter(kwdict)
fp.write(self._json_encode(kwdict) + "\n")
+ def _make_filter(self, options):
+ include = options.get("include")
+ if include:
+ if isinstance(include, str):
+ include = include.split(",")
+ return lambda d: {k: d[k] for k in include if k in d}
+
+ exclude = options.get("exclude")
+ private = options.get("private")
+ if exclude:
+ if isinstance(exclude, str):
+ exclude = exclude.split(",")
+ exclude = set(exclude)
+
+ if private:
+ return lambda d: {k: v for k, v in d.items()
+ if k not in exclude}
+ return lambda d: {k: v for k, v in util.filter_dict(d).items()
+ if k not in exclude}
+
+ if not private:
+ return util.filter_dict
+
@staticmethod
def _make_encoder(options, indent=None):
return json.JSONEncoder(
diff --git a/gallery_dl/postprocessor/rename.py b/gallery_dl/postprocessor/rename.py
new file mode 100644
index 0000000..f71738d
--- /dev/null
+++ b/gallery_dl/postprocessor/rename.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Rename files"""
+
+from .common import PostProcessor
+from .. import formatter
+import os
+
+
+class RenamePP(PostProcessor):
+
+ def __init__(self, job, options):
+ PostProcessor.__init__(self, job)
+
+ self.skip = options.get("skip", True)
+ old = options.get("from")
+ new = options.get("to")
+
+ if old:
+ self._old = self._apply_format(old)
+ self._new = (self._apply_format(new) if new else
+ self._apply_pathfmt)
+ job.register_hooks({
+ "prepare": self.rename_from,
+ }, options)
+
+ elif new:
+ self._old = self._apply_pathfmt
+ self._new = self._apply_format(new)
+ job.register_hooks({
+ "skip" : self.rename_to_skip,
+ "prepare-after": self.rename_to_pafter,
+ }, options)
+
+ else:
+ raise ValueError("Option 'from' or 'to' is required")
+
+ def rename_from(self, pathfmt):
+ name_old = self._old(pathfmt)
+ path_old = pathfmt.realdirectory + name_old
+
+ if os.path.exists(path_old):
+ name_new = self._new(pathfmt)
+ path_new = pathfmt.realdirectory + name_new
+ self._rename(path_old, name_old, path_new, name_new)
+
+ def rename_to_skip(self, pathfmt):
+ name_old = self._old(pathfmt)
+ path_old = pathfmt.realdirectory + name_old
+
+ if os.path.exists(path_old):
+ pathfmt.filename = name_new = self._new(pathfmt)
+ pathfmt.path = pathfmt.directory + name_new
+ pathfmt.realpath = path_new = pathfmt.realdirectory + name_new
+ self._rename(path_old, name_old, path_new, name_new)
+
+ def rename_to_pafter(self, pathfmt):
+ pathfmt.filename = name_new = self._new(pathfmt)
+ pathfmt.path = pathfmt.directory + name_new
+ pathfmt.realpath = pathfmt.realdirectory + name_new
+ pathfmt.kwdict["_file_recheck"] = True
+
+ def _rename(self, path_old, name_old, path_new, name_new):
+ if self.skip and os.path.exists(path_new):
+ return self.log.warning(
+ "Not renaming '%s' to '%s' since another file with the "
+ "same name exists", name_old, name_new)
+
+ self.log.info("'%s' -> '%s'", name_old, name_new)
+ os.replace(path_old, path_new)
+
+ def _apply_pathfmt(self, pathfmt):
+ return pathfmt.build_filename(pathfmt.kwdict)
+
+ def _apply_format(self, format_string):
+ fmt = formatter.parse(format_string).format_map
+
+ def apply(pathfmt):
+ return pathfmt.clean_path(pathfmt.clean_segment(fmt(
+ pathfmt.kwdict)))
+
+ return apply
+
+
+__postprocessor__ = RenamePP
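This is the post processor wired up by the new `--rename`/`--rename-to` flags in option.py above: `from` renames files matching an old format to the current filename format (via the `prepare` hook), while `to` renames current files to a new format and flags `_file_recheck` so the job.py hunk above can skip them. A config sketch equivalent to `--rename "{id}.{extension}"`:

    postprocessor = {
        "name": "rename",
        "from": "{id}.{extension}",
        # "to" omitted: target is the current filename format
    }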
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 9e60ce2..f053afa 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -36,7 +36,8 @@ class UgoiraPP(PostProcessor):
self.delete = not options.get("keep-files", False)
self.repeat = options.get("repeat-last-frame", True)
self.mtime = options.get("mtime", True)
- self.uniform = False
+ self.skip = options.get("skip", True)
+ self.uniform = self._convert_zip = self._convert_files = False
ffmpeg = options.get("ffmpeg-location")
self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg"
@@ -90,33 +91,44 @@ class UgoiraPP(PostProcessor):
if self.prevent_odd:
args += ("-vf", "crop=iw-mod(iw\\,2):ih-mod(ih\\,2)")
- job.register_hooks(
- {"prepare": self.prepare, "file": self.convert}, options)
+ job.register_hooks({
+ "prepare": self.prepare,
+ "file" : self.convert_zip,
+ "after" : self.convert_files,
+ }, options)
def prepare(self, pathfmt):
- self._frames = None
-
- if pathfmt.extension != "zip":
+ if "frames" not in pathfmt.kwdict:
+ self._frames = None
return
- kwdict = pathfmt.kwdict
- if "frames" in kwdict:
- self._frames = kwdict["frames"]
- elif "pixiv_ugoira_frame_data" in kwdict:
- self._frames = kwdict["pixiv_ugoira_frame_data"]["data"]
+ self._frames = pathfmt.kwdict["frames"]
+ if pathfmt.extension == "zip":
+ self._convert_zip = True
+ if self.delete:
+ pathfmt.set_extension(self.extension)
+ pathfmt.build_path()
else:
- return
-
- if self.delete:
- pathfmt.set_extension(self.extension)
pathfmt.build_path()
+ index = pathfmt.kwdict["_ugoira_frame_index"]
+ frame = self._frames[index].copy()
+ frame["index"] = index
+ frame["path"] = pathfmt.realpath
+ frame["ext"] = pathfmt.kwdict["extension"]
+
+ if not index:
+ self._files = [frame]
+ else:
+ self._files.append(frame)
+ if len(self._files) >= len(self._frames):
+ self._convert_files = True
- def convert(self, pathfmt):
- if not self._frames:
+ def convert_zip(self, pathfmt):
+ if not self._convert_zip:
return
+ self._convert_zip = False
with tempfile.TemporaryDirectory() as tempdir:
- # extract frames
try:
with zipfile.ZipFile(pathfmt.temppath) as zfile:
zfile.extractall(tempdir)
@@ -124,53 +136,89 @@ class UgoiraPP(PostProcessor):
pathfmt.realpath = pathfmt.temppath
return
- # process frames and collect command-line arguments
- pathfmt.set_extension(self.extension)
- pathfmt.build_path()
-
- args = self._process(pathfmt, tempdir)
- if self.args_pp:
- args += self.args_pp
- if self.args:
- args += self.args
-
- # ensure target directory exists
- os.makedirs(pathfmt.realdirectory, exist_ok=True)
-
- # invoke ffmpeg
- try:
- if self.twopass:
- if "-f" not in self.args:
- args += ("-f", self.extension)
- args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
- self._exec(args + ["1", "-y", os.devnull])
- self._exec(args + ["2", pathfmt.realpath])
- else:
- args.append(pathfmt.realpath)
- self._exec(args)
- if self._finalize:
- self._finalize(pathfmt, tempdir)
- except OSError as exc:
- print()
- self.log.error("Unable to invoke FFmpeg (%s: %s)",
- exc.__class__.__name__, exc)
- pathfmt.realpath = pathfmt.temppath
- except Exception as exc:
- print()
- self.log.error("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("", exc_info=True)
- pathfmt.realpath = pathfmt.temppath
- else:
- if self.mtime:
- mtime = pathfmt.kwdict.get("_mtime")
- if mtime:
- util.set_mtime(pathfmt.realpath, mtime)
+ if self.convert(pathfmt, tempdir):
if self.delete:
pathfmt.delete = True
else:
+ self.log.info(pathfmt.filename)
pathfmt.set_extension("zip")
pathfmt.build_path()
+ def convert_files(self, pathfmt):
+ if not self._convert_files:
+ return
+ self._convert_files = False
+
+ with tempfile.TemporaryDirectory() as tempdir:
+ for frame in self._files:
+
+ # update frame filename extension
+ frame["file"] = name = "{}.{}".format(
+ frame["file"].partition(".")[0], frame["ext"])
+
+ # move frame into tempdir
+ try:
+ self._copy_file(frame["path"], tempdir + "/" + name)
+ except OSError as exc:
+ self.log.debug("Unable to copy frame %s (%s: %s)",
+ name, exc.__class__.__name__, exc)
+ return
+
+ pathfmt.kwdict["num"] = 0
+ self._frames = self._files
+ if self.convert(pathfmt, tempdir):
+ self.log.info(pathfmt.filename)
+ if self.delete:
+ self.log.debug("Deleting frames")
+ for frame in self._files:
+ util.remove_file(frame["path"])
+
+ def convert(self, pathfmt, tempdir):
+ pathfmt.set_extension(self.extension)
+ pathfmt.build_path()
+ if self.skip and pathfmt.exists():
+ return True
+
+ # process frames and collect command-line arguments
+ args = self._process(pathfmt, tempdir)
+ if self.args_pp:
+ args += self.args_pp
+ if self.args:
+ args += self.args
+
+ # ensure target directory exists
+ os.makedirs(pathfmt.realdirectory, exist_ok=True)
+
+ # invoke ffmpeg
+ try:
+ if self.twopass:
+ if "-f" not in self.args:
+ args += ("-f", self.extension)
+ args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
+ self._exec(args + ["1", "-y", os.devnull])
+ self._exec(args + ["2", pathfmt.realpath])
+ else:
+ args.append(pathfmt.realpath)
+ self._exec(args)
+ if self._finalize:
+ self._finalize(pathfmt, tempdir)
+ except OSError as exc:
+ print()
+ self.log.error("Unable to invoke FFmpeg (%s: %s)",
+ exc.__class__.__name__, exc)
+ pathfmt.realpath = pathfmt.temppath
+ except Exception as exc:
+ print()
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ self.log.debug("", exc_info=True)
+ pathfmt.realpath = pathfmt.temppath
+ else:
+ if self.mtime:
+ mtime = pathfmt.kwdict.get("_mtime")
+ if mtime:
+ util.set_mtime(pathfmt.realpath, mtime)
+ return True
+
def _exec(self, args):
self.log.debug(args)
out = None if self.output else subprocess.DEVNULL
@@ -182,6 +230,9 @@ class UgoiraPP(PostProcessor):
raise ValueError()
return retcode
+ def _copy_file(self, src, dst):
+ shutil.copyfile(src, dst)
+
def _process_concat(self, pathfmt, tempdir):
rate_in, rate_out = self.calculate_framerate(self._frames)
args = [self.ffmpeg, "-f", "concat"]
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 5744ef3..ecb496d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -101,7 +101,7 @@ def raises(cls):
return wrap
-def identity(x):
+def identity(x, _=None):
"""Returns its argument"""
return x
@@ -520,14 +520,9 @@ class CustomNone():
"""None-style type that supports more operations than regular None"""
__slots__ = ()
- def __getattribute__(self, _):
- return self
-
- def __getitem__(self, _):
- return self
-
- def __iter__(self):
- return self
+ __getattribute__ = identity
+ __getitem__ = identity
+ __iter__ = identity
def __call__(self, *args, **kwargs):
return self
@@ -536,10 +531,6 @@ class CustomNone():
def __next__():
raise StopIteration
- @staticmethod
- def __bool__():
- return False
-
def __eq__(self, other):
return self is other
@@ -550,14 +541,48 @@ class CustomNone():
__le__ = true
__gt__ = false
__ge__ = false
+ __bool__ = false
+
+ __add__ = identity
+ __sub__ = identity
+ __mul__ = identity
+ __matmul__ = identity
+ __truediv__ = identity
+ __floordiv__ = identity
+ __mod__ = identity
+
+ __radd__ = identity
+ __rsub__ = identity
+ __rmul__ = identity
+ __rmatmul__ = identity
+ __rtruediv__ = identity
+ __rfloordiv__ = identity
+ __rmod__ = identity
+
+ __lshift__ = identity
+ __rshift__ = identity
+ __and__ = identity
+ __xor__ = identity
+ __or__ = identity
+
+ __rlshift__ = identity
+ __rrshift__ = identity
+ __rand__ = identity
+ __rxor__ = identity
+ __ror__ = identity
+
+ __neg__ = identity
+ __pos__ = identity
+ __abs__ = identity
+ __invert__ = identity
@staticmethod
def __len__():
return 0
- @staticmethod
- def __hash__():
- return 0
+ __int__ = __len__
+ __hash__ = __len__
+ __index__ = __len__
@staticmethod
def __format__(_):
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index f2462ee..0f9f91b 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.27.3"
+__version__ = "1.27.4"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index d4fdedc..fe88c2c 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -18,7 +18,7 @@ def import_module(module_name):
if module_name is None:
try:
return __import__("yt_dlp")
- except ImportError:
+ except (ImportError, SyntaxError):
return __import__("youtube_dl")
return __import__(module_name.replace("-", "_"))
diff --git a/test/test_downloader.py b/test/test_downloader.py
index f88b2c0..35cccc4 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -45,11 +45,15 @@ class TestDownloaderModule(unittest.TestCase):
@classmethod
def setUpClass(cls):
# allow import of ytdl downloader module without youtube_dl installed
+ cls._orig_ytdl = sys.modules.get("youtube_dl")
sys.modules["youtube_dl"] = MagicMock()
@classmethod
def tearDownClass(cls):
- del sys.modules["youtube_dl"]
+ if cls._orig_ytdl:
+ sys.modules["youtube_dl"] = cls._orig_ytdl
+ else:
+ del sys.modules["youtube_dl"]
def tearDown(self):
downloader._cache.clear()
diff --git a/test/test_formatter.py b/test/test_formatter.py
index e00af85..75324fb 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -25,6 +25,7 @@ class TestFormatter(unittest.TestCase):
"b": "äöü",
"j": "げんそうきょう",
"d": {"a": "foo", "b": 0, "c": None},
+ "i": 2,
"l": ["a", "b", "c"],
"n": None,
"s": " \n\r\tSPACE ",
@@ -267,6 +268,11 @@ class TestFormatter(unittest.TestCase):
"{a:Sort-reverse}", # starts with 'S', contains 'r'
"['w', 'r', 'o', 'l', 'h', 'd', 'O', 'L', 'L', 'E', ' ']")
+ def test_specifier_arithmetic(self):
+ self._run_test("{i:A+1}", "3")
+ self._run_test("{i:A-1}", "1")
+ self._run_test("{i:A*3}", "6")
+
def test_specifier_conversions(self):
self._run_test("{a:Cl}" , "hello world")
self._run_test("{h:CHC}" , "Foo & Bar")
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index edd8575..dd53803 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -12,6 +12,7 @@ import sys
import unittest
from unittest.mock import Mock, mock_open, patch
+import shutil
import logging
import zipfile
import tempfile
@@ -239,6 +240,57 @@ class ExecTest(BasePostprocessorTest):
self.assertFalse(i.wait.called)
+class HashTest(BasePostprocessorTest):
+
+ def test_default(self):
+ self._create({})
+
+ with self.pathfmt.open() as fp:
+ fp.write(b"Foo Bar\n")
+
+ self._trigger()
+
+ kwdict = self.pathfmt.kwdict
+ self.assertEqual(
+ "35c9c9c7c90ad764bae9e2623f522c24", kwdict["md5"], "md5")
+ self.assertEqual(
+ "14d3d804494ef4e57d72de63e4cfee761240471a", kwdict["sha1"], "sha1")
+
+ def test_custom_hashes(self):
+ self._create({"hashes": "sha256:a,sha512:b"})
+
+ with self.pathfmt.open() as fp:
+ fp.write(b"Foo Bar\n")
+
+ self._trigger()
+
+ kwdict = self.pathfmt.kwdict
+ self.assertEqual(
+ "4775b55be17206445d7015a5fc7656f38a74b880670523c3b175455f885f2395",
+ kwdict["a"], "sha256")
+ self.assertEqual(
+ "6028f9e6957f4ca929941318c4bba6258713fd5162f9e33bd10e1c456d252700"
+ "3e1095b50736c4fd1e2deea152e3c8ecd5993462a747208e4d842659935a1c62",
+ kwdict["b"], "sha512")
+
+ def test_custom_hashes_dict(self):
+ self._create({"hashes": {"a": "sha256", "b": "sha512"}})
+
+ with self.pathfmt.open() as fp:
+ fp.write(b"Foo Bar\n")
+
+ self._trigger()
+
+ kwdict = self.pathfmt.kwdict
+ self.assertEqual(
+ "4775b55be17206445d7015a5fc7656f38a74b880670523c3b175455f885f2395",
+ kwdict["a"], "sha256")
+ self.assertEqual(
+ "6028f9e6957f4ca929941318c4bba6258713fd5162f9e33bd10e1c456d252700"
+ "3e1095b50736c4fd1e2deea152e3c8ecd5993462a747208e4d842659935a1c62",
+ kwdict["b"], "sha512")
+
+
class MetadataTest(BasePostprocessorTest):
def test_metadata_default(self):
@@ -585,6 +637,36 @@ class MetadataTest(BasePostprocessorTest):
self.assertTrue(not e.called)
self.assertTrue(m.called)
+ def test_metadata_option_include(self):
+ self._create(
+ {"include": ["_private", "filename", "foo"], "sort": True},
+ {"public": "hello ワールド", "_private": "foo バー"},
+ )
+
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+
+ self.assertEqual(self._output(m), """{
+ "_private": "foo バー",
+ "filename": "file"
+}
+""")
+
+ def test_metadata_option_exclude(self):
+ self._create(
+ {"exclude": ["category", "filename", "foo"], "sort": True},
+ {"public": "hello ワールド", "_private": "foo バー"},
+ )
+
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+
+ self.assertEqual(self._output(m), """{
+ "extension": "ext",
+ "public": "hello ワールド"
+}
+""")
+
@staticmethod
def _output(mock):
return "".join(
@@ -661,6 +743,60 @@ def calc(kwdict):
""")
+class RenameTest(BasePostprocessorTest):
+
+ def _prepare(self, filename):
+ path = self.pathfmt.realdirectory
+ shutil.rmtree(path, ignore_errors=True)
+ os.makedirs(path, exist_ok=True)
+
+ with open(path + filename, "w"):
+ pass
+
+ return path
+
+ def test_rename_from(self):
+ self._create({"from": "{id}.{extension}"}, {"id": 12345})
+ path = self._prepare("12345.ext")
+
+ self._trigger()
+
+ self.assertEqual(os.listdir(path), ["file.ext"])
+
+ def test_rename_to(self):
+ self._create({"to": "{id}.{extension}"}, {"id": 12345})
+ path = self._prepare("file.ext")
+
+ self._trigger(("skip",))
+
+ self.assertEqual(os.listdir(path), ["12345.ext"])
+
+ def test_rename_from_to(self):
+ self._create({"from": "name", "to": "{id}"}, {"id": 12345})
+ path = self._prepare("name")
+
+ self._trigger()
+
+ self.assertEqual(os.listdir(path), ["12345"])
+
+ def test_rename_noopt(self):
+ with self.assertRaises(ValueError):
+ self._create({})
+
+ def test_rename_skip(self):
+ self._create({"from": "{id}.{extension}"}, {"id": 12345})
+ path = self._prepare("12345.ext")
+ with open(path + "file.ext", "w"):
+ pass
+
+ with self.assertLogs("postprocessor.rename", level="WARNING") as cm:
+ self._trigger()
+ self.assertTrue(cm.output[0].startswith(
+ "WARNING:postprocessor.rename:Not renaming "
+ "'12345.ext' to 'file.ext'"))
+ self.assertEqual(sorted(os.listdir(path)), ["12345.ext", "file.ext"])
+
+
class ZipTest(BasePostprocessorTest):
def test_zip_default(self):
diff --git a/test/test_util.py b/test/test_util.py
index 4622c28..fd2ff8b 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -12,9 +12,11 @@ import sys
import unittest
import io
+import time
import random
import string
import datetime
+import platform
import tempfile
import itertools
import http.cookiejar
@@ -741,6 +743,9 @@ def hash(value):
self.assertFalse(obj)
self.assertEqual(len(obj), 0)
+ self.assertEqual(int(obj), 0)
+ self.assertEqual(hash(obj), 0)
+
self.assertEqual(str(obj), str(None))
self.assertEqual(repr(obj), repr(None))
self.assertEqual(format(obj), str(None))
@@ -751,6 +756,7 @@ def hash(value):
self.assertIs(obj(), obj)
self.assertIs(obj(1, "a"), obj)
self.assertIs(obj(foo="bar"), obj)
+ self.assertIs(iter(obj), obj)
self.assertEqual(util.json_dumps(obj), "null")
self.assertLess(obj, "foo")
@@ -761,9 +767,49 @@ def hash(value):
self.assertGreater(123, obj)
self.assertGreaterEqual(1.23, obj)
+ self.assertEqual(obj + 123, obj)
+ self.assertEqual(obj - 123, obj)
+ self.assertEqual(obj * 123, obj)
+ # self.assertEqual(obj @ 123, obj)
+ self.assertEqual(obj / 123, obj)
+ self.assertEqual(obj // 123, obj)
+ self.assertEqual(obj % 123, obj)
+
+ self.assertEqual(123 + obj, obj)
+ self.assertEqual(123 - obj, obj)
+ self.assertEqual(123 * obj, obj)
+ # self.assertEqual(123 @ obj, obj)
+ self.assertEqual(123 / obj, obj)
+ self.assertEqual(123 // obj, obj)
+ self.assertEqual(123 % obj, obj)
+
+ self.assertEqual(obj << 123, obj)
+ self.assertEqual(obj >> 123, obj)
+ self.assertEqual(obj & 123, obj)
+ self.assertEqual(obj ^ 123, obj)
+ self.assertEqual(obj | 123, obj)
+
+ self.assertEqual(123 << obj, obj)
+ self.assertEqual(123 >> obj, obj)
+ self.assertEqual(123 & obj, obj)
+ self.assertEqual(123 ^ obj, obj)
+ self.assertEqual(123 | obj, obj)
+
+ self.assertEqual(-obj, obj)
+ self.assertEqual(+obj, obj)
+ self.assertEqual(~obj, obj)
+ self.assertEqual(abs(obj), obj)
+
mapping = {}
mapping[obj] = 123
self.assertIn(obj, mapping)
+ self.assertEqual(mapping[obj], 123)
+
+ array = [1, 2, 3]
+ self.assertEqual(array[obj], 1)
+
+ if platform.python_implementation().lower() == "cpython":
+ self.assertTrue(time.localtime(obj))
i = 0
for _ in obj:
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index fd2e40a..f7eb671 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -22,7 +22,7 @@ class Test_CommandlineArguments(unittest.TestCase):
def setUpClass(cls):
try:
cls.module = __import__(cls.module_name)
- except ImportError:
+ except (ImportError, SyntaxError):
raise unittest.SkipTest("cannot import module '{}'".format(
cls.module_name))
cls.default = ytdl.parse_command_line(cls.module, [])