author    Unit 193 <unit193@unit193.net>  2023-03-13 02:07:49 -0400
committer Unit 193 <unit193@unit193.net>  2023-03-13 02:07:49 -0400
commit    10987f08f8b6c510ba64f4b42d95ba67eec6e5b0 (patch)
tree      1af82cad9ac859a70cafc976a980280b939cfcc7
parent    919f8ba16a7b82ba1099bd25b2c61c7881a05aa2 (diff)

New upstream version 1.25.0 (tag: upstream/1.25.0)
-rw-r--r--  CHANGELOG.md | 67
-rw-r--r--  PKG-INFO | 6
-rw-r--r--  README.rst | 4
-rw-r--r--  data/completion/_gallery-dl | 12
-rw-r--r--  data/completion/gallery-dl | 4
-rw-r--r--  data/completion/gallery-dl.fish | 12
-rw-r--r--  data/man/gallery-dl.1 | 24
-rw-r--r--  data/man/gallery-dl.conf.5 | 313
-rw-r--r--  docs/gallery-dl-example.conf | 3
-rw-r--r--  docs/gallery-dl.conf | 4
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 5
-rw-r--r--  gallery_dl/__init__.py | 93
-rw-r--r--  gallery_dl/actions.py | 112
-rw-r--r--  gallery_dl/config.py | 60
-rw-r--r--  gallery_dl/cookies.py | 14
-rw-r--r--  gallery_dl/downloader/http.py | 17
-rw-r--r--  gallery_dl/downloader/ytdl.py | 2
-rw-r--r--  gallery_dl/exception.py | 8
-rw-r--r--  gallery_dl/extractor/500px.py | 6
-rw-r--r--  gallery_dl/extractor/8muses.py | 7
-rw-r--r--  gallery_dl/extractor/__init__.py | 40
-rw-r--r--  gallery_dl/extractor/bbc.py | 5
-rw-r--r--  gallery_dl/extractor/bcy.py | 9
-rw-r--r--  gallery_dl/extractor/behance.py | 5
-rw-r--r--  gallery_dl/extractor/blogger.py | 7
-rw-r--r--  gallery_dl/extractor/bunkr.py | 92
-rw-r--r--  gallery_dl/extractor/catbox.py | 27
-rw-r--r--  gallery_dl/extractor/common.py | 15
-rw-r--r--  gallery_dl/extractor/danbooru.py | 178
-rw-r--r--  gallery_dl/extractor/deviantart.py | 259
-rw-r--r--  gallery_dl/extractor/directlink.py | 5
-rw-r--r--  gallery_dl/extractor/dynastyscans.py | 7
-rw-r--r--  gallery_dl/extractor/e621.py | 254
-rw-r--r--  gallery_dl/extractor/erome.py | 3
-rw-r--r--  gallery_dl/extractor/fallenangels.py | 5
-rw-r--r--  gallery_dl/extractor/fanbox.py | 2
-rw-r--r--  gallery_dl/extractor/fantia.py | 5
-rw-r--r--  gallery_dl/extractor/foolslide.py | 5
-rw-r--r--  gallery_dl/extractor/gelbooru.py | 51
-rw-r--r--  gallery_dl/extractor/generic.py | 24
-rw-r--r--  gallery_dl/extractor/hbrowse.py | 9
-rw-r--r--  gallery_dl/extractor/hentai2read.py | 5
-rw-r--r--  gallery_dl/extractor/hentaifox.py | 7
-rw-r--r--  gallery_dl/extractor/hentaihand.py | 5
-rw-r--r--  gallery_dl/extractor/hentaihere.py | 7
-rw-r--r--  gallery_dl/extractor/hitomi.py | 5
-rw-r--r--  gallery_dl/extractor/imagefap.py | 37
-rw-r--r--  gallery_dl/extractor/imagehosts.py | 19
-rw-r--r--  gallery_dl/extractor/imgbb.py | 5
-rw-r--r--  gallery_dl/extractor/instagram.py | 20
-rw-r--r--  gallery_dl/extractor/issuu.py | 7
-rw-r--r--  gallery_dl/extractor/lightroom.py | 7
-rw-r--r--  gallery_dl/extractor/mangadex.py | 8
-rw-r--r--  gallery_dl/extractor/manganelo.py | 124
-rw-r--r--  gallery_dl/extractor/mangapark.py | 7
-rw-r--r--  gallery_dl/extractor/mangasee.py | 53
-rw-r--r--  gallery_dl/extractor/misskey.py | 202
-rw-r--r--  gallery_dl/extractor/nana.py | 5
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 13
-rw-r--r--  gallery_dl/extractor/nhentai.py | 5
-rw-r--r--  gallery_dl/extractor/nitter.py | 33
-rw-r--r--  gallery_dl/extractor/oauth.py | 19
-rw-r--r--  gallery_dl/extractor/patreon.py | 7
-rw-r--r--  gallery_dl/extractor/photobucket.py | 4
-rw-r--r--  gallery_dl/extractor/pinterest.py | 13
-rw-r--r--  gallery_dl/extractor/plurk.py | 10
-rw-r--r--  gallery_dl/extractor/poipiku.py | 2
-rw-r--r--  gallery_dl/extractor/pornpics.py | 173
-rw-r--r--  gallery_dl/extractor/pururin.py | 5
-rw-r--r--  gallery_dl/extractor/reactor.py | 9
-rw-r--r--  gallery_dl/extractor/reddit.py | 58
-rw-r--r--  gallery_dl/extractor/redgifs.py | 102
-rw-r--r--  gallery_dl/extractor/shopify.py | 7
-rw-r--r--  gallery_dl/extractor/slideshare.py | 7
-rw-r--r--  gallery_dl/extractor/soundgasm.py | 93
-rw-r--r--  gallery_dl/extractor/subscribestar.py | 7
-rw-r--r--  gallery_dl/extractor/szurubooru.py | 144
-rw-r--r--  gallery_dl/extractor/telegraph.py | 20
-rw-r--r--  gallery_dl/extractor/tumblr.py | 20
-rw-r--r--  gallery_dl/extractor/twitter.py | 8
-rw-r--r--  gallery_dl/extractor/vsco.py | 7
-rw-r--r--  gallery_dl/extractor/weibo.py | 7
-rw-r--r--  gallery_dl/extractor/wikifeet.py | 5
-rw-r--r--  gallery_dl/extractor/xhamster.py | 6
-rw-r--r--  gallery_dl/extractor/xvideos.py | 5
-rw-r--r--  gallery_dl/formatter.py | 8
-rw-r--r--  gallery_dl/job.py | 46
-rw-r--r--  gallery_dl/oauth.py | 4
-rw-r--r--  gallery_dl/option.py | 69
-rw-r--r--  gallery_dl/output.py | 73
-rw-r--r--  gallery_dl/path.py | 13
-rw-r--r--  gallery_dl/postprocessor/common.py | 35
-rw-r--r--  gallery_dl/postprocessor/exec.py | 19
-rw-r--r--  gallery_dl/postprocessor/metadata.py | 51
-rw-r--r--  gallery_dl/util.py | 80
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  gallery_dl/ytdl.py | 272
-rw-r--r--  test/test_config.py | 9
-rw-r--r--  test/test_extractor.py | 2
-rw-r--r--  test/test_oauth.py | 58
-rw-r--r--  test/test_postprocessor.py | 78
-rw-r--r--  test/test_results.py | 6
-rw-r--r--  test/test_util.py | 93
-rw-r--r--  test/test_ytdl.py | 18
105 files changed, 3048 insertions, 986 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3beecbb..5d805c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,72 @@
# Changelog
+## 1.25.0 - 2023-03-11
+### Changes
+- [e621] split `e621` extractors from `danbooru` module ([#3425](https://github.com/mikf/gallery-dl/issues/3425))
+- [deviantart] remove mature scraps warning ([#3691](https://github.com/mikf/gallery-dl/issues/3691))
+- [deviantart] use `/collections/all` endpoint for favorites ([#3666](https://github.com/mikf/gallery-dl/issues/3666), [#3668](https://github.com/mikf/gallery-dl/issues/3668))
+- [newgrounds] update default image and audio archive IDs to prevent ID overlap ([#3681](https://github.com/mikf/gallery-dl/issues/3681))
+- rename `--ignore-config` to `--config-ignore`
+### Extractors
+- [catbox] add `file` extractor ([#3570](https://github.com/mikf/gallery-dl/issues/3570))
+- [deviantart] add `search` extractor ([#538](https://github.com/mikf/gallery-dl/issues/538), [#1264](https://github.com/mikf/gallery-dl/issues/1264), [#2954](https://github.com/mikf/gallery-dl/issues/2954), [#2970](https://github.com/mikf/gallery-dl/issues/2970), [#3577](https://github.com/mikf/gallery-dl/issues/3577))
+- [deviantart] add `gallery-search` extractor ([#1695](https://github.com/mikf/gallery-dl/issues/1695))
+- [deviantart] support `fxdeviantart.com` URLs ([#3740](https://github.com/mikf/gallery-dl/issues/3740))
+- [e621] implement `notes` and `pools` metadata extraction ([#3425](https://github.com/mikf/gallery-dl/issues/3425))
+- [gelbooru] add `favorite` extractor ([#3704](https://github.com/mikf/gallery-dl/issues/3704))
+- [imagetwist] support `phun.imagetwist.com` and `imagehaha.com` domains ([#3622](https://github.com/mikf/gallery-dl/issues/3622))
+- [instagram] add `user` metadata field ([#3107](https://github.com/mikf/gallery-dl/issues/3107))
+- [manganelo] update and fix metadata extraction
+- [manganelo] support mobile-only chapters
+- [mangasee] extract `author` and `genre` metadata ([#3703](https://github.com/mikf/gallery-dl/issues/3703))
+- [misskey] add `misskey` extractors ([#3717](https://github.com/mikf/gallery-dl/issues/3717))
+- [pornpics] add `gallery` and `search` extractors ([#263](https://github.com/mikf/gallery-dl/issues/263), [#3544](https://github.com/mikf/gallery-dl/issues/3544), [#3654](https://github.com/mikf/gallery-dl/issues/3654))
+- [redgifs] support v3 URLs ([#3588](https://github.com/mikf/gallery-dl/issues/3588), [#3589](https://github.com/mikf/gallery-dl/issues/3589))
+- [redgifs] add `collection` extractors ([#3427](https://github.com/mikf/gallery-dl/issues/3427), [#3662](https://github.com/mikf/gallery-dl/issues/3662))
+- [shopify] support ohpolly.com ([#440](https://github.com/mikf/gallery-dl/issues/440), [#3596](https://github.com/mikf/gallery-dl/issues/3596))
+- [szurubooru] add `tag` and `post` extractors ([#3583](https://github.com/mikf/gallery-dl/issues/3583), [#3713](https://github.com/mikf/gallery-dl/issues/3713))
+- [twitter] add `transform` option
+### Options
+- [postprocessor:metadata] add `sort` and `separators` options
+- [postprocessor:exec] implement archive options ([#3584](https://github.com/mikf/gallery-dl/issues/3584))
+- add `--config-create` command-line option ([#2333](https://github.com/mikf/gallery-dl/issues/2333))
+- add `--config-toml` command-line option to load config files in TOML format
+- add `output.stdout`, `output.stdin`, and `output.stderr` options ([#1621](https://github.com/mikf/gallery-dl/issues/1621), [#2152](https://github.com/mikf/gallery-dl/issues/2152), [#2529](https://github.com/mikf/gallery-dl/issues/2529))
+- add `hash_md5` and `hash_sha1` functions ([#3679](https://github.com/mikf/gallery-dl/issues/3679))
+- implement `globals` option to enable defining custom functions for `eval` statements
+- implement `archive-pragma` option to use SQLite PRAGMA statements
+- implement `actions` to trigger events on logging messages ([#3338](https://github.com/mikf/gallery-dl/issues/3338), [#3630](https://github.com/mikf/gallery-dl/issues/3630))
+- implement ability to load external extractor classes
+ - `-X/--extractors` command-line options
+ - `extractor.module-sources` config option
+### Fixes
+- [bunkr] fix extraction ([#3636](https://github.com/mikf/gallery-dl/issues/3636), [#3655](https://github.com/mikf/gallery-dl/issues/3655))
+- [danbooru] send gallery-dl User-Agent ([#3665](https://github.com/mikf/gallery-dl/issues/3665))
+- [deviantart] fix crash when handling deleted deviations in status updates ([#3656](https://github.com/mikf/gallery-dl/issues/3656))
+- [fanbox] fix crash with missing images ([#3673](https://github.com/mikf/gallery-dl/issues/3673))
+- [imagefap] update `gallery` URLs ([#3595](https://github.com/mikf/gallery-dl/issues/3595))
+- [imagefap] fix infinite pagination loop ([#3594](https://github.com/mikf/gallery-dl/issues/3594))
+- [imagefap] fix metadata extraction
+- [oauth] use default name for browsers without `name` attribute
+- [pinterest] unescape search terms ([#3621](https://github.com/mikf/gallery-dl/issues/3621))
+- [pixiv] fix `--write-tags` for `"tags": "original"` ([#3675](https://github.com/mikf/gallery-dl/issues/3675))
+- [poipiku] warn about incorrect passwords ([#3646](https://github.com/mikf/gallery-dl/issues/3646))
+- [reddit] update `videos` option ([#3712](https://github.com/mikf/gallery-dl/issues/3712))
+- [soundgasm] rewrite ([#3578](https://github.com/mikf/gallery-dl/issues/3578))
+- [telegraph] fix extraction when images are not in `<figure>` elements ([#3590](https://github.com/mikf/gallery-dl/issues/3590))
+- [tumblr] raise more detailed errors for dashboard-only blogs ([#3628](https://github.com/mikf/gallery-dl/issues/3628))
+- [twitter] fix some `original` retweets not downloading ([#3744](https://github.com/mikf/gallery-dl/issues/3744))
+- [ytdl] fix `--parse-metadata` ([#3663](https://github.com/mikf/gallery-dl/issues/3663))
+- [downloader:ytdl] prevent exception on empty results
+### Improvements
+- [downloader:http] use `time.monotonic()`
+- [downloader:http] update `_http_retry` to accept a Python function ([#3569](https://github.com/mikf/gallery-dl/issues/3569))
+- [postprocessor:metadata] speed up JSON encoding
+- replace `json.loads/dumps` with direct calls to `JSONDecoder.decode/JSONEncoder.encode`
+- improve `formatter.Formatter` performance
+### Removals
+- [nitter] remove `nitter.pussthecat.org`
+
## 1.24.5 - 2023-01-28
### Additions
- [booru] add `url` option
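Several of the 1.25.0 additions above are plain configuration keys. A minimal sketch of how a few of them might sit together in a config file (names taken from the changelog entries above; values and nesting are illustrative, following the JSON layout of docs/gallery-dl.conf further down this diff):

    {
        "extractor": {
            "archive-pragma": ["journal_mode=WAL", "synchronous=NORMAL"],
            "twitter": {
                "transform": true
            }
        },
        "output": {
            "stdout": "utf-8"
        }
    }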
diff --git a/PKG-INFO b/PKG-INFO
index 9165899..43aacb4 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.24.5
+Version: 1.25.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index ed4afa5..c980bce 100644
--- a/README.rst
+++ b/README.rst
@@ -69,9 +69,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 06e8556..a82db8a 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -8,9 +8,10 @@ _arguments -C -S \
{-h,--help}'[Print this help message and exit]' \
--version'[Print program version and exit]' \
{-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'<file>':_files \
+{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'<format>' \
{-d,--destination}'[Target location for file downloads]':'<path>' \
{-D,--directory}'[Exact location for file downloads]':'<path>' \
-{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'<format>' \
+{-X,--extractors}'[Load external extractors from PATH]':'<path>' \
--proxy'[Use the specified proxy]':'<url>' \
--source-address'[Client-side IP address to bind to]':'<ip>' \
--user-agent'[User-Agent request header]':'<ua>' \
@@ -45,10 +46,13 @@ _arguments -C -S \
--no-download'[Do not download any files]' \
--no-postprocessors'[Do not run any post processors]' \
--no-check-certificate'[Disable HTTPS certificate validation]' \
+{-o,--option}'[Additional options. Example: -o browser=firefox]':'<key=value>' \
{-c,--config}'[Additional configuration files]':'<file>':_files \
---config-yaml'[==SUPPRESS==]':'<file>':_files \
-{-o,--option}'[Additional "<key>=<value>" option values]':'<opt>' \
---ignore-config'[Do not read default configuration files]' \
+--config-yaml'[Additional configuration files in YAML format]':'<file>':_files \
+--config-toml'[Additional configuration files in TOML format]':'<file>':_files \
+--config-create'[Create a basic configuration file]' \
+--config-ignore'[Do not read default configuration files]' \
+--ignore-config'[==SUPPRESS==]' \
{-u,--username}'[Username to login with]':'<user>' \
{-p,--password}'[Password belonging to the given username]':'<pass>' \
--netrc'[Enable .netrc authentication data]' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 203c87d..1750af8 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -5,12 +5,12 @@ _gallery_dl()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--download-archive)$ ]]; then
+ if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|--download-archive)$ ]]; then
COMPREPLY=( $(compgen -f -- "${cur}") )
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --input-file --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index e2a7e6d..d764543 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -2,9 +2,10 @@ complete -c gallery-dl -x
complete -c gallery-dl -s 'h' -l 'help' -d 'Print this help message and exit'
complete -c gallery-dl -l 'version' -d 'Print program version and exit'
complete -c gallery-dl -r -F -s 'i' -l 'input-file' -d 'Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified'
+complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for downloaded files ("/O" for "original" filenames)'
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'd' -l 'destination' -d 'Target location for file downloads'
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'D' -l 'directory' -d 'Exact location for file downloads'
-complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for downloaded files ("/O" for "original" filenames)'
+complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'X' -l 'extractors' -d 'Load external extractors from PATH'
complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy'
complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to'
complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header'
@@ -39,10 +40,13 @@ complete -c gallery-dl -l 'no-mtime' -d 'Do not set file modification times acco
complete -c gallery-dl -l 'no-download' -d 'Do not download any files'
complete -c gallery-dl -l 'no-postprocessors' -d 'Do not run any post processors'
complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate validation'
+complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional options. Example: -o browser=firefox'
complete -c gallery-dl -r -F -s 'c' -l 'config' -d 'Additional configuration files'
-complete -c gallery-dl -r -F -l 'config-yaml' -d '==SUPPRESS=='
-complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional "<key>=<value>" option values'
-complete -c gallery-dl -l 'ignore-config' -d 'Do not read default configuration files'
+complete -c gallery-dl -r -F -l 'config-yaml' -d 'Additional configuration files in YAML format'
+complete -c gallery-dl -r -F -l 'config-toml' -d 'Additional configuration files in TOML format'
+complete -c gallery-dl -l 'config-create' -d 'Create a basic configuration file'
+complete -c gallery-dl -l 'config-ignore' -d 'Do not read default configuration files'
+complete -c gallery-dl -l 'ignore-config' -d '==SUPPRESS=='
complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with'
complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username'
complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 024ddb3..27d3a09 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-01-28" "1.24.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-03-11" "1.25.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -26,14 +26,17 @@ Print program version and exit
.B "\-i, \-\-input\-file" \f[I]FILE\f[]
Download URLs found in FILE ('-' for stdin). More than one --input-file can be specified
.TP
+.B "\-f, \-\-filename" \f[I]FORMAT\f[]
+Filename format string for downloaded files ('/O' for "original" filenames)
+.TP
.B "\-d, \-\-destination" \f[I]PATH\f[]
Target location for file downloads
.TP
.B "\-D, \-\-directory" \f[I]PATH\f[]
Exact location for file downloads
.TP
-.B "\-f, \-\-filename" \f[I]FORMAT\f[]
-Filename format string for downloaded files ('/O' for "original" filenames)
+.B "\-X, \-\-extractors" \f[I]PATH\f[]
+Load external extractors from PATH
.TP
.B "\-\-proxy" \f[I]URL\f[]
Use the specified proxy
@@ -137,13 +140,22 @@ Do not run any post processors
.B "\-\-no\-check\-certificate"
Disable HTTPS certificate validation
.TP
+.B "\-o, \-\-option" \f[I]KEY=VALUE\f[]
+Additional options. Example: -o browser=firefox
+.TP
.B "\-c, \-\-config" \f[I]FILE\f[]
Additional configuration files
.TP
-.B "\-o, \-\-option" \f[I]OPT\f[]
-Additional '<key>=<value>' option values
+.B "\-\-config\-yaml" \f[I]FILE\f[]
+Additional configuration files in YAML format
+.TP
+.B "\-\-config\-toml" \f[I]FILE\f[]
+Additional configuration files in TOML format
+.TP
+.B "\-\-config\-create"
+Create a basic configuration file
.TP
-.B "\-\-ignore\-config"
+.B "\-\-config\-ignore"
Do not read default configuration files
.TP
.B "\-u, \-\-username" \f[I]USER\f[]
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 6b11835..a0fd629 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-01-28" "1.24.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-03-11" "1.25.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -456,6 +456,8 @@ and optional for
.br
* \f[I]e621\f[] (*)
.br
+* \f[I]e926\f[] (*)
+.br
* \f[I]exhentai\f[]
.br
* \f[I]idolcomplex\f[]
@@ -897,6 +899,20 @@ An alternative \f[I]format string\f[] to build archive IDs with.
Prefix for archive IDs.
+.SS extractor.*.archive-pragma
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+["journal_mode=WAL", "synchronous=NORMAL"]
+
+.IP "Description:" 4
+A list of SQLite \f[I]PRAGMA\f[] statements to run during archive initialization.
+
+See \f[I]<https://www.sqlite.org/pragma.html>\f[]
+for available \f[I]PRAGMA\f[] statements and further details.
+
+
.SS extractor.*.postprocessors
.IP "Type:" 6
\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
@@ -1288,7 +1304,23 @@ For unavailable or restricted posts,
follow the \f[I]source\f[] and download from there if possible.
-.SS extractor.danbooru.metadata
+.SS extractor.danbooru.ugoira
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls the download target for Ugoira posts.
+
+.br
+* \f[I]true\f[]: Original ZIP archives
+.br
+* \f[I]false\f[]: Converted video files
+
+
+.SS extractor.[Danbooru].metadata
.IP "Type:" 6
.br
* \f[I]bool\f[]
@@ -1317,7 +1349,7 @@ for possible field names. \f[I]aibooru\f[] also supports \f[I]ai_metadata\f[].
Note: This requires 1 additional HTTP request per post.
-.SS extractor.danbooru.threshold
+.SS extractor.[Danbooru].threshold
.IP "Type:" 6
.br
* \f[I]string\f[]
@@ -1330,30 +1362,13 @@ Note: This requires 1 additional HTTP request per post.
.IP "Description:" 4
Stop paginating over API results if the length of a batch of returned
posts is less than the specified number. Defaults to the per-page limit
-of the current instance, which is 320 for \f[I]e621\f[] and 200 for
-everything else.
+of the current instance, which is 200.
Note: Changing this setting is normally not necessary. When the value is
greater than the per-page limit, gallery-dl will stop after the first
batch. The value cannot be less than 1.
-.SS extractor.danbooru.ugoira
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Controls the download target for Ugoira posts.
-
-.br
-* \f[I]true\f[]: Original ZIP archives
-.br
-* \f[I]false\f[]: Converted video files
-
-
.SS extractor.derpibooru.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -1617,6 +1632,50 @@ or whenever your \f[I]cache file\f[] is deleted or cleared.
Minimum wait time in seconds before API requests.
+.SS extractor.[E621].metadata
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Example:" 4
+.br
+* notes,pools
+.br
+* ["notes", "pools"
+
+.IP "Description:" 4
+Extract additional metadata (notes, pool metadata) if available.
+
+Note: This requires 0-2 additional HTTP requests per post.
+
+
+.SS extractor.[E621].threshold
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Stop paginating over API results if the length of a batch of returned
+posts is less than the specified number. Defaults to the per-page limit
+of the current instance, which is 320.
+
+Note: Changing this setting is normally not necessary. When the value is
+greater than the per-page limit, gallery-dl will stop after the first
+batch. The value cannot be less than 1.
+
+
.SS extractor.exhentai.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -2302,6 +2361,28 @@ Fetch media from replies to other posts.
Also emit metadata for text-only posts without media content.
+.SS extractor.[misskey].renotes
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch media from renoted notes.
+
+
+.SS extractor.[misskey].replies
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Fetch media from replies to other notes.
+
+
.SS extractor.nana.favkey
.IP "Type:" 6
\f[I]string\f[]
@@ -2884,8 +2965,16 @@ HLS and DASH manifests
* \f[I]"ytdl"\f[]: Download videos and let \f[I]youtube-dl\f[] handle all of
video extraction and download
.br
+* \f[I]"dash"\f[]: Extract DASH manifest URLs and use \f[I]youtube-dl\f[]
+to download and merge them. (*)
+.br
* \f[I]false\f[]: Ignore videos
+(*)
+This saves 1 HTTP request per video
+and may be able to download otherwise-deleted videos,
+but it will not always get the best video quality available.
+
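A minimal sketch of opting into the new mode (the value is from the list above; nesting under extractor.reddit is assumed from the option name):

    {
        "extractor": {
            "reddit": {
                "videos": "dash"
            }
        }
    }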
.SS extractor.redgifs.format
.IP "Type:" 6
@@ -3003,6 +3092,17 @@ Filters used during searches.
Download video files.
+.SS extractor.[szurubooru].username & .token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Username and login token of your account to access private resources.
+
+To generate a token, visit \f[I]/user/USERNAME/list-tokens\f[]
+and click \f[I]Create Token\f[].
+
+
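A sketch of supplying these credentials, using the "#" comment convention from docs/gallery-dl-example.conf (the category name and both values are placeholders; [szurubooru] stands for whichever instance category is being used):

    {
        "extractor": {
            "#": "replace 'szurubooru' with the concrete instance category",
            "szurubooru": {
                "username": "your-username",
                "token": "your-login-token"
            }
        }
    }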
.SS extractor.tumblr.avatar
.IP "Type:" 6
\f[I]bool\f[]
@@ -3282,6 +3382,17 @@ Age-restricted replies cannot be expanded when using the
\f[I]syndication\f[] API.
+.SS extractor.twitter.transform
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Transform Tweet and User metadata into a simpler, uniform format.
+
+
.SS extractor.twitter.size
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -4316,6 +4427,50 @@ For these format strings
* \f[I]{3}\f[] is percent of bytes downloaded to total bytes
+.SS output.stdout & .stdin & .stderr
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[]
+
+.IP "Example:" 4
+.. code:: json
+
+"utf-8"
+
+.. code:: json
+
+{
+"encoding": "utf-8",
+"errors": "replace",
+"line_buffering": true
+}
+
+
+.IP "Description:" 4
+\f[I]Reconfigure\f[]
+a \f[I]standard stream\f[].
+
+Possible options are
+
+.br
+* \f[I]encoding\f[]
+.br
+* \f[I]errors\f[]
+.br
+* \f[I]newline\f[]
+.br
+* \f[I]line_buffering\f[]
+.br
+* \f[I]write_through\f[]
+
+When this option is specified as a simple \f[I]string\f[],
+it is interpreted as \f[I]{"encoding": "<string-value>", "errors": "replace"}\f[]
+
+Note: \f[I]errors\f[] always defaults to \f[I]"replace"\f[]
+
+
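In context, the object form might look like the following sketch (keys from the list above; unspecified options keep their defaults, with errors falling back to "replace"):

    {
        "output": {
            "stdout": {
                "encoding": "utf-8",
                "line_buffering": true
            }
        }
    }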
.SS output.shorten
.IP "Type:" 6
\f[I]bool\f[]
@@ -4547,6 +4702,21 @@ after \f[I]N\f[] consecutive files compared as equal.
Only compare file sizes. Do not read and compare their content.
+.SS exec.archive
+.IP "Type:" 6
+\f[I]Path\f[]
+
+.IP "Description:" 4
+File to store IDs of executed commands in,
+similar to \f[I]extractor.*.archive\f[].
+
+\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
+akin to
+\f[I]extractor.*.archive-format\f[],
+\f[I]extractor.*.archive-prefix\f[], and
+\f[I]extractor.*.archive-pragma\f[], are supported as well.
+
+
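A sketch of an exec postprocessor entry using the new archive support (the command and paths are illustrative; the {_path} replacement field is assumed from gallery-dl's usual exec syntax):

    {
        "name": "exec",
        "command": ["touch", "{_path}.done"],
        "archive": "~/gallery-dl/exec-archive.sqlite3",
        "archive-pragma": ["journal_mode=WAL"]
    }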
.SS exec.async
.IP "Type:" 6
\f[I]bool\f[]
@@ -4775,6 +4945,21 @@ Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
+.SS metadata.ascii
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Escape all non-ASCII characters.
+
+See the \f[I]ensure_ascii\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
+
+
.SS metadata.indent
.IP "Type:" 6
.br
@@ -4793,6 +4978,37 @@ See the \f[I]indent\f[] argument of \f[I]json.dump()\f[] for further details.
Note: Only applies for \f[I]"mode": "json"\f[].
+.SS metadata.separators
+.IP "Type:" 6
+\f[I]list\f[] with two \f[I]string\f[] elements
+
+.IP "Default:" 9
+\f[I][", ", ": "]\f[]
+
+.IP "Description:" 4
+\f[I]<item separator>\f[] - \f[I]<key separator>\f[] pair
+to separate JSON keys and values with.
+
+See the \f[I]separators\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
+
+
+.SS metadata.sort
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Sort output by key.
+
+See the \f[I]sort_keys\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
+
+
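Combined, the two options above yield compact, stably ordered output. A sketch of a metadata postprocessor entry (name and mode as in the surrounding sections; values illustrative):

    {
        "name": "metadata",
        "mode": "json",
        "sort": true,
        "separators": [",", ":"]
    }

With these values, keys are emitted alphabetically and the default ", " / ": " spacing is dropped.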
.SS metadata.open
.IP "Type:" 6
\f[I]string\f[]
@@ -4843,9 +5059,11 @@ i.e. fields whose name starts with an underscore.
File to store IDs of generated metadata files in,
similar to \f[I]extractor.*.archive\f[].
-\f[I]archive-format\f[] and \f[I]archive-prefix\f[] options,
-akin to \f[I]extractor.*.archive-format\f[] and \f[I]extractor.*.archive-prefix\f[],
-are supported as well.
+\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
+akin to
+\f[I]extractor.*.archive-format\f[],
+\f[I]extractor.*.archive-prefix\f[], and
+\f[I]extractor.*.archive-pragma\f[], are supported as well.
.SS metadata.mtime
@@ -5152,10 +5370,55 @@ The \f[I]modules\f[] list in
["reddit", "danbooru", "mangadex"]
.IP "Description:" 4
-The list of modules to load when searching for a suitable
+List of internal modules to load when searching for a suitable
extractor class. Useful to reduce startup time and memory usage.
+.SS extractor.module-sources
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]Path\f[] instances
+
+.IP "Example:" 4
+["~/.config/gallery-dl/modules", null]
+
+.IP "Description:" 4
+List of directories to load external extractor modules from.
+
+Any file in a specified directory with a \f[I].py\f[] filename extension
+gets \f[I]imported\f[]
+and searched for potential extractors,
+i.e. classes with a \f[I]pattern\f[] attribute.
+
+Note: \f[I]null\f[] references internal extractors defined in
+\f[I]extractor/__init__.py\f[]
+or by \f[I]extractor.modules\f[].
+
+
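The command-line counterpart is the new -X/--extractors PATH option, which adds a directory the same way (with the internal extractors kept, per gallery_dl/__init__.py further down this diff). As a config-file sketch:

    {
        "extractor": {
            "module-sources": ["~/.config/gallery-dl/modules", null]
        }
    }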
+.SS globals
+.IP "Type:" 6
+.br
+* \f[I]Path\f[]
+.br
+* \f[I]string\f[]
+
+.IP "Example:" 4
+.br
+* "~/.local/share/gdl-globals.py"
+.br
+* "gdl-globals"
+
+.IP "Default:" 9
+The \f[I]GLOBALS\f[] dict in
+\f[I]util.py\f[]
+
+.IP "Description:" 4
+Path to or name of an
+\f[I]importable\f[]
+Python module whose namespace gets used as an alternative
+\f[I]globals parameter\f[]
+for compiled Python expressions.
+
+
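As a sketch, functions defined in the referenced module become callable inside compiled expressions such as filters (is_wide is a hypothetical helper assumed to be defined in that file; image-filter is an existing extractor option):

    {
        "globals": "~/.local/share/gdl-globals.py",
        "extractor": {
            "image-filter": "is_wide(width, height)"
        }
    }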
.SS cache.file
.IP "Type:" 6
\f[I]Path\f[]
diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf
index 92509b5..ef7b3b5 100644
--- a/docs/gallery-dl-example.conf
+++ b/docs/gallery-dl-example.conf
@@ -5,6 +5,7 @@
"#": "set global archive file for all extractors",
"archive": "~/gallery-dl/archive.sqlite3",
+ "archive-pragma": ["journal_mode=WAL", "synchronous=NORMAL"],
"#": "add two custom keywords into the metadata dictionary",
"#": "these can be used to further refine your output directories or filenames",
@@ -36,7 +37,7 @@
"pixiv":
{
- "#": "override global archive setting for pixiv",
+ "#": "override global archive path for pixiv",
"archive": "~/gallery-dl/archive-pixiv.sqlite3",
"#": "set custom directory and filename format strings for all pixiv downloads",
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 3012e71..7564e5b 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -186,6 +186,10 @@
"username": null,
"password": null
},
+ "misskey": {
+ "renotes": false,
+ "replies": true
+ },
"newgrounds":
{
"username": null,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3fe1b55..d4e660a 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.24.5
+Version: 1.25.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.0/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index eab1881..9827944 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -13,6 +13,7 @@ docs/gallery-dl-example.conf
docs/gallery-dl.conf
gallery_dl/__init__.py
gallery_dl/__main__.py
+gallery_dl/actions.py
gallery_dl/aes.py
gallery_dl/cache.py
gallery_dl/config.py
@@ -68,6 +69,7 @@ gallery_dl/extractor/desktopography.py
gallery_dl/extractor/deviantart.py
gallery_dl/extractor/directlink.py
gallery_dl/extractor/dynastyscans.py
+gallery_dl/extractor/e621.py
gallery_dl/extractor/erome.py
gallery_dl/extractor/exhentai.py
gallery_dl/extractor/fallenangels.py
@@ -133,6 +135,7 @@ gallery_dl/extractor/mangoxo.py
gallery_dl/extractor/mastodon.py
gallery_dl/extractor/mememuseum.py
gallery_dl/extractor/message.py
+gallery_dl/extractor/misskey.py
gallery_dl/extractor/moebooru.py
gallery_dl/extractor/myhentaigallery.py
gallery_dl/extractor/myportfolio.py
@@ -161,6 +164,7 @@ gallery_dl/extractor/pixnet.py
gallery_dl/extractor/plurk.py
gallery_dl/extractor/poipiku.py
gallery_dl/extractor/pornhub.py
+gallery_dl/extractor/pornpics.py
gallery_dl/extractor/pururin.py
gallery_dl/extractor/reactor.py
gallery_dl/extractor/readcomiconline.py
@@ -182,6 +186,7 @@ gallery_dl/extractor/smugmug.py
gallery_dl/extractor/soundgasm.py
gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/subscribestar.py
+gallery_dl/extractor/szurubooru.py
gallery_dl/extractor/tapas.py
gallery_dl/extractor/tcbscans.py
gallery_dl/extractor/telegraph.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 245dbf8..116ca5d 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -11,7 +11,7 @@ import logging
from . import version, config, option, output, extractor, job, util, exception
__author__ = "Mike Fährmann"
-__copyright__ = "Copyright 2014-2022 Mike Fährmann"
+__copyright__ = "Copyright 2014-2023 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
@@ -33,20 +33,24 @@ def progress(urls, pformat):
def main():
try:
- if sys.stdout and sys.stdout.encoding.lower() != "utf-8":
- output.replace_std_streams()
-
parser = option.build_parser()
args = parser.parse_args()
log = output.initialize_logging(args.loglevel)
# configuration
- if args.load_config:
+ if args.config_load:
config.load()
- if args.cfgfiles:
- config.load(args.cfgfiles, strict=True)
- if args.yamlfiles:
- config.load(args.yamlfiles, strict=True, fmt="yaml")
+ if args.configs_json:
+ config.load(args.configs_json, strict=True)
+ if args.configs_yaml:
+ import yaml
+ config.load(args.configs_yaml, strict=True, load=yaml.safe_load)
+ if args.configs_toml:
+ try:
+ import tomllib as toml
+ except ImportError:
+ import toml
+ config.load(args.configs_toml, strict=True, load=toml.loads)
if args.filename:
filename = args.filename
if filename == "/O":
@@ -77,6 +81,8 @@ def main():
for opts in args.options:
config.set(*opts)
+ output.configure_standard_streams()
+
# signals
signals = config.get((), "signals-ignore")
if signals:
@@ -105,20 +111,17 @@ def main():
output.ANSI = True
- # extractor modules
- modules = config.get(("extractor",), "modules")
- if modules is not None:
- if isinstance(modules, str):
- modules = modules.split(",")
- extractor.modules = modules
- extractor._module_iter = iter(modules)
-
# format string separator
separator = config.get((), "format-separator")
if separator:
from . import formatter
formatter._SEPARATOR = separator
+ # eval globals
+ path = config.get((), "globals")
+ if path:
+ util.GLOBALS = util.import_file(path).__dict__
+
# loglevels
output.configure_logging(args.loglevel)
if args.loglevel >= logging.ERROR:
@@ -128,7 +131,7 @@ def main():
import requests
extra = ""
- if getattr(sys, "frozen", False):
+ if util.EXECUTABLE:
extra = " - Executable"
else:
git_head = util.git_head()
@@ -147,6 +150,44 @@ def main():
log.debug("Configuration Files %s", config._files)
+ # extractor modules
+ modules = config.get(("extractor",), "modules")
+ if modules is not None:
+ if isinstance(modules, str):
+ modules = modules.split(",")
+ extractor.modules = modules
+
+ # external modules
+ if args.extractor_sources:
+ sources = args.extractor_sources
+ sources.append(None)
+ else:
+ sources = config.get(("extractor",), "module-sources")
+
+ if sources:
+ import os
+ modules = []
+
+ for source in sources:
+ if source:
+ path = util.expand_path(source)
+ try:
+ files = os.listdir(path)
+ modules.append(extractor._modules_path(path, files))
+ except Exception as exc:
+ log.warning("Unable to load modules from %s (%s: %s)",
+ path, exc.__class__.__name__, exc)
+ else:
+ modules.append(extractor._modules_internal())
+
+ if len(modules) > 1:
+ import itertools
+ extractor._module_iter = itertools.chain(*modules)
+ elif not modules:
+ extractor._module_iter = ()
+ else:
+ extractor._module_iter = iter(modules[0])
+
if args.list_modules:
extractor.modules.append("")
sys.stdout.write("\n".join(extractor.modules))
@@ -177,6 +218,10 @@ def main():
"Deleted %d %s from '%s'",
cnt, "entry" if cnt == 1 else "entries", cache._path(),
)
+
+ elif args.config_init:
+ return config.initialize()
+
else:
if not args.urls and not args.inputfiles:
parser.error(
@@ -220,9 +265,13 @@ def main():
pformat = config.get(("output",), "progress", True)
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
urls = progress(urls, pformat)
+ else:
+ urls = iter(urls)
retval = 0
- for url in urls:
+ url = next(urls, None)
+
+ while url is not None:
try:
log.debug("Starting %s for '%s'", jobtype.__name__, url)
if isinstance(url, util.ExtendedUrl):
@@ -234,9 +283,15 @@ def main():
retval |= jobtype(url).run()
except exception.TerminateExtraction:
pass
+ except exception.RestartExtraction:
+ log.debug("Restarting '%s'", url)
+ continue
except exception.NoExtractorError:
log.error("Unsupported URL '%s'", url)
retval |= 64
+
+ url = next(urls, None)
+
return retval
except KeyboardInterrupt:
diff --git a/gallery_dl/actions.py b/gallery_dl/actions.py
new file mode 100644
index 0000000..15ca31e
--- /dev/null
+++ b/gallery_dl/actions.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+""" """
+
+import re
+import sys
+import logging
+import operator
+from . import util, exception
+
+
+def parse(actionspec):
+ if isinstance(actionspec, dict):
+ actionspec = actionspec.items()
+
+ actions = {}
+ actions[logging.DEBUG] = actions_d = []
+ actions[logging.INFO] = actions_i = []
+ actions[logging.WARNING] = actions_w = []
+ actions[logging.ERROR] = actions_e = []
+
+ for event, spec in actionspec:
+ level, _, pattern = event.partition(":")
+ type, _, args = spec.partition(" ")
+ action = (re.compile(pattern).search, ACTIONS[type](args))
+
+ level = level.strip()
+ if not level or level == "*":
+ actions_d.append(action)
+ actions_i.append(action)
+ actions_w.append(action)
+ actions_e.append(action)
+ else:
+ actions[_level_to_int(level)].append(action)
+
+ return actions
+
+
+def _level_to_int(level):
+ try:
+ return logging._nameToLevel[level]
+ except KeyError:
+ return int(level)
+
+
+def action_print(opts):
+ def _print(_):
+ print(opts)
+ return _print
+
+
+def action_status(opts):
+ op, value = re.match(r"\s*([&|^=])=?\s*(\d+)", opts).groups()
+
+ op = {
+ "&": operator.and_,
+ "|": operator.or_,
+ "^": operator.xor,
+ "=": lambda x, y: y,
+ }[op]
+
+ value = int(value)
+
+ def _status(args):
+ args["job"].status = op(args["job"].status, value)
+ return _status
+
+
+def action_level(opts):
+ level = _level_to_int(opts.lstrip(" ~="))
+
+ def _level(args):
+ args["level"] = level
+ return _level
+
+
+def action_wait(opts):
+ def _wait(args):
+ input("Press Enter to continue")
+ return _wait
+
+
+def action_restart(opts):
+ return util.raises(exception.RestartExtraction)
+
+
+def action_exit(opts):
+ try:
+ opts = int(opts)
+ except ValueError:
+ pass
+
+ def _exit(args):
+ sys.exit(opts)
+ return _exit
+
+
+ACTIONS = {
+ "print" : action_print,
+ "status" : action_status,
+ "level" : action_level,
+ "restart": action_restart,
+ "wait" : action_wait,
+ "exit" : action_exit,
+}
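Putting parse() and the ACTIONS table together: an actions specification maps "LEVEL:PATTERN" keys to "TYPE ARGS" values, with each pattern applied to the log message via re.search. A hedged sketch (the top-level "actions" key is assumed from the changelog entry; patterns and arguments are illustrative):

    {
        "actions": {
            "warning:429": "wait",
            "error:": "status |= 1",
            "*:Traceback": "exit 1"
        }
    }

A "restart" action raises the new RestartExtraction exception, which the main loop in gallery_dl/__init__.py above catches in order to re-run the current URL.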
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index 0f2d1f1..d014293 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,6 @@
"""Global configuration module"""
import sys
-import json
import os.path
import logging
from . import util
@@ -39,7 +38,7 @@ else:
]
-if getattr(sys, "frozen", False):
+if util.EXECUTABLE:
# look for config file in PyInstaller executable directory (#682)
_default_configs.append(os.path.join(
os.path.dirname(sys.executable),
@@ -50,23 +49,54 @@ if getattr(sys, "frozen", False):
# --------------------------------------------------------------------
# public interface
-def load(files=None, strict=False, fmt="json"):
- """Load JSON configuration files"""
- if fmt == "yaml":
+
+def initialize():
+ paths = list(map(util.expand_path, _default_configs))
+
+ for path in paths:
+ if os.access(path, os.R_OK | os.W_OK):
+ log.error("There is already a configuration file at '%s'", path)
+ return 1
+
+ for path in paths:
try:
- import yaml
- parsefunc = yaml.safe_load
- except ImportError:
- log.error("Could not import 'yaml' module")
- return
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ with open(path, "x", encoding="utf-8") as fp:
+ fp.write("""\
+{
+ "extractor": {
+
+ },
+ "downloader": {
+
+ },
+ "output": {
+
+ },
+ "postprocessor": {
+
+ }
+}
+""")
+ break
+ except OSError as exc:
+ log.debug("%s: %s", exc.__class__.__name__, exc)
else:
- parsefunc = json.load
+ log.error("Unable to create a new configuration file "
+ "at any of the default paths")
+ return 1
+ log.info("Created a basic configuration file at '%s'", path)
+ return 0
+
+
+def load(files=None, strict=False, load=util.json_loads):
+ """Load JSON configuration files"""
for pathfmt in files or _default_configs:
path = util.expand_path(pathfmt)
try:
with open(path, encoding="utf-8") as file:
- confdict = parsefunc(file)
+ conf = load(file.read())
except OSError as exc:
if strict:
log.error(exc)
@@ -77,9 +107,9 @@ def load(files=None, strict=False, fmt="json"):
sys.exit(2)
else:
if not _config:
- _config.update(confdict)
+ _config.update(conf)
else:
- util.combine_dict(_config, confdict)
+ util.combine_dict(_config, conf)
_files.append(pathfmt)
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index f18cc47..3d715a7 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -12,7 +12,6 @@
import binascii
import contextlib
import ctypes
-import json
import logging
import os
import shutil
@@ -24,7 +23,7 @@ import tempfile
from datetime import datetime, timedelta, timezone
from hashlib import pbkdf2_hmac
from http.cookiejar import Cookie
-from . import aes, text
+from . import aes, text, util
SUPPORTED_BROWSERS_CHROMIUM = {
@@ -169,8 +168,8 @@ def _firefox_cookies_database(profile=None, container=None):
os.path.dirname(path), "containers.json")
try:
- with open(containers_path) as containers:
- identities = json.load(containers)["identities"]
+ with open(containers_path) as file:
+ identities = util.json_loads(file.read())["identities"]
except OSError:
logger.error("Unable to read Firefox container database at %s",
containers_path)
@@ -716,8 +715,8 @@ def _get_windows_v10_key(browser_root):
logger.error("could not find local state file")
return None
logger.debug("Found local state file at '%s'", path)
- with open(path, encoding="utf8") as f:
- data = json.load(f)
+ with open(path, encoding="utf-8") as file:
+ data = util.json_loads(file.read())
try:
base64_key = data["os_crypt"]["encrypted_key"]
except KeyError:
@@ -794,7 +793,8 @@ class DatabaseCopy():
def __init__(self, path):
self.path = path
- self.directory = self.database = None
+ self.database = None
+ self.directory = None
def __enter__(self):
try:
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 6043443..e977320 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -100,13 +100,6 @@ class HttpDownloader(DownloaderBase):
adjust_extension = kwdict.get(
"_http_adjust_extension", self.adjust_extension)
- codes = kwdict.get("_http_retry_codes")
- if codes:
- retry_codes = list(self.retry_codes)
- retry_codes += codes
- else:
- retry_codes = self.retry_codes
-
if self.part and not metadata:
pathfmt.part_enable(self.partdir)
@@ -167,7 +160,10 @@ class HttpDownloader(DownloaderBase):
break
else:
msg = "'{} {}' for '{}'".format(code, response.reason, url)
- if code in retry_codes or 500 <= code < 600:
+ if code in self.retry_codes or 500 <= code < 600:
+ continue
+ retry = kwdict.get("_http_retry")
+ if retry and retry(response):
continue
self.log.warning(msg)
return False
@@ -296,11 +292,10 @@ class HttpDownloader(DownloaderBase):
progress = self.progress
bytes_downloaded = 0
- time_start = time.time()
+ time_start = time.monotonic()
for data in content:
- time_current = time.time()
- time_elapsed = time_current - time_start
+ time_elapsed = time.monotonic() - time_start
bytes_downloaded += len(data)
write(data)
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index c44ea0a..adada75 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -64,6 +64,8 @@ class YoutubeDLDownloader(DownloaderBase):
try:
info_dict = ytdl_instance.extract_info(url[5:], download=False)
except Exception:
+            info_dict = None
+ if not info_dict:
return False
if "entries" in info_dict:
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index 5120039..ef190f2 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -24,6 +24,7 @@ Exception
+-- NoExtractorError
+-- StopExtraction
+-- TerminateExtraction
+ +-- RestartExtraction
"""
@@ -115,3 +116,8 @@ class StopExtraction(GalleryDLException):
class TerminateExtraction(GalleryDLException):
"""Terminate data extraction"""
code = 0
+
+
+class RestartExtraction(GalleryDLException):
+ """Restart data extraction"""
+ code = 0
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index b2ae963..1213194 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,7 +9,7 @@
"""Extractors for https://500px.com/"""
from .common import Extractor, Message
-import json
+from .. import util
BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com"
@@ -86,7 +86,7 @@ class _500pxExtractor(Extractor):
}
data = {
"operationName": opname,
- "variables" : json.dumps(variables),
+ "variables" : util.json_dumps(variables),
"query" : QUERIES[opname],
}
return self.request(
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index fed4991..26ac8b2 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://comics.8muses.com/"""
from .common import Extractor, Message
-from .. import text
-import json
+from .. import text, util
class _8musesAlbumExtractor(Extractor):
@@ -131,7 +130,7 @@ class _8musesAlbumExtractor(Extractor):
@staticmethod
def _unobfuscate(data):
- return json.loads("".join([
+ return util.json_loads("".join([
chr(33 + (ord(c) + 14) % 94) if "!" <= c <= "~" else c
for c in text.unescape(data.strip("\t\n\r !"))
]))
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6140c2c..3968d72 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -1,11 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import sys
import re
modules = [
@@ -34,6 +35,7 @@ modules = [
"desktopography",
"deviantart",
"dynastyscans",
+ "e621",
"erome",
"exhentai",
"fallenangels",
@@ -92,6 +94,7 @@ modules = [
"mangasee",
"mangoxo",
"mememuseum",
+ "misskey",
"myhentaigallery",
"myportfolio",
"nana",
@@ -118,6 +121,7 @@ modules = [
"plurk",
"poipiku",
"pornhub",
+ "pornpics",
"pururin",
"reactor",
"readcomiconline",
@@ -137,6 +141,7 @@ modules = [
"soundgasm",
"speakerdeck",
"subscribestar",
+ "szurubooru",
"tapas",
"tcbscans",
"telegraph",
@@ -217,20 +222,33 @@ def extractors():
# --------------------------------------------------------------------
# internals
-_cache = []
-_module_iter = iter(modules)
-
def _list_classes():
- """Yield all available extractor classes"""
+ """Yield available extractor classes"""
yield from _cache
- globals_ = globals()
- for module_name in _module_iter:
- module = __import__(module_name, globals_, None, (), 1)
+ for module in _module_iter:
yield from add_module(module)
- globals_["_list_classes"] = lambda : _cache
+ globals()["_list_classes"] = lambda : _cache
+
+
+def _modules_internal():
+ globals_ = globals()
+ for module_name in modules:
+ yield __import__(module_name, globals_, None, (), 1)
+
+
+def _modules_path(path, files):
+ sys.path.insert(0, path)
+ try:
+ return [
+ __import__(name[:-3])
+ for name in files
+ if name.endswith(".py")
+ ]
+ finally:
+ del sys.path[0]
def _get_classes(module):
@@ -240,3 +258,7 @@ def _get_classes(module):
hasattr(cls, "pattern") and cls.__module__ == module.__name__
)
]
+
+
+_cache = []
+_module_iter = _modules_internal()
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 1b49d6a..638fedc 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
-import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?bbc\.co\.uk(/programmes/"
@@ -38,7 +37,7 @@ class BbcGalleryExtractor(GalleryExtractor):
)
def metadata(self, page):
- data = json.loads(text.extr(
+ data = util.json_loads(text.extr(
page, '<script type="application/ld+json">', '</script>'))
return {
"programme": self.gallery_url.split("/")[4],
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py
index 44d6065..d6adb4e 100644
--- a/gallery_dl/extractor/bcy.py
+++ b/gallery_dl/extractor/bcy.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://bcy.net/"""
from .common import Extractor, Message
-from .. import text, exception
-import json
+from .. import text, util, exception
import re
@@ -100,9 +99,9 @@ class BcyExtractor(Extractor):
.replace('\\\\u002F', '/')
.replace('\\"', '"'))
try:
- return json.loads(data)["detail"]
+ return util.json_loads(data)["detail"]
except ValueError:
- return json.loads(data.replace('\\"', '"'))["detail"]
+ return util.json_loads(data.replace('\\"', '"'))["detail"]
class BcyUserExtractor(BcyExtractor):
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 6da6175..1469aad 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.behance.net/"""
from .common import Extractor, Message
-from .. import text
-import json
+from .. import text, util
class BehanceExtractor(Extractor):
@@ -120,7 +119,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
}
page = self.request(url, cookies=cookies).text
- data = json.loads(text.extr(
+ data = util.json_loads(text.extr(
page, 'id="beconfig-store_state">', '</script>'))
return self._update(data["project"]["project"])
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 8a1a42e..56010c2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for Blogger blogs"""
from .common import Extractor, Message
-from .. import text
-import json
+from .. import text, util
import re
BASE_PATTERN = (
@@ -61,7 +60,7 @@ class BloggerExtractor(Extractor):
page = self.request(post["url"]).text
for url in findall_video(page):
page = self.request(url).text
- video_config = json.loads(text.extr(
+ video_config = util.json_loads(text.extr(
page, 'var VIDEO_CONFIG =', '\n'))
files.append(max(
video_config["streams"],
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 1c339a9..17d066d 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,20 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.ru/"""
+"""Extractors for https://bunkr.su/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
-import json
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.ru albums"""
+ """Extractor for bunkr.su albums"""
category = "bunkr"
- root = "https://bunkr.ru"
- pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:ru|is|to)/a/([^/?#]+)"
+ root = "https://bunkr.su"
+ pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u|is|to)/a/([^/?#]+)"
test = (
- ("https://bunkr.ru/a/Lktg9Keq", {
+ ("https://bunkr.su/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
@@ -33,7 +32,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
},
}),
# mp4 (#2239)
- ("https://app.bunkr.is/a/ptRHaCn2", {
+ ("https://app.bunkr.ru/a/ptRHaCn2", {
"pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
@@ -41,44 +40,57 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
("https://bunkr.is/a/iXTTc1o2", {
"pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
"content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
+ "keyword": {
+ "album_id": "iXTTc1o2",
+ "album_name": "test2",
+ "album_size": "691.1 KB",
+ "count": 2,
+ "description": "072022",
+ "filename": "re:video-wFO9FtxG|image-sZrQUeOx",
+ "id": "re:wFO9FtxG|sZrQUeOx",
+ "name": "re:video|image",
+ "num": int,
+ },
}),
("https://bunkr.to/a/Lktg9Keq"),
)
def fetch_album(self, album_id):
- root = self.root
+ # album metadata
+ page = self.request(self.root + "/a/" + self.album_id).text
+ info = text.split_html(text.extr(
+ page, "<h1", "</div>").partition(">")[2])
+ count, _, size = info[1].split(None, 2)
+
+ # files
+ cdn = None
+ files = []
+ append = files.append
+ headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"}
- try:
- data = json.loads(text.extr(
- self.request(root + "/a/" + self.album_id).text,
- 'id="__NEXT_DATA__" type="application/json">', '<'))
- album = data["props"]["pageProps"]["album"]
- files = album["files"]
- except Exception as exc:
- self.log.debug("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("Falling back to lolisafe API")
- self.root = root.replace("://", "://app.", 1)
- files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
- # fix file URLs (bunkr..ru -> bunkr.ru) (#3481)
- for file in files:
- file["file"] = file["file"].replace("bunkr..", "bunkr.", 1)
- else:
- for file in files:
- file["file"] = file["cdn"] + "/" + file["name"]
- data = {
- "album_id" : self.album_id,
- "album_name" : text.unescape(album["name"]),
- "description": text.unescape(album["description"]),
- "count" : len(files),
- }
+ pos = page.index('class="grid-images')
+ for url in text.extract_iter(page, '<a href="', '"', pos):
+ if url.startswith("/"):
+ if not cdn:
+ # fetch cdn root from download page
+ durl = "{}/d/{}".format(self.root, url[3:])
+ cdn = text.extr(self.request(
+ durl).text, 'link.href = "', '"')
+ cdn = cdn[:cdn.index("/", 8)]
+ url = cdn + url[2:]
- headers = {"Referer": root.replace("://", "://stream.", 1) + "/"}
- for file in files:
- if file["file"].endswith(
- (".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
- ".zip", ".rar", ".7z")):
- file["_http_headers"] = headers
- file["file"] = file["file"].replace(
- "://cdn", "://media-files", 1)
+ url = text.unescape(url)
+ if url.endswith((".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
+ ".zip", ".rar", ".7z")):
+ append({"file": url.replace("://cdn", "://media-files", 1),
+ "_http_headers": headers})
+ else:
+ append({"file": url})
- return files, data
+ return files, {
+ "album_id" : self.album_id,
+ "album_name" : text.unescape(info[0]),
+ "album_size" : size[1:-1],
+ "description": text.unescape(info[2]) if len(info) > 2 else "",
+ "count" : len(files),
+ }
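
Note: the rewritten fetch_album resolves the CDN host once from a download page, then rewrites video/archive links onto the media-files host. A standalone sketch of those two string operations (sample URLs made up, consistent with the tests above):

    # keep scheme + host: the first "/" after "https://" ends the hostname
    cdn = "https://cdn4.bunkr.ru/video-wFO9FtxG.mp4"
    cdn = cdn[:cdn.index("/", 8)]
    print(cdn)  # https://cdn4.bunkr.ru

    # videos and archives are served from media-files instead of cdn
    url = cdn + "/video-wFO9FtxG.mp4"
    if url.endswith((".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
                     ".zip", ".rar", ".7z")):
        url = url.replace("://cdn", "://media-files", 1)
    print(url)  # https://media-files4.bunkr.ru/video-wFO9FtxG.mp4
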
diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py
index 509108f..7a21d2a 100644
--- a/gallery_dl/extractor/catbox.py
+++ b/gallery_dl/extractor/catbox.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,7 +8,7 @@
"""Extractors for https://catbox.moe/"""
-from .common import GalleryExtractor
+from .common import GalleryExtractor, Extractor, Message
from .. import text
@@ -54,3 +54,26 @@ class CatboxAlbumExtractor(GalleryExtractor):
for path in text.extract_iter(
page, ">https://files.catbox.moe/", "<")
]
+
+
+class CatboxFileExtractor(Extractor):
+ """Extractor for catbox files"""
+ category = "catbox"
+ subcategory = "file"
+ archive_fmt = "{filename}"
+ pattern = r"(?:https?://)?(?:files|litter|de)\.catbox\.moe/([^/?#]+)"
+ test = (
+ ("https://files.catbox.moe/8ih3y7.png", {
+ "pattern": r"^https://files\.catbox\.moe/8ih3y7\.png$",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ "count": 1,
+ }),
+ ("https://litter.catbox.moe/t8v3n9.png"),
+ ("https://de.catbox.moe/bjdmz1.jpg"),
+ )
+
+ def items(self):
+ url = text.ensure_http_scheme(self.url)
+ file = text.nameext_from_url(url, {"url": url})
+ yield Message.Directory, file
+ yield Message.Url, url, file
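
Note: the new CatboxFileExtractor derives all of its metadata from the URL itself. A rough stand-in for what text.nameext_from_url contributes here (the real helper may handle edge cases differently):

    def nameext_from_url(url, data):
        # split the last path segment into filename / extension
        name = url.rstrip("/").rpartition("/")[2]
        data["filename"], _, data["extension"] = name.rpartition(".")
        return data

    url = "https://files.catbox.moe/8ih3y7.png"
    file = nameext_from_url(url, {"url": url})
    print(file["filename"], file["extension"])  # 8ih3y7 png
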
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 4cefa1c..8024be9 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -791,15 +791,21 @@ HTTP_HEADERS = {
("TE", "trailers"),
),
"chrome": (
+ ("Connection", "keep-alive"),
("Upgrade-Insecure-Requests", "1"),
("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, "
- "like Gecko) Chrome/92.0.4515.131 Safari/537.36"),
+ "like Gecko) Chrome/111.0.0.0 Safari/537.36"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
- "image/webp,image/apng,*/*;q=0.8"),
+ "image/avif,image/webp,image/apng,*/*;q=0.8,"
+ "application/signed-exchange;v=b3;q=0.7"),
("Referer", None),
+ ("Sec-Fetch-Site", "same-origin"),
+ ("Sec-Fetch-Mode", "no-cors"),
+ ("Sec-Fetch-Dest", "empty"),
("Accept-Encoding", None),
("Accept-Language", "en-US,en;q=0.9"),
- ("Cookie", None),
+ ("cookie", None),
+ ("content-length", None),
),
}
@@ -838,8 +844,7 @@ SSL_CIPHERS = {
"AES128-GCM-SHA256:"
"AES256-GCM-SHA384:"
"AES128-SHA:"
- "AES256-SHA:"
- "DES-CBC3-SHA"
+ "AES256-SHA"
),
}
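
Note: the updated "chrome" tuple stores headers in a fixed order approximating a real Chrome 111 request, with None marking headers whose position is reserved but whose value is supplied elsewhere. A sketch of turning such a tuple into concrete request headers; how gallery-dl itself consumes the None entries is not shown in this hunk, and the platform string is an example:

    chrome = (
        ("Connection", "keep-alive"),
        ("Upgrade-Insecure-Requests", "1"),
        ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, "
                       "like Gecko) Chrome/111.0.0.0 Safari/537.36"),
        ("Referer", None),
        ("Accept-Language", "en-US,en;q=0.9"),
    )

    platform = "Windows NT 10.0; Win64; x64"  # example value
    headers = {}
    for name, value in chrome:
        if value is None:
            continue  # position reserved; value set per request
        headers[name] = value.format(platform) if "{}" in value else value
    print(headers["User-Agent"])
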
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 7b0e572..f104556 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -9,8 +9,7 @@
"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
from .common import BaseExtractor, Message
-from ..version import __version__
-from .. import text
+from .. import text, util
import datetime
@@ -21,36 +20,13 @@ class DanbooruExtractor(BaseExtractor):
page_limit = 1000
page_start = None
per_page = 200
+ request_interval = 1.0
def __init__(self, match):
- self._init_category(match)
-
- instance = INSTANCES.get(self.category) or {}
- iget = instance.get
-
- self.headers = iget("headers")
- self.page_limit = iget("page-limit", 1000)
- self.page_start = iget("page-start")
- self.per_page = iget("per-page", 200)
- self.request_interval_min = iget("request-interval-min", 0.0)
- self._pools = iget("pools")
- self._popular_endpoint = iget("popular", "/explore/posts/popular.json")
-
BaseExtractor.__init__(self, match)
-
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
- metadata = self.config("metadata", False)
- if metadata:
- if isinstance(metadata, (list, tuple)):
- metadata = ",".join(metadata)
- elif not isinstance(metadata, str):
- metadata = "artist_commentary,children,notes,parent,uploader"
- self.metadata_includes = metadata
- else:
- self.metadata_includes = None
-
threshold = self.config("threshold")
if isinstance(threshold, int):
self.threshold = 1 if threshold < 1 else threshold
@@ -62,10 +38,6 @@ class DanbooruExtractor(BaseExtractor):
self.log.debug("Using HTTP Basic Auth for user '%s'", username)
self.session.auth = (username, api_key)
- def request(self, url, **kwargs):
- kwargs["headers"] = self.headers
- return BaseExtractor.request(self, url, **kwargs)
-
def skip(self, num):
pages = num // self.per_page
if pages >= self.page_limit:
@@ -74,32 +46,28 @@ class DanbooruExtractor(BaseExtractor):
return pages * self.per_page
def items(self):
+ self.session.headers["User-Agent"] = util.USERAGENT
+
+ includes = self.config("metadata")
+ if includes:
+ if isinstance(includes, (list, tuple)):
+ includes = ",".join(includes)
+ elif not isinstance(includes, str):
+ includes = "artist_commentary,children,notes,parent,uploader"
+
data = self.metadata()
for post in self.posts():
- file = post.get("file")
- if file:
- url = file["url"]
- if not url:
- md5 = file["md5"]
- url = file["url"] = (
- "https://static1.{}/data/{}/{}/{}.{}".format(
- self.root[8:], md5[0:2], md5[2:4], md5, file["ext"]
- ))
- post["filename"] = file["md5"]
- post["extension"] = file["ext"]
+ try:
+ url = post["file_url"]
+ except KeyError:
+ if self.external and post["source"]:
+ post.update(data)
+ yield Message.Directory, post
+ yield Message.Queue, post["source"], post
+ continue
- else:
- try:
- url = post["file_url"]
- except KeyError:
- if self.external and post["source"]:
- post.update(data)
- yield Message.Directory, post
- yield Message.Queue, post["source"], post
- continue
-
- text.nameext_from_url(url, post)
+ text.nameext_from_url(url, post)
if post["extension"] == "zip":
if self.ugoira:
@@ -109,9 +77,9 @@ class DanbooruExtractor(BaseExtractor):
url = post["large_file_url"]
post["extension"] = "webm"
- if self.metadata_includes:
+ if includes:
meta_url = "{}/posts/{}.json?only={}".format(
- self.root, post["id"], self.metadata_includes)
+ self.root, post["id"], includes)
post.update(self.request(meta_url).json())
if url[0] == "/":
@@ -127,7 +95,7 @@ class DanbooruExtractor(BaseExtractor):
def posts(self):
return ()
- def _pagination(self, endpoint, params, pagenum=False):
+ def _pagination(self, endpoint, params, pages=False):
url = self.root + endpoint
params["limit"] = self.per_page
params["page"] = self.page_start
@@ -141,7 +109,7 @@ class DanbooruExtractor(BaseExtractor):
if len(posts) < self.threshold:
return
- if pagenum:
+ if pages:
params["page"] += 1
else:
for post in reversed(posts):
@@ -163,34 +131,20 @@ class DanbooruExtractor(BaseExtractor):
for index, delay in enumerate(delays)]
-INSTANCES = {
+BASE_PATTERN = DanbooruExtractor.update({
"danbooru": {
"root": None,
"pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us",
},
- "e621": {
- "root": None,
- "pattern": r"e(?:621|926)\.net",
- "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format(
- __version__)},
- "pools": "sort",
- "popular": "/popular.json",
- "page-limit": 750,
- "per-page": 320,
- "request-interval-min": 1.0,
- },
"atfbooru": {
"root": "https://booru.allthefallen.moe",
"pattern": r"booru\.allthefallen\.moe",
- "page-limit": 5000,
},
"aibooru": {
"root": None,
"pattern": r"(?:safe.)?aibooru\.online",
}
-}
-
-BASE_PATTERN = DanbooruExtractor.update(INSTANCES)
+})
class DanbooruTagExtractor(DanbooruExtractor):
@@ -213,10 +167,6 @@ class DanbooruTagExtractor(DanbooruExtractor):
"pattern": r"https://i\.pximg\.net/img-original/img"
r"/2008/08/28/02/35/48/1476533_p0\.jpg",
}),
- ("https://e621.net/posts?tags=anry", {
- "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
- "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
- }),
("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
"count": 12,
}),
@@ -228,7 +178,6 @@ class DanbooruTagExtractor(DanbooruExtractor):
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
- ("https://e926.net/posts?tags=anry"),
("https://safe.aibooru.online/posts?tags=center_frills"),
)
@@ -254,23 +203,17 @@ class DanbooruPoolExtractor(DanbooruExtractor):
("https://danbooru.donmai.us/pools/7659", {
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
}),
- ("https://e621.net/pools/73", {
- "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
- "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
- }),
("https://booru.allthefallen.moe/pools/9", {
"url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
"count": 6,
}),
("https://aibooru.online/pools/1"),
("https://danbooru.donmai.us/pool/show/7659"),
- ("https://e621.net/pool/show/73"),
)
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
self.pool_id = match.group(match.lastindex)
- self.post_ids = ()
def metadata(self):
url = "{}/pools/{}.json".format(self.root, self.pool_id)
@@ -280,29 +223,8 @@ class DanbooruPoolExtractor(DanbooruExtractor):
return {"pool": pool}
def posts(self):
- if self._pools == "sort":
- self.log.info("Fetching posts of pool %s", self.pool_id)
-
- id_to_post = {
- post["id"]: post
- for post in self._pagination(
- "/posts.json", {"tags": "pool:" + self.pool_id})
- }
-
- posts = []
- append = posts.append
- for num, pid in enumerate(self.post_ids, 1):
- if pid in id_to_post:
- post = id_to_post[pid]
- post["num"] = num
- append(post)
- else:
- self.log.warning("Post %s is unavailable", pid)
- return posts
-
- else:
- params = {"tags": "pool:" + self.pool_id}
- return self._pagination("/posts.json", params)
+ params = {"tags": "pool:" + self.pool_id}
+ return self._pagination("/posts.json", params)
class DanbooruPostExtractor(DanbooruExtractor):
@@ -318,10 +240,6 @@ class DanbooruPostExtractor(DanbooruExtractor):
"pattern": r"https?://.+\.zip$",
"options": (("ugoira", True),)
}),
- ("https://e621.net/posts/535", {
- "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
- "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
- }),
("https://booru.allthefallen.moe/posts/22", {
"content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
}),
@@ -329,7 +247,6 @@ class DanbooruPostExtractor(DanbooruExtractor):
"content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9",
}),
("https://danbooru.donmai.us/post/show/294929"),
- ("https://e621.net/post/show/535"),
)
def __init__(self, match):
@@ -338,8 +255,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
- post = self.request(url).json()
- return (post["post"] if "post" in post else post,)
+ return (self.request(url).json(),)
class DanbooruPopularExtractor(DanbooruExtractor):
@@ -355,12 +271,6 @@ class DanbooruPopularExtractor(DanbooruExtractor):
"range": "1-120",
"count": 120,
}),
- ("https://e621.net/popular"),
- (("https://e621.net/explore/posts/popular"
- "?date=2019-06-01&scale=month"), {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": ">= 70",
- }),
("https://booru.allthefallen.moe/explore/posts/popular"),
("https://aibooru.online/explore/posts/popular"),
)
@@ -385,31 +295,5 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def posts(self):
if self.page_start is None:
self.page_start = 1
- return self._pagination(self._popular_endpoint, self.params, True)
-
-
-class DanbooruFavoriteExtractor(DanbooruExtractor):
- """Extractor for e621 favorites"""
- subcategory = "favorite"
- directory_fmt = ("{category}", "Favorites", "{user_id}")
- archive_fmt = "f_{user_id}_{id}"
- pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
- test = (
- ("https://e621.net/favorites"),
- ("https://e621.net/favorites?page=2&user_id=53275", {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": "> 260",
- }),
- )
-
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.query = text.parse_query(match.group(match.lastindex))
-
- def metadata(self):
- return {"user_id": self.query.get("user_id", "")}
-
- def posts(self):
- if self.page_start is None:
- self.page_start = 1
- return self._pagination("/favorites.json", self.query, True)
+ return self._pagination(
+ "/explore/posts/popular.json", self.params, True)
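
Note: the items() rewrite above folds the old metadata_includes attribute into a local: lists are joined, other truthy non-string values fall back to a default field set, and the result feeds a per-post ?only= request. A small sketch of that option handling (post ID and host taken from the tests above):

    def normalize_includes(value):
        if isinstance(value, (list, tuple)):
            return ",".join(value)
        if isinstance(value, str):
            return value
        return "artist_commentary,children,notes,parent,uploader"

    root, post_id = "https://danbooru.donmai.us", 294929
    meta_url = "{}/posts/{}.json?only={}".format(
        root, post_id, normalize_includes(["notes", "parent"]))
    print(meta_url)
    # https://danbooru.donmai.us/posts/294929.json?only=notes,parent
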
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index a3187fa..37475df 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.deviantart.com/"""
+"""Extractors for https://www.deviantart.com/"""
from .common import Extractor, Message
from .. import text, util, exception
@@ -21,29 +21,30 @@ import re
BASE_PATTERN = (
r"(?:https?://)?(?:"
- r"(?:www\.)?deviantart\.com/(?!watch/)([\w-]+)|"
- r"(?!www\.)([\w-]+)\.deviantart\.com)"
+ r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
+ r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
)
class DeviantartExtractor(Extractor):
"""Base class for deviantart extractors"""
category = "deviantart"
+ root = "https://www.deviantart.com"
directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
cookiedomain = None
- root = "https://www.deviantart.com"
+ cookienames = ("auth", "auth_secure", "userinfo")
_last_request = 0
def __init__(self, match):
Extractor.__init__(self, match)
- self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
self.original = self.config("original", True)
self.comments = self.config("comments", False)
self.user = match.group(1) or match.group(2)
self.group = False
+ self.offset = 0
self.api = None
unwatch = self.config("auto-unwatch")
@@ -69,6 +70,14 @@ class DeviantartExtractor(Extractor):
self.offset += num
return num
+ def login(self):
+ if not self._check_cookies(self.cookienames):
+ username, password = self._get_auth_info()
+ if not username:
+ return False
+ self._update_cookies(_login_impl(self, username, password))
+ return True
+
def items(self):
self.api = DeviantartOAuthAPI(self)
@@ -87,6 +96,13 @@ class DeviantartExtractor(Extractor):
yield Message.Queue, url, data
continue
+ if deviation["is_deleted"]:
+                # skip deviations that are actually deleted
+                # to avoid crashing on their missing fields
+ self.log.debug(
+ "Skipping %s (deleted)", deviation["deviationid"])
+ continue
+
if "premium_folder_data" in deviation:
data = self._fetch_premium(deviation)
if not data:
@@ -346,9 +362,7 @@ class DeviantartExtractor(Extractor):
kwargs["fatal"] = None
diff = time.time() - DeviantartExtractor._last_request
if diff < 2.0:
- delay = 2.0 - diff
- self.log.debug("Sleeping %.2f seconds", delay)
- time.sleep(delay)
+ self.sleep(2.0 - diff, "request")
while True:
response = self.request(url, **kwargs)
@@ -406,6 +420,16 @@ class DeviantartExtractor(Extractor):
self.log.info("Unwatching %s", username)
self.api.user_friends_unwatch(username)
+ def _eclipse_to_oauth(self, eclipse_api, deviations):
+ for obj in deviations:
+ deviation = obj["deviation"] if "deviation" in obj else obj
+ deviation_uuid = eclipse_api.deviation_extended_fetch(
+ deviation["deviationId"],
+ deviation["author"]["username"],
+ "journal" if deviation["isJournal"] else "art",
+ )["deviation"]["extended"]["deviationUuid"]
+ yield self.api.deviation(deviation_uuid)
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -676,15 +700,9 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
)
def deviations(self):
- folders = self.api.collections_folders(self.user)
if self.flat:
- deviations = itertools.chain.from_iterable(
- self.api.collections(self.user, folder["folderid"])
- for folder in folders
- )
- if self.offset:
- deviations = util.advance(deviations, self.offset)
- return deviations
+ return self.api.collections_all(self.user, self.offset)
+ folders = self.api.collections_folders(self.user)
return self._folder_urls(
folders, "favourites", DeviantartCollectionExtractor)
@@ -796,6 +814,14 @@ class DeviantartStatusExtractor(DeviantartExtractor):
"url" : "re:^https://sta.sh",
},
}),
+ # "deleted" deviations in 'items'
+ ("https://www.deviantart.com/AndrejSKalin/posts/statuses", {
+ "options": (("journals", "none"), ("original", 0),
+ ("image-filter", "deviationid[:8] == '147C8B03'")),
+ "count": 2,
+ "archive": False,
+ "keyword": {"deviationid": "147C8B03-7D34-AE93-9241-FA3C6DBBC655"}
+ }),
("https://www.deviantart.com/justgalym/posts/statuses", {
"options": (("journals", "text"),),
"url": "c8744f7f733a3029116607b826321233c5ca452d",
@@ -861,8 +887,7 @@ class DeviantartPopularExtractor(DeviantartExtractor):
"{popular[range]}", "{popular[search]}")
archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
pattern = (r"(?:https?://)?www\.deviantart\.com/(?:"
- r"search(?:/deviations)?"
- r"|(?:deviations/?)?\?order=(popular-[^/?#]+)"
+ r"(?:deviations/?)?\?order=(popular-[^/?#]+)"
r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
r")/?(?:\?([^#]*))?")
test = (
@@ -876,8 +901,6 @@ class DeviantartPopularExtractor(DeviantartExtractor):
"range": "1-30",
"count": 30,
}),
- ("https://www.deviantart.com/search?q=tree"),
- ("https://www.deviantart.com/search/deviations?order=popular-1-week"),
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
)
@@ -974,7 +997,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
subcategory = "deviation"
archive_fmt = "g_{_username}_{index}.{extension}"
pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
- r"|(?:https?://)?(?:www\.)?deviantart\.com/"
+ r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/"
r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
r"(\d+)" # bare deviation ID without slug
r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36
@@ -1068,6 +1091,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
# old /view/ URLs from the Wayback Machine
("https://www.deviantart.com/view.php?id=14864502"),
("http://www.deviantart.com/view-full.php?id=100842"),
+
+ ("https://www.fxdeviantart.com/zzz/art/zzz-1234567890"),
+ ("https://www.fxdeviantart.com/view/1234567890"),
)
skip = Extractor.skip
@@ -1094,6 +1120,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
archive_fmt = "s_{_username}_{index}.{extension}"
+ cookiedomain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
test = (
("https://www.deviantart.com/shimoda7/gallery/scraps", {
@@ -1102,34 +1129,109 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
)
+
+ def deviations(self):
+ self.login()
+
+ eclipse_api = DeviantartEclipseAPI(self)
+ return self._eclipse_to_oauth(
+ eclipse_api, eclipse_api.gallery_scraps(self.user, self.offset))
+
+
+class DeviantartSearchExtractor(DeviantartExtractor):
+ """Extractor for deviantart search results"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "Search", "{search_tags}")
+ archive_fmt = "Q_{search_tags}_{index}.{extension}"
cookiedomain = ".deviantart.com"
- cookienames = ("auth", "auth_secure", "userinfo")
- _warning = True
+ pattern = (r"(?:https?://)?www\.deviantart\.com"
+ r"/search(?:/deviations)?/?\?([^#]+)")
+ test = (
+ ("https://www.deviantart.com/search?q=tree"),
+ ("https://www.deviantart.com/search/deviations?order=popular-1-week"),
+ )
+
+ skip = Extractor.skip
+
+ def __init__(self, match):
+ DeviantartExtractor.__init__(self, match)
+ self.query = text.parse_query(self.user)
+ self.search = self.query.get("q", "")
+ self.user = ""
+
+ def deviations(self):
+ logged_in = self.login()
+
+ eclipse_api = DeviantartEclipseAPI(self)
+ search = (eclipse_api.search_deviations
+ if logged_in else self._search_html)
+ return self._eclipse_to_oauth(eclipse_api, search(self.query))
+
+ def prepare(self, deviation):
+ DeviantartExtractor.prepare(self, deviation)
+ deviation["search_tags"] = self.search
+
+ def _search_html(self, params):
+ url = self.root + "/search"
+ deviation = {
+ "deviationId": None,
+ "author": {"username": "u"},
+ "isJournal": False,
+ }
+
+ while True:
+ page = self.request(url, params=params).text
+
+ items , pos = text.rextract(page, r'\"items\":[', ']')
+ cursor, pos = text.extract(page, r'\"cursor\":\"', '\\', pos)
+
+ for deviation_id in items.split(","):
+ deviation["deviationId"] = deviation_id
+ yield deviation
+
+ if not cursor:
+ return
+ params["cursor"] = cursor
+
+
+class DeviantartGallerySearchExtractor(DeviantartExtractor):
+ """Extractor for deviantart gallery searches"""
+ subcategory = "gallery-search"
+ archive_fmt = "g_{_username}_{index}.{extension}"
+ cookiedomain = ".deviantart.com"
+ pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
+ test = (
+ ("https://www.deviantart.com/shimoda7/gallery?q=memory", {
+ "options": (("original", 0),),
+ "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
+ }),
+ ("https://www.deviantart.com/shimoda7/gallery?q=memory&sort=popular"),
+ )
+
+ def __init__(self, match):
+ DeviantartExtractor.__init__(self, match)
+ self.query = match.group(3)
def deviations(self):
self.login()
eclipse_api = DeviantartEclipseAPI(self)
- for obj in eclipse_api.gallery_scraps(self.user, self.offset):
- deviation = obj["deviation"]
- deviation_uuid = eclipse_api.deviation_extended_fetch(
- deviation["deviationId"],
- deviation["author"]["username"],
- "journal" if deviation["isJournal"] else "art",
- )["deviation"]["extended"]["deviationUuid"]
+ info = eclipse_api.user_info(self.user)
- yield self.api.deviation(deviation_uuid)
+ query = text.parse_query(self.query)
+ self.search = query["q"]
- def login(self):
- """Login and obtain session cookies"""
- if not self._check_cookies(self.cookienames):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(_login_impl(self, username, password))
- elif self._warning:
- self.log.warning(
- "No session cookies set: Unable to fetch mature scraps.")
- DeviantartScrapsExtractor._warning = False
+ return self._eclipse_to_oauth(
+ eclipse_api, eclipse_api.galleries_search(
+ info["user"]["userId"],
+ self.search,
+ self.offset,
+ query.get("sort", "most-recent"),
+ ))
+
+ def prepare(self, deviation):
+ DeviantartExtractor.prepare(self, deviation)
+ deviation["search_tags"] = self.search
class DeviantartFollowingExtractor(DeviantartExtractor):
@@ -1261,6 +1363,13 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination(endpoint, params)
+ def collections_all(self, username, offset=0):
+        """Yield all deviations from all of a user's collections"""
+ endpoint = "/collections/all"
+ params = {"username": username, "offset": offset, "limit": 24,
+ "mature_content": self.mature}
+ return self._pagination(endpoint, params)
+
@memcache(keyarg=1)
def collections_folders(self, username, offset=0):
"""Yield all collection folders of a specific user"""
@@ -1411,7 +1520,7 @@ class DeviantartOAuthAPI():
while True:
if self.delay:
- time.sleep(self.delay)
+ self.extractor.sleep(self.delay, "api")
self.authenticate(None if public else self.refresh_token_key)
kwargs["headers"] = self.headers
@@ -1480,6 +1589,15 @@ class DeviantartOAuthAPI():
self._metadata(results)
if self.folders:
self._folders(results)
+ else: # attempt to fix "deleted" deviations
+ for dev in self._shared_content(results):
+ if not dev["is_deleted"]:
+ continue
+ patch = self._call(
+ "/deviation/" + dev["deviationid"], fatal=False)
+ if patch:
+ dev.update(patch)
+
yield from results
if not data["has_more"] and (
@@ -1497,6 +1615,14 @@ class DeviantartOAuthAPI():
return
params["offset"] = int(params["offset"]) + len(results)
+ @staticmethod
+ def _shared_content(results):
+ """Return an iterable of shared deviations in 'results'"""
+ for result in results:
+ for item in result.get("items") or ():
+ if "deviation" in item:
+ yield item["deviation"]
+
def _pagination_list(self, endpoint, params, key="results"):
result = []
result.extend(self._pagination(endpoint, params, False, key=key))
@@ -1585,6 +1711,29 @@ class DeviantartEclipseAPI():
}
return self._pagination(endpoint, params)
+ def galleries_search(self, user_id, query,
+ offset=None, order="most-recent"):
+ endpoint = "/shared_api/galleries/search"
+ params = {
+ "userid": user_id,
+ "order" : order,
+ "q" : query,
+ "offset": offset,
+ "limit" : 24,
+ }
+ return self._pagination(endpoint, params)
+
+ def search_deviations(self, params):
+ endpoint = "/da-browse/api/networkbar/search/deviations"
+ return self._pagination(endpoint, params, key="deviations")
+
+ def user_info(self, user, expand=False):
+ endpoint = "/shared_api/user/info"
+ params = {"username": user}
+ if expand:
+ params["expand"] = "user.stats,user.profile,user.watch"
+ return self._call(endpoint, params)
+
def user_watching(self, user, offset=None):
endpoint = "/da-user-profile/api/module/watching"
params = {
@@ -1611,23 +1760,37 @@ class DeviantartEclipseAPI():
except Exception:
return {"error": response.text}
- def _pagination(self, endpoint, params):
+ def _pagination(self, endpoint, params, key="results"):
+ limit = params.get("limit", 24)
+ warn = True
+
while True:
data = self._call(endpoint, params)
- results = data.get("results")
+ results = data.get(key)
if results is None:
return
+ if len(results) < limit and warn and data.get("hasMore"):
+ warn = False
+ self.log.warning(
+ "Private deviations detected! "
+ "Provide login credentials or session cookies "
+ "to be able to access them.")
yield from results
if not data.get("hasMore"):
return
- next_offset = data.get("nextOffset")
- if next_offset:
- params["offset"] = next_offset
+ if "nextCursor" in data:
+ params["offset"] = None
+ params["cursor"] = data["nextCursor"]
+ elif "nextOffset" in data:
+ params["offset"] = data["nextOffset"]
+ params["cursor"] = None
+ elif params.get("offset") is None:
+ return
else:
- params["offset"] += params["limit"]
+ params["offset"] = int(params["offset"]) + len(results)
def _module_id_watching(self, user):
url = "{}/{}/about".format(self.extractor.root, user)
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 8b90250..e85eb8d 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -44,6 +44,11 @@ class DirectlinkExtractor(Extractor):
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
"mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
+ # internationalized domain name
+ ("https://räksmörgås.josefsson.org/raksmorgas.jpg", {
+ "url": "a65667f670b194afbd1e3ea5e7a78938d36747da",
+ "keyword": "fd5037fe86eebd4764e176cbaf318caec0f700be",
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index d78f25b..59e8c90 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://dynasty-scans.com/"""
from .common import ChapterExtractor, MangaExtractor, Extractor, Message
-from .. import text
-import json
+from .. import text, util
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com"
@@ -86,7 +85,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
data = text.extr(page, "var pages = ", ";\n")
return [
(self.root + img["image"], None)
- for img in json.loads(data)
+ for img in util.json_loads(data)
]
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
new file mode 100644
index 0000000..8f2994e
--- /dev/null
+++ b/gallery_dl/extractor/e621.py
@@ -0,0 +1,254 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2014-2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://e621.net/ and other e621 instances"""
+
+from .common import Message
+from . import danbooru
+from .. import text, util
+
+
+class E621Extractor(danbooru.DanbooruExtractor):
+ """Base class for e621 extractors"""
+ basecategory = "E621"
+ page_limit = 750
+ page_start = None
+ per_page = 320
+ request_interval_min = 1.0
+
+ def items(self):
+ self.session.headers["User-Agent"] = util.USERAGENT + " (by mikf)"
+
+ includes = self.config("metadata") or ()
+ if includes:
+ if isinstance(includes, str):
+ includes = includes.split(",")
+ elif not isinstance(includes, (list, tuple)):
+ includes = ("notes", "pools")
+
+ notes = ("notes" in includes)
+ pools = ("pools" in includes)
+
+ data = self.metadata()
+ for post in self.posts():
+ file = post["file"]
+
+ if not file["url"]:
+ md5 = file["md5"]
+ file["url"] = "https://static1.{}/data/{}/{}/{}.{}".format(
+ self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
+
+ if notes and post.get("has_notes"):
+ url = "{}/notes.json?search[post_id]={}".format(
+ self.root, post["id"])
+ post["notes"] = self.request(url).json()
+
+ if pools and post["pools"]:
+ url = "{}/pools.json?search[id]={}".format(
+ self.root, ",".join(map(str, post["pools"])))
+ post["pools"] = _pools = self.request(url).json()
+ for pool in _pools:
+ pool["name"] = pool["name"].replace("_", " ")
+
+ post["filename"] = file["md5"]
+ post["extension"] = file["ext"]
+
+ post.update(data)
+ yield Message.Directory, post
+ yield Message.Url, file["url"], post
+
+
+BASE_PATTERN = E621Extractor.update({
+ "e621": {
+ "root": "https://e621.net",
+ "pattern": r"e621\.net",
+ },
+ "e926": {
+ "root": "https://e926.net",
+ "pattern": r"e926\.net",
+ },
+})
+
+
+class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
+ """Extractor for e621 posts from tag searches"""
+ pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
+ test = (
+ ("https://e621.net/posts?tags=anry", {
+ "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
+ "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
+ }),
+ ("https://e621.net/post/index/1/anry"),
+ ("https://e621.net/post?tags=anry"),
+
+ ("https://e926.net/posts?tags=anry", {
+ "url": "12198b275c62ffe2de67cca676c8e64de80c425d",
+ "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
+ }),
+ ("https://e926.net/post/index/1/anry"),
+ ("https://e926.net/post?tags=anry"),
+ )
+
+
+class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
+ """Extractor for e621 pools"""
+ pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
+ test = (
+ ("https://e621.net/pools/73", {
+ "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
+ "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
+ }),
+ ("https://e621.net/pool/show/73"),
+
+ ("https://e926.net/pools/73", {
+ "url": "6936f1b6a18c5c25bee7cad700088dbc2503481b",
+ "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
+ }),
+ ("https://e926.net/pool/show/73"),
+ )
+
+ def posts(self):
+ self.log.info("Fetching posts of pool %s", self.pool_id)
+
+ id_to_post = {
+ post["id"]: post
+ for post in self._pagination(
+ "/posts.json", {"tags": "pool:" + self.pool_id})
+ }
+
+ posts = []
+ append = posts.append
+ for num, pid in enumerate(self.post_ids, 1):
+ if pid in id_to_post:
+ post = id_to_post[pid]
+ post["num"] = num
+ append(post)
+ else:
+ self.log.warning("Post %s is unavailable", pid)
+ return posts
+
+
+class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
+ """Extractor for single e621 posts"""
+ pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
+ test = (
+ ("https://e621.net/posts/535", {
+ "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
+ "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
+ }),
+ ("https://e621.net/posts/3181052", {
+ "options": (("metadata", "notes,pools"),),
+ "pattern": r"https://static\d\.e621\.net/data/c6/8c"
+ r"/c68cca0643890b615f75fb2719589bff\.png",
+ "keyword": {
+ "notes": [
+ {
+ "body": "Little Legends 2",
+ "created_at": "2022-05-16T13:58:38.877-04:00",
+ "creator_id": 517450,
+ "creator_name": "EeveeCuddler69",
+ "height": 475,
+ "id": 321296,
+ "is_active": True,
+ "post_id": 3181052,
+ "updated_at": "2022-05-16T13:59:02.050-04:00",
+ "version": 3,
+ "width": 809,
+ "x": 83,
+ "y": 117,
+ },
+ ],
+ "pools": [
+ {
+ "category": "series",
+ "created_at": "2022-02-17T00:29:22.669-05:00",
+ "creator_id": 1077440,
+ "creator_name": "Yeetus90",
+ "description": "* \"Little Legends\":/pools/27971\r\n"
+ "* Little Legends 2\r\n"
+ "* \"Little Legends 3\":/pools/27481",
+ "id": 27492,
+ "is_active": False,
+ "name": "Little Legends 2",
+ "post_count": 39,
+ "post_ids": list,
+ "updated_at": "2022-03-27T06:30:03.382-04:00"
+ },
+ ],
+ },
+ }),
+ ("https://e621.net/post/show/535"),
+
+ ("https://e926.net/posts/535", {
+ "url": "17aec8ebd8fab098d321adcb62a2db59dab1f4bf",
+ "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
+ }),
+ ("https://e926.net/post/show/535"),
+ )
+
+ def posts(self):
+ url = "{}/posts/{}.json".format(self.root, self.post_id)
+ return (self.request(url).json()["post"],)
+
+
+class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
+ """Extractor for popular images from e621"""
+ pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
+ test = (
+ ("https://e621.net/explore/posts/popular"),
+ (("https://e621.net/explore/posts/popular"
+ "?date=2019-06-01&scale=month"), {
+ "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+ "count": ">= 70",
+ }),
+
+ ("https://e926.net/explore/posts/popular"),
+ (("https://e926.net/explore/posts/popular"
+ "?date=2019-06-01&scale=month"), {
+ "pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
+ "count": ">= 70",
+ }),
+ )
+
+ def posts(self):
+ if self.page_start is None:
+ self.page_start = 1
+ return self._pagination("/popular.json", self.params, True)
+
+
+class E621FavoriteExtractor(E621Extractor):
+ """Extractor for e621 favorites"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "Favorites", "{user_id}")
+ archive_fmt = "f_{user_id}_{id}"
+ pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ test = (
+ ("https://e621.net/favorites"),
+ ("https://e621.net/favorites?page=2&user_id=53275", {
+ "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+ "count": "> 260",
+ }),
+
+ ("https://e926.net/favorites"),
+ ("https://e926.net/favorites?page=2&user_id=53275", {
+ "pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
+ "count": "> 260",
+ }),
+ )
+
+ def __init__(self, match):
+ E621Extractor.__init__(self, match)
+ self.query = text.parse_query(match.group(match.lastindex))
+
+ def metadata(self):
+ return {"user_id": self.query.get("user_id", "")}
+
+ def posts(self):
+ if self.page_start is None:
+ self.page_start = 1
+ return self._pagination("/favorites.json", self.query, True)
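
Note: E621Extractor.items() above rebuilds hidden file URLs from a post's md5, mirroring the logic removed from danbooru.py. Standalone, using the md5 from the post test above:

    root = "https://e621.net"
    md5, ext = "c68cca0643890b615f75fb2719589bff", "png"
    url = "https://static1.{}/data/{}/{}/{}.{}".format(
        root[8:], md5[0:2], md5[2:4], md5, ext)
    print(url)
    # https://static1.e621.net/data/c6/8c/c68cca0643890b615f75fb2719589bff.png
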
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index ad3f16b..03307f8 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import itertools
-import time
BASE_PATTERN = r"(?:https?://)?(?:www\.)?erome\.com"
@@ -75,7 +74,7 @@ class EromeExtractor(Extractor):
if response.content.find(
b"<title>Please wait a few moments</title>", 0, 600) < 0:
return response
- time.sleep(5)
+ self.sleep(5.0, "check")
def _pagination(self, url, params):
for params["page"] in itertools.count(1):
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
index 57587b6..0503dcf 100644
--- a/gallery_dl/extractor/fallenangels.py
+++ b/gallery_dl/extractor/fallenangels.py
@@ -6,11 +6,10 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters from https://www.fascans.com/"""
+"""Extractors for https://www.fascans.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
-import json
class FallenangelsChapterExtractor(ChapterExtractor):
@@ -56,7 +55,7 @@ class FallenangelsChapterExtractor(ChapterExtractor):
def images(page):
return [
(img["page_image"], None)
- for img in json.loads(
+ for img in util.json_loads(
text.extr(page, "var pages = ", ";")
)
]
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 41431dc..57c4333 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -89,6 +89,7 @@ class FanboxExtractor(Extractor):
content_body["imageMap"] = {
image_id: image_map[image_id]
for image_id in images
+ if image_id in image_map
}
post["content"] = "\n".join(content)
@@ -256,7 +257,6 @@ class FanboxCreatorExtractor(FanboxExtractor):
def posts(self):
url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10"
-
return self._pagination(url.format(self.creator_id))
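
Note: the one-line imageMap change above guards the dict comprehension so a referenced image ID that is missing from the map no longer raises KeyError. With made-up data:

    image_map = {"a": {"url": "https://example.org/a.png"}}
    images = ["a", "b"]  # "b" has no entry in image_map

    content_imagemap = {
        image_id: image_map[image_id]
        for image_id in images
        if image_id in image_map  # the added guard
    }
    print(list(content_imagemap))  # ['a']
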
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 476fdeb..13dfead 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -7,8 +7,7 @@
"""Extractors for https://fantia.jp/"""
from .common import Extractor, Message
-from .. import text
-import json
+from .. import text, util
class FantiaExtractor(Extractor):
@@ -117,7 +116,7 @@ class FantiaExtractor(Extractor):
yield self.root+"/"+content["download_uri"], post
if content["category"] == "blog" and "comment" in content:
- comment_json = json.loads(content["comment"])
+ comment_json = util.json_loads(content["comment"])
ops = comment_json.get("ops", ())
# collect blogpost text first
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 2290cc2..4a38fb4 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import BaseExtractor, Message
from .. import text, util
-import json
class FoolslideExtractor(BaseExtractor):
@@ -106,7 +105,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
})
def images(self, page):
- return json.loads(text.extr(page, "var pages = ", ";"))
+ return util.json_loads(text.extr(page, "var pages = ", ";"))
class FoolslideMangaExtractor(FoolslideExtractor):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 8d73949..80b0ae1 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -13,6 +13,8 @@ from . import gelbooru_v02
from .. import text, exception
import binascii
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
+
class GelbooruBase():
"""Base class for gelbooru extractors"""
@@ -53,6 +55,23 @@ class GelbooruBase():
del params["pid"]
params["tags"] = "{} id:<{}".format(self.tags, post["id"])
+ def _pagination_html(self, params):
+ url = self.root + "/index.php"
+ params["pid"] = self.page_start * self.per_page
+
+ data = {}
+ while True:
+ num_ids = 0
+ page = self.request(url, params=params).text
+
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ yield from self._api_request(data)
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+
@staticmethod
def _file_url(post):
url = post["file_url"]
@@ -88,8 +107,7 @@ class GelbooruBase():
class GelbooruTagExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02TagExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
- pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
- r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
+ pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
test = (
("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
"count": 5,
@@ -108,8 +126,7 @@ class GelbooruPoolExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PoolExtractor):
"""Extractor for gelbooru pools"""
per_page = 45
- pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
- r"\?page=pool&s=show&id=(?P<pool>\d+)")
+ pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)"
test = (
("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"count": 6,
@@ -124,9 +141,9 @@ class GelbooruPoolExtractor(GelbooruBase,
"id" : self.pool_id,
"pid" : self.page_start,
}
- self._page = self.request(url, params=self._params).text
+ page = self.request(url, params=self._params).text
- name, pos = text.extract(self._page, "<h3>Now Viewing: ", "</h3>")
+ name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
if not name:
raise exception.NotFoundError("pool")
@@ -136,29 +153,19 @@ class GelbooruPoolExtractor(GelbooruBase,
}
def posts(self):
- url = self.root + "/index.php"
- params = self._params
+ return self._pagination_html(self._params)
- page = self._page
- del self._page
- data = {}
-
- while True:
- num_ids = 0
- for data["id"] in text.extract_iter(page, '" id="p', '"'):
- num_ids += 1
- yield from self._api_request(data)
- if num_ids < self.per_page:
- return
- params["pid"] += self.per_page
- page = self.request(url, params=params).text
+class GelbooruFavoriteExtractor(GelbooruBase,
+ gelbooru_v02.GelbooruV02FavoriteExtractor):
+ pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
+ test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=12345",)
class GelbooruPostExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PostExtractor):
"""Extractor for single images from gelbooru.com"""
- pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
+ pattern = (BASE_PATTERN +
r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
r"(?:[^#]+&)?id=(\d+)")
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 9292da3..9999283 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -32,6 +32,28 @@ class GenericExtractor(Extractor):
(?:\#(?P<fragment>.*))? # optional fragment
"""
+ test = (
+ ("generic:https://www.nongnu.org/lzip/", {
+ "count": 1,
+ "content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
+ "keyword": {
+ "description": "Lossless data compressor",
+ "imageurl": "https://www.nongnu.org/lzip/lzip.png",
+ "keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
+ "gzip, data compression, GNU, free software",
+ "pageurl": "https://www.nongnu.org/lzip/",
+ },
+ }),
+ # internationalized domain name
+ ("generic:https://räksmörgås.josefsson.org/", {
+ "count": 2,
+ "pattern": "^https://räksmörgås.josefsson.org/",
+ }),
+ ("generic:https://en.wikipedia.org/Main_Page"),
+ ("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
+ ("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
+ )
+
def __init__(self, match):
"""Init."""
Extractor.__init__(self, match)
@@ -56,7 +78,7 @@ class GenericExtractor(Extractor):
self.root = self.scheme + match.group('domain')
def items(self):
- """Get page, extract metadata & images, yield them in suitable messages.
+ """Get page, extract metadata & images, yield them in suitable messages
Adapted from common.GalleryExtractor.items()
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 43479c6..5b561ea 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -1,16 +1,15 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.hbrowse.com/"""
+"""Extractors for https://www.hbrowse.com/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, exception
-import json
+from .. import text, util, exception
class HbrowseBase():
@@ -68,7 +67,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
def images(self, page):
base = self.root + "/data" + self.path
json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]"
- return [(base + name, None) for name in json.loads(json_data)]
+ return [(base + name, None) for name in util.json_loads(json_data)]
class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index dc4e31d..e771a4f 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -9,8 +9,7 @@
"""Extractors for https://hentai2read.com/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text
-import json
+from .. import text, util
import re
@@ -78,7 +77,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
images = text.extract(page, "'images' : ", ",\n")[0]
return [
("https://hentaicdn.com/hentai" + part, None)
- for part in json.loads(images)
+ for part in util.json_loads(images)
]
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index 0327f56..ed8576f 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://hentaifox.com/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text
-import json
+from .. import text, util
class HentaifoxBase():
@@ -90,7 +89,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
server1 = "https://i.hentaifox.com"
server2 = "https://i2.hentaifox.com"
- for num, image in json.loads(data).items():
+ for num, image in util.json_loads(data).items():
ext, width, height = image.split(",")
path = urlfmt(num, extmap[ext])
append((server1 + path, {
diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py
index bf9e464..0617330 100644
--- a/gallery_dl/extractor/hentaihand.py
+++ b/gallery_dl/extractor/hentaihand.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
-import json
class HentaihandGalleryExtractor(GalleryExtractor):
@@ -46,7 +45,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
- info = json.loads(page)
+ info = util.json_loads(page)
data = {
"gallery_id" : text.parse_int(info["id"]),
"title" : info["title"],
diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py
index 38ec77c..2297cc0 100644
--- a/gallery_dl/extractor/hentaihere.py
+++ b/gallery_dl/extractor/hentaihere.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://hentaihere.com/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text
-import json
+from .. import text, util
import re
@@ -80,7 +79,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
images = text.extr(page, "var rff_imageList = ", ";")
return [
("https://hentaicdn.com/hentai" + part, None)
- for part in json.loads(images)
+ for part in util.json_loads(images)
]
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 44459ce..4e8d1ca 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,7 +13,6 @@ from .nozomi import decode_nozomi
from ..cache import memcache
from .. import text, util
import string
-import json
import re
@@ -75,7 +74,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
self.root, gid)
def metadata(self, page):
- self.info = info = json.loads(page.partition("=")[2])
+ self.info = info = util.json_loads(page.partition("=")[2])
iget = info.get
language = iget("language")
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 1efbbf0..497f1ef 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message
-from .. import text, exception
-import json
+from .. import text, util, exception
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
@@ -47,7 +46,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
test = (
- ("https://www.imagefap.com/pictures/7102714", {
+ ("https://www.imagefap.com/gallery/7102714", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3",
@@ -68,6 +67,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
},
"count": 44,
}),
+ ("https://www.imagefap.com/pictures/7102714"),
("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"),
)
@@ -78,7 +78,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
self.image_id = ""
def items(self):
- url = "{}/pictures/{}/".format(self.root, self.gid)
+ url = "{}/gallery/{}".format(self.root, self.gid)
page = self.request(url).text
data = self.get_job_metadata(page)
yield Message.Directory, data
@@ -88,22 +88,21 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
- descr, pos = text.extract(
- page, '<meta name="description" content="Browse ', '"')
- count, pos = text.extract(page, ' 1 of ', ' pics"', pos)
- self.image_id = text.extract(page, 'id="img_ed_', '"', pos)[0]
-
- title, _, descr = descr.partition(" porn picture gallery by ")
- uploader, _, tags = descr.partition(" to see hottest ")
- self._count = text.parse_int(count)
- return {
+ extr = text.extract_from(page)
+
+ data = {
"gallery_id": text.parse_int(self.gid),
- "title": text.unescape(title),
- "uploader": uploader,
- "tags": tags[:-11].split(", "),
- "count": self._count,
+ "tags": extr('name="keywords" content="', '"').split(", "),
+ "uploader": extr("porn picture gallery by ", " to see hottest"),
+ "title": text.unescape(extr("<title>", "<")),
+ "count": text.parse_int(extr(' 1 of ', ' pics"')),
}
+ self.image_id = extr('id="img_ed_', '"')
+ self._count = data["count"]
+
+ return data
+
def get_images(self):
"""Collect image-urls and -metadata"""
url = "{}/photo/{}/".format(self.root, self.image_id)
@@ -128,7 +127,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
data["image_id"] = text.parse_int(data["filename"])
yield image_url, data
- if cnt < 24 and num >= total:
+ if not cnt or cnt < 24 and num >= total:
return
params["idx"] += cnt
@@ -173,7 +172,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
page, 'id="imageid_input" value="', '"', pos)
gallery_id, pos = text.extract(
page, 'id="galleryid_input" value="', '"', pos)
- info = json.loads(info)
+ info = util.json_loads(info)
url = info["contentUrl"]
return url, text.nameext_from_url(url, {
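
Note: the rewritten get_job_metadata above switches to text.extract_from, a cursor-style extractor where each call resumes after the previous match, so fields must be read in document order. A rough stand-in (the real helper's signature may differ):

    def extract_from(txt):
        pos = [0]
        def extr(begin, end):
            try:
                start = txt.index(begin, pos[0]) + len(begin)
                stop = txt.index(end, start)
            except ValueError:
                return ""
            pos[0] = stop + len(end)
            return txt[start:stop]
        return extr

    page = '<meta name="keywords" content="a, b"><title>Demo<'
    extr = extract_from(page)
    print(extr('name="keywords" content="', '"').split(", "))  # ['a', 'b']
    print(extr("<title>", "<"))  # Demo
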
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 207562a..d57ec89 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -187,12 +187,19 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
class ImagetwistImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imagetwist.com"""
category = "imagetwist"
- pattern = r"(?:https?://)?((?:www\.)?imagetwist\.com/([a-z0-9]{12}))"
- test = ("https://imagetwist.com/f1i2s4vhvbrq/test.png", {
- "url": "8d5e168c0bee30211f821c6f3b2116e419d42671",
- "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
- "content": "0c8768055e4e20e7c7259608b67799171b691140",
- })
+ pattern = (r"(?:https?://)?((?:www\.|phun\.)?"
+ r"image(?:twist|haha)\.com/([a-z0-9]{12}))")
+ test = (
+ ("https://imagetwist.com/f1i2s4vhvbrq/test.png", {
+ "url": "8d5e168c0bee30211f821c6f3b2116e419d42671",
+ "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ }),
+ ("https://www.imagetwist.com/f1i2s4vhvbrq/test.png"),
+ ("https://phun.imagetwist.com/f1i2s4vhvbrq/test.png"),
+ ("https://imagehaha.com/f1i2s4vhvbrq/test.png"),
+ ("https://www.imagehaha.com/f1i2s4vhvbrq/test.png"),
+ )
@property
@memcache(maxage=3*3600)
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 49082d8..a221075 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -9,9 +9,8 @@
"""Extractors for https://imgbb.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
-import json
class ImgbbExtractor(Extractor):
@@ -98,7 +97,7 @@ class ImgbbExtractor(Extractor):
while True:
for img in text.extract_iter(page, "data-object='", "'"):
- yield json.loads(text.unquote(img))
+ yield util.json_loads(text.unquote(img))
if data:
if params["seek"] == data["seekEnd"]:
return
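
The recurring json.loads → util.json_loads swap (here and in most files below) points at a shared decoder in gallery_dl.util. A plausible sketch of such a wrapper — assumed, not copied from util.py — binds one stdlib decoder so JSON decoding has a single point to tune or replace:

    import json

    # assumed sketch: one shared decoder instance; json.loads() would
    # otherwise re-dispatch on its keyword arguments for every call
    _decoder = json.JSONDecoder()
    json_loads = _decoder.decode

    json_loads('{"a": 1}')   # {'a': 1}
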
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index deb31a0..4c1be0f 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -40,6 +40,7 @@ class InstagramExtractor(Extractor):
self._logged_in = True
self._find_tags = re.compile(r"#\w+").findall
self._cursor = None
+ self._user = None
def items(self):
self.login()
@@ -60,6 +61,8 @@ class InstagramExtractor(Extractor):
post = self._parse_post_graphql(post)
else:
post = self._parse_post_rest(post)
+ if self._user:
+ post["user"] = self._user
post.update(data)
files = post.pop("_files")
@@ -363,6 +366,22 @@ class InstagramExtractor(Extractor):
self._cursor = cursor
return cursor
+ def _assign_user(self, user):
+ self._user = user
+
+ for key, old in (
+ ("count_media" , "edge_owner_to_timeline_media"),
+ ("count_video" , "edge_felix_video_timeline"),
+ ("count_saved" , "edge_saved_media"),
+ ("count_mutual" , "edge_mutual_followed_by"),
+ ("count_follow" , "edge_follow"),
+ ("count_followed" , "edge_followed_by"),
+ ("count_collection", "edge_media_collections")):
+ try:
+ user[key] = user.pop(old)["count"]
+ except Exception:
+ user[key] = 0
+
class InstagramUserExtractor(InstagramExtractor):
"""Extractor for an Instagram user profile"""
@@ -796,6 +815,7 @@ class InstagramRestAPI():
name = user["username"]
s = "" if name.endswith("s") else "s"
raise exception.StopExtraction("%s'%s posts are private", name, s)
+ self.extractor._assign_user(user)
return user["id"]
def user_clips(self, user_id):
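
_assign_user() flattens Instagram's GraphQL edge counters into plain count_* fields, defaulting to 0 when an edge is absent or malformed. A worked example with assumed sample data:

    user = {
        "username": "example",
        "edge_followed_by": {"count": 1234},
        "edge_follow": {"count": 56},
    }
    for key, old in (
            ("count_followed", "edge_followed_by"),
            ("count_follow", "edge_follow"),
            ("count_media", "edge_owner_to_timeline_media")):
        try:
            user[key] = user.pop(old)["count"]
        except Exception:
            user[key] = 0  # edge missing from this profile payload
    # user == {'username': 'example', 'count_followed': 1234,
    #          'count_follow': 56, 'count_media': 0}
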
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 8067f63..c0a1de1 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://issuu.com/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text
-import json
+from .. import text, util
class IssuuBase():
@@ -54,7 +53,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
})
def metadata(self, page):
- data = json.loads(text.extr(
+ data = util.json_loads(text.extr(
page, '<script data-json="', '"').replace("&quot;", '"'))
doc = data["initialDocumentData"]["document"]
diff --git a/gallery_dl/extractor/lightroom.py b/gallery_dl/extractor/lightroom.py
index d202e20..783473d 100644
--- a/gallery_dl/extractor/lightroom.py
+++ b/gallery_dl/extractor/lightroom.py
@@ -7,8 +7,7 @@
"""Extractors for https://lightroom.adobe.com/"""
from .common import Extractor, Message
-from .. import text
-import json
+from .. import text, util
class LightroomGalleryExtractor(Extractor):
@@ -46,7 +45,7 @@ class LightroomGalleryExtractor(Extractor):
# Get config
url = "https://lightroom.adobe.com/shares/" + self.href
response = self.request(url)
- album = json.loads(
+ album = util.json_loads(
text.extr(response.text, "albumAttributes: ", "\n")
)
@@ -75,7 +74,7 @@ class LightroomGalleryExtractor(Extractor):
url = base_url + next_url
page = self.request(url).text
# skip 1st line as it's a JS loop
- data = json.loads(page[page.index("\n") + 1:])
+ data = util.json_loads(page[page.index("\n") + 1:])
base_url = data["base"]
for res in data["resources"]:
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index dae203e..409483b 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
-from ..version import __version__
from collections import defaultdict
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
@@ -28,10 +27,10 @@ class MangadexExtractor(Extractor):
archive_fmt = "{chapter_id}_{page}"
root = "https://mangadex.org"
_cache = {}
- _headers = {"User-Agent": "gallery-dl/" + __version__}
def __init__(self, match):
Extractor.__init__(self, match)
+ self.session.headers["User-Agent"] = util.USERAGENT
self.api = MangadexAPI(self)
self.uuid = match.group(1)
@@ -127,7 +126,6 @@ class MangadexChapterExtractor(MangadexExtractor):
data["chapter"], data["chapter_minor"], data["_external_url"])
yield Message.Directory, data
- data["_http_headers"] = self._headers
server = self.api.athome_server(self.uuid)
chapter = server["chapter"]
@@ -192,7 +190,7 @@ class MangadexAPI():
def __init__(self, extr):
self.extractor = extr
- self.headers = extr._headers.copy()
+ self.headers = {}
self.username, self.password = self.extractor._get_auth_info()
if not self.username:
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index 14a542b..5ba18a3 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -10,51 +10,33 @@ from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
-BASE_PATTERN = \
- r"(?:https?://)?((?:(?:chap|read)?manganato|(?:www\.)?manganelo)\.com)"
+BASE_PATTERN = r"(?:https?://)?((?:chap|read|www\.|m\.)?mangan(?:at|el)o\.com)"
-class ManganeloChapterExtractor(ChapterExtractor):
- """Extractor for manga-chapters from manganelo.com"""
+class ManganeloBase():
category = "manganelo"
root = "https://chapmanganato.com"
- pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
- test = (
- ("https://chapmanganato.com/manga-gn983696/chapter-23", {
- "pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23"
- r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg",
- "keyword": "2c5cd59342f149375df9bcb50aa416b4d04a43cf",
- "count": 25,
- }),
- ("https://readmanganato.com/manga-gn983696/chapter-23"),
- ("https://manganelo.com/chapter/gamers/chapter_15"),
- ("https://manganelo.com/chapter/gq921227/chapter_23"),
- )
def __init__(self, match):
domain, path = match.groups()
- ChapterExtractor.__init__(self, match, "https://" + domain + path)
+ super().__init__(match, "https://" + domain + path)
self.session.headers['Referer'] = self.root
- def metadata(self, page):
- _ , pos = text.extract(page, '<a class="a-h" ', '/a>')
- manga , pos = text.extract(page, '<a class="a-h" ', '/a>', pos)
- info , pos = text.extract(page, '<a class="a-h" ', '/a>', pos)
- author, pos = text.extract(page, '- Author(s) : ', '</p>', pos)
-
- manga, _ = text.extract(manga, '">', '<')
- info , _ = text.extract(info , '">', '<')
- match = re.match(
- r"(?:[Vv]ol\. *(\d+) )?"
- r"[Cc]hapter *([^:]*)"
- r"(?:: *(.+))?", info)
+ self._match_chapter = re.compile(
+ r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
+ r"[Cc]hapter\s*([^:]+)"
+ r"(?::\s*(.+))?").match
+
+ def _parse_chapter(self, info, manga, author, date=None):
+ match = self._match_chapter(info)
volume, chapter, title = match.groups() if match else ("", "", info)
chapter, sep, minor = chapter.partition(".")
return {
- "manga" : text.unescape(manga),
+ "manga" : manga,
+ "author" : author,
+ "date" : date,
"title" : text.unescape(title) if title else "",
- "author" : text.unescape(author) if author else "",
"volume" : text.parse_int(volume),
"chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
@@ -62,19 +44,53 @@ class ManganeloChapterExtractor(ChapterExtractor):
"language" : "English",
}
+
+class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
+ """Extractor for manga chapters from manganelo.com"""
+ pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
+ test = (
+ ("https://chapmanganato.com/manga-gn983696/chapter-23", {
+ "pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23"
+ r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg",
+ "keyword": "17faaea7f0fb8c2675a327bf3aa0bcd7a6311d68",
+ "count": 25,
+ }),
+ ("https://chapmanganelo.com/manga-ti107776/chapter-4", {
+ "pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/01/92"
+ r"/08/ti970565/chapter_4_caster/\d+-o\.jpg",
+ "keyword": "06e01fa9b3fc9b5b954c0d4a98f0153b40922ded",
+ "count": 45,
+ }),
+ ("https://readmanganato.com/manga-gn983696/chapter-23"),
+ ("https://manganelo.com/chapter/gamers/chapter_15"),
+ ("https://manganelo.com/chapter/gq921227/chapter_23"),
+ )
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ extr('class="a-h"', ">")
+ manga = extr('title="', '"')
+ info = extr('title="', '"')
+ author = extr("- Author(s) : ", "</p>")
+
+ return self._parse_chapter(
+ info, text.unescape(manga), text.unescape(author))
+
def images(self, page):
page = text.extr(
page, 'class="container-chapter-reader', '\n<div')
return [
(url, None)
for url in text.extract_iter(page, '<img src="', '"')
+ ] or [
+ (url, None)
+ for url in text.extract_iter(
+ page, '<img class="reader-content" src="', '"')
]
-class ManganeloMangaExtractor(MangaExtractor):
+class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
"""Extractor for manga from manganelo.com"""
- category = "manganelo"
- root = "https://chapmanganato.com"
chapterclass = ManganeloChapterExtractor
pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
test = (
@@ -82,40 +98,28 @@ class ManganeloMangaExtractor(MangaExtractor):
"pattern": ManganeloChapterExtractor.pattern,
"count": ">= 25",
}),
+ ("https://m.manganelo.com/manga-ti107776", {
+ "pattern": ManganeloChapterExtractor.pattern,
+ "count": ">= 12",
+ }),
("https://readmanganato.com/manga-gn983696"),
("https://manganelo.com/manga/read_otome_no_teikoku"),
("https://manganelo.com/manga/ol921234/"),
)
- def __init__(self, match):
- domain, path = match.groups()
- MangaExtractor.__init__(self, match, "https://" + domain + path)
- self.session.headers['Referer'] = self.root
-
def chapters(self, page):
results = []
- data = self.parse_page(page, {"lang": "en", "language": "English"})
+ append = results.append
+
+ extr = text.extract_from(page)
+ manga = text.unescape(extr("<h1>", "<"))
+ author = text.remove_html(extr("</i>Author(s) :</td>", "</tr>"))
- needle = 'class="chapter-name text-nowrap" href="'
- pos = page.index('<ul class="row-content-chapter">')
+ extr('class="row-content-chapter', '')
while True:
- url, pos = text.extract(page, needle, '"', pos)
+ url = extr('class="chapter-name text-nowrap" href="', '"')
if not url:
return results
- data["title"], pos = text.extract(page, '>', '</a>', pos)
- data["date"] , pos = text.extract(
- page, 'class="chapter-time text-nowrap" title="', '">', pos)
- chapter, sep, minor = url.rpartition("/chapter_")[2].partition(".")
- data["chapter"] = text.parse_int(chapter)
- data["chapter_minor"] = sep + minor
- results.append((url, data.copy()))
-
- @staticmethod
- def parse_page(page, data):
- """Parse metadata on 'page' and add it to 'data'"""
- text.extract_all(page, (
- ("manga" , '<h1>', '</h1>'),
- ('author' , '</i>Author(s) :</td>', '</tr>'),
- ), values=data)
- data["author"] = text.remove_html(data["author"])
- return data
+ info = extr(">", "<")
+ date = extr('class="chapter-time text-nowrap" title="', '"')
+ append((url, self._parse_chapter(info, manga, author, date)))
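
The chapter regex in ManganeloBase accepts an optional volume prefix and an optional ": title" suffix; chapter.partition(".") then splits a decimal chapter number into its integer part and a ".5"-style minor part. For example:

    import re

    match_chapter = re.compile(
        r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
        r"[Cc]hapter\s*([^:]+)"
        r"(?::\s*(.+))?").match

    match_chapter("Vol.3 Chapter 23.5: The Long Night").groups()
    # ('3', '23.5', 'The Long Night')
    match_chapter("Chapter 15").groups()
    # (None, '15', None)

    chapter, sep, minor = "23.5".partition(".")
    # chapter = '23', sep + minor = '.5' -> chapter_minor
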
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index dcf1972..168fbe8 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://mangapark.net/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text, exception
-import json
+from .. import text, util, exception
import re
@@ -104,7 +103,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
return data
def images(self, page):
- data = json.loads(text.extr(page, "var _load_pages =", ";"))
+ data = util.json_loads(text.extr(page, "var _load_pages =", ";"))
return [
(text.urljoin(self.root, item["u"]), {
"width": text.parse_int(item["w"]),
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index 5fa5631..b7070f2 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,7 +10,6 @@
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
-import json
class MangaseeBase():
@@ -43,6 +42,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"pattern": r"https://[^/]+/manga/Tokyo-Innocent/0004\.5-00\d\.png",
"count": 8,
"keyword": {
+ "author": ["NARUMI Naru"],
"chapter": 4,
"chapter_minor": ".5",
"chapter_string": "100045",
@@ -50,6 +50,8 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"date": "dt:2020-01-20 21:52:53",
"extension": "png",
"filename": r"re:0004\.5-00\d",
+ "genre": ["Comedy", "Fantasy", "Harem", "Romance", "Shounen",
+ "Supernatural"],
"index": "1",
"lang": "en",
"language": "English",
@@ -63,6 +65,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"pattern": r"https://[^/]+/manga/One-Piece/1063-0\d\d\.png",
"count": 13,
"keyword": {
+ "author": ["ODA Eiichiro"],
"chapter": 1063,
"chapter_minor": "",
"chapter_string": "110630",
@@ -70,6 +73,8 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"date": "dt:2022-10-16 17:32:54",
"extension": "png",
"filename": r"re:1063-0\d\d",
+ "genre": ["Action", "Adventure", "Comedy", "Drama", "Fantasy",
+ "Shounen"],
"index": "1",
"lang": "en",
"language": "English",
@@ -94,12 +99,16 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
- self.chapter = data = json.loads(extr("vm.CurChapter =", ";\r\n"))
+ author = util.json_loads(extr('"author":', '],') + "]")
+ genre = util.json_loads(extr('"genre":', '],') + "]")
+ self.chapter = data = util.json_loads(extr("vm.CurChapter =", ";\r\n"))
self.domain = extr('vm.CurPathName = "', '"')
self.slug = extr('vm.IndexName = "', '"')
data = self._transform_chapter(data)
data["manga"] = text.unescape(extr('vm.SeriesName = "', '"'))
+ data["author"] = author
+ data["genre"] = genre
return data
def images(self, page):
@@ -128,10 +137,38 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
"/Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai"), {
"pattern": MangaseeChapterExtractor.pattern,
"count": ">= 17",
+ "keyword": {
+ "author": ["TAKASE Masaya"],
+ "chapter": int,
+ "chapter_minor": r"re:^|\.5$",
+ "chapter_string": r"re:100\d\d\d",
+ "date": "type:datetime",
+ "genre": ["Comedy", "Romance", "School Life", "Shounen",
+ "Slice of Life"],
+ "index": "1",
+ "lang": "en",
+ "language": "English",
+ "manga": "Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai",
+ "title": "",
+ },
}),
("https://manga4life.com/manga/Ano-Musume-Ni-Kiss-To-Shirayuri-O", {
"pattern": MangaseeChapterExtractor.pattern,
"count": ">= 50",
+ "keyword": {
+ "author": ["Canno"],
+ "chapter": int,
+ "chapter_minor": r"re:^|\.5$",
+ "chapter_string": r"re:100\d\d\d",
+ "date": "type:datetime",
+ "genre": ["Comedy", "Romance", "School Life", "Seinen",
+ "Shoujo Ai"],
+ "index": "1",
+ "lang": "en",
+ "language": "English",
+ "manga": "Ano-Musume-Ni-Kiss-To-Shirayuri-O",
+ "title": ""
+ },
}),
)
@@ -142,9 +179,11 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
MangaExtractor.__init__(self, match, self.root + match.group(2))
def chapters(self, page):
- slug, pos = text.extract(page, 'vm.IndexName = "', '"')
- chapters = json.loads(text.extract(
- page, "vm.Chapters = ", ";\r\n", pos)[0])
+ extr = text.extract_from(page)
+ author = util.json_loads(extr('"author":', '],') + "]")
+ genre = util.json_loads(extr('"genre":', '],') + "]")
+ slug = extr('vm.IndexName = "', '"')
+ chapters = util.json_loads(extr("vm.Chapters = ", ";\r\n"))
result = []
for data in map(self._transform_chapter, chapters):
@@ -155,5 +194,7 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
url += "-page-1.html"
data["manga"] = slug
+ data["author"] = author
+ data["genre"] = genre
result.append((url, data))
return result
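
Both Mangasee hunks pull JSON arrays out of a larger JavaScript blob by cutting at '],' and re-appending the ']' that the end marker swallowed. The technique in isolation (with a minimal stand-in for the extract_from() helper; sample data assumed):

    import json

    page = '{"author":["ODA Eiichiro"],"genre":["Action","Shounen"],"x":1}'

    def extr(txt, begin, end):
        # minimal stand-in for text.extract_from()
        i = txt.index(begin) + len(begin)
        return txt[i:txt.index(end, i)]

    author = json.loads(extr(page, '"author":', '],') + "]")
    # ['ODA Eiichiro'] -- the closing bracket is restored before parsing
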
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
new file mode 100644
index 0000000..03e9104
--- /dev/null
+++ b/gallery_dl/extractor/misskey.py
@@ -0,0 +1,202 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Misskey instances"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class MisskeyExtractor(BaseExtractor):
+ """Base class for Misskey extractors"""
+ basecategory = "misskey"
+ directory_fmt = ("misskey", "{instance}", "{user[username]}")
+ filename_fmt = "{category}_{id}_{file[id]}.{extension}"
+ archive_fmt = "{id}_{file[id]}"
+
+ def __init__(self, match):
+ BaseExtractor.__init__(self, match)
+ self.api = MisskeyAPI(self)
+ self.instance = self.root.rpartition("://")[2]
+ self.item = match.group(match.lastindex)
+ self.renotes = self.config("renotes", False)
+ self.replies = self.config("replies", True)
+
+ def items(self):
+ for note in self.notes():
+ files = note.pop("files") or []
+ renote = note.get("renote")
+ if renote:
+ if not self.renotes:
+ self.log.debug("Skipping %s (renote)", note["id"])
+ continue
+ files.extend(renote.get("files") or ())
+
+ reply = note.get("reply")
+ if reply:
+ if not self.replies:
+ self.log.debug("Skipping %s (reply)", note["id"])
+ continue
+ files.extend(reply.get("files") or ())
+
+ note["instance"] = self.instance
+ note["instance_remote"] = note["user"]["host"]
+ note["count"] = len(files)
+ note["date"] = text.parse_datetime(
+ note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
+
+ yield Message.Directory, note
+ for note["num"], file in enumerate(files, 1):
+ file["date"] = text.parse_datetime(
+ file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ note["file"] = file
+ url = file["url"]
+ yield Message.Url, url, text.nameext_from_url(url, note)
+
+ def notes(self):
+ """Return an iterable containing all relevant Note objects"""
+ return ()
+
+
+BASE_PATTERN = MisskeyExtractor.update({
+ "misskey.io": {
+ "root": "https://misskey.io",
+ "pattern": r"misskey\.io",
+ },
+ "lesbian.energy": {
+ "root": "https://lesbian.energy",
+ "pattern": r"lesbian\.energy"
+ },
+ "sushi.ski": {
+ "root": "https://sushi.ski",
+ "pattern": r"sushi\.ski",
+ },
+})
+
+
+class MisskeyUserExtractor(MisskeyExtractor):
+ """Extractor for all images of a Misskey user"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
+ test = (
+ ("https://misskey.io/@lithla", {
+ "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://misskey.io/@blooddj@pawoo.net", {
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://lesbian.energy/@rerorero", {
+ "pattern": r"https://lesbian.energy/files/\w+",
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://lesbian.energy/@nano@mk.yopo.work"),
+ ("https://sushi.ski/@ui@misskey.04.si"),
+ )
+
+ def notes(self):
+ return self.api.users_notes(self.api.user_id_by_username(self.item))
+
+
+class MisskeyFollowingExtractor(MisskeyExtractor):
+ """Extractor for followed Misskey users"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
+ test = (
+ ("https://misskey.io/@blooddj@pawoo.net/following", {
+ "extractor": False,
+ "count": ">= 6",
+ }),
+ ("https://sushi.ski/@hatusimo_sigure/following"),
+ )
+
+ def items(self):
+ user_id = self.api.user_id_by_username(self.item)
+ for user in self.api.users_following(user_id):
+ user = user["followee"]
+ url = self.root + "/@" + user["username"]
+ host = user["host"]
+ if host is not None:
+ url += "@" + host
+ user["_extractor"] = MisskeyUserExtractor
+ yield Message.Queue, url, user
+
+
+class MisskeyNoteExtractor(MisskeyExtractor):
+ """Extractor for images from a Note"""
+ subcategory = "note"
+ pattern = BASE_PATTERN + r"/notes/(\w+)"
+ test = (
+ ("https://misskey.io/notes/9bhqfo835v", {
+ "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
+ "count": 4,
+ }),
+ ("https://misskey.io/notes/9brq7z1re6"),
+ ("https://sushi.ski/notes/9bm3x4ksqw", {
+ "pattern": r"https://media\.sushi\.ski/files/[\w-]+\.png",
+ "count": 1,
+ }),
+ ("https://lesbian.energy/notes/995ig09wqy", {
+ "count": 1,
+ }),
+ ("https://lesbian.energy/notes/96ynd9w5kc"),
+ )
+
+ def notes(self):
+ return (self.api.notes_show(self.item),)
+
+
+class MisskeyAPI():
+ """Interface for Misskey API
+
+ https://github.com/misskey-dev/misskey
+ https://misskey-hub.net/en/docs/api/
+ https://misskey-hub.net/docs/api/endpoints.html
+ """
+
+ def __init__(self, extractor):
+ self.root = extractor.root
+ self.extractor = extractor
+ self.headers = {"Content-Type": "application/json"}
+
+ def user_id_by_username(self, username):
+ endpoint = "/users/show"
+ data = {"username": username}
+ if "@" in username:
+ data["username"], _, data["host"] = username.partition("@")
+ return self._call(endpoint, data)["id"]
+
+ def users_following(self, user_id):
+ endpoint = "/users/following"
+ data = {"userId": user_id}
+ return self._pagination(endpoint, data)
+
+ def users_notes(self, user_id):
+ endpoint = "/users/notes"
+ data = {"userId": user_id}
+ return self._pagination(endpoint, data)
+
+ def notes_show(self, note_id):
+ endpoint = "/notes/show"
+ data = {"noteId": note_id}
+ return self._call(endpoint, data)
+
+ def _call(self, endpoint, data):
+ url = self.root + "/api" + endpoint
+ return self.extractor.request(
+ url, method="POST", headers=self.headers, json=data).json()
+
+ def _pagination(self, endpoint, data):
+ data["limit"] = 100
+ while True:
+ notes = self._call(endpoint, data)
+ if not notes:
+ return
+ yield from notes
+ data["untilId"] = notes[-1]["id"]
diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py
index 1db83b0..0f79d7f 100644
--- a/gallery_dl/extractor/nana.py
+++ b/gallery_dl/extractor/nana.py
@@ -7,8 +7,7 @@
"""Extractors for https://nana.my.id/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text, exception
-import json
+from .. import text, util, exception
class NanaGalleryExtractor(GalleryExtractor):
@@ -59,7 +58,7 @@ class NanaGalleryExtractor(GalleryExtractor):
}
def images(self, page):
- data = json.loads(text.extr(page, "Reader.pages = ", ".pages"))
+ data = util.json_loads(text.extr(page, "Reader.pages = ", ".pages"))
return [
("https://nana.my.id" + image, None)
for image in data["pages"]
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 1f96879..2b759ec 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,9 @@
"""Extractors for https://www.newgrounds.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
import itertools
-import json
class NewgroundsExtractor(Extractor):
@@ -20,7 +19,7 @@ class NewgroundsExtractor(Extractor):
category = "newgrounds"
directory_fmt = ("{category}", "{artist[:10]:J, }")
filename_fmt = "{category}_{_index}_{title}.{extension}"
- archive_fmt = "{_index}"
+ archive_fmt = "{_type}{_index}"
root = "https://www.newgrounds.com"
cookiedomain = ".newgrounds.com"
cookienames = ("NG_GG_username", "vmk1du5I8m")
@@ -151,11 +150,13 @@ class NewgroundsExtractor(Extractor):
@staticmethod
def _extract_image_data(extr, url):
- full = text.extract_from(json.loads(extr('"full_image_text":', '});')))
+ full = text.extract_from(util.json_loads(extr(
+ '"full_image_text":', '});')))
data = {
"title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')),
"type" : extr('og:type" content="', '"'),
+ "_type" : "i",
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
"rating" : extr('class="rated-', '"'),
@@ -175,6 +176,7 @@ class NewgroundsExtractor(Extractor):
"title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')),
"type" : extr('og:type" content="', '"'),
+ "_type" : "a",
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
"url" : extr('{"url":"', '"').replace("\\/", "/"),
@@ -227,6 +229,7 @@ class NewgroundsExtractor(Extractor):
"url" : src,
"date" : date,
"type" : type,
+ "_type" : "",
"description": text.unescape(descr or extr(
'itemprop="description" content="', '"')),
"rating" : extr('class="rated-', '"'),
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 9df43e5..4270c84 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
import collections
-import json
class NhentaiGalleryExtractor(GalleryExtractor):
@@ -48,7 +47,7 @@ class NhentaiGalleryExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
- self.data = data = json.loads(page)
+ self.data = data = util.json_loads(page)
title_en = data["title"].get("english", "")
title_ja = data["title"].get("japanese", "")
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index f9c6abf..9b69694 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -59,10 +59,7 @@ class NitterExtractor(BaseExtractor):
if url[0] == "/":
url = self.root + url
- file = {
- "url": url,
- "_http_retry_codes": (404,),
- }
+ file = {"url": url, "_http_retry": _retry_on_404}
file["filename"], _, file["extension"] = \
name.rpartition(".")
append(file)
@@ -220,10 +217,6 @@ BASE_PATTERN = NitterExtractor.update({
"root": "https://nitter.lacontrevoie.fr",
"pattern": r"nitter\.lacontrevoie\.fr",
},
- "nitter.pussthecat.org": {
- "root": "https://nitter.pussthecat.org",
- "pattern": r"nitter\.pussthecat\.org",
- },
"nitter.1d4.us": {
"root": "https://nitter.1d4.us",
"pattern": r"nitter\.1d4\.us",
@@ -283,13 +276,12 @@ class NitterTweetsExtractor(NitterExtractor):
},
},
}),
- ("https://nitter.pussthecat.org/i/user/2976459548", {
- "url": "c740a2683db2c8ed2f350afc0494475c4444025b",
- "pattern": r"https://nitter.pussthecat\.org/pic/orig"
+ ("https://nitter.lacontrevoie.fr/supernaturepics", {
+ "url": "54f4b55f2099dcc248f3fb7bfacf1349e08d8e2d",
+ "pattern": r"https://nitter\.lacontrevoie\.fr/pic/orig"
r"/media%2FCGMNYZvW0AIVoom\.jpg",
"range": "1",
}),
- ("https://nitter.lacontrevoie.fr/supernaturepics"),
("https://nitter.1d4.us/supernaturepics"),
("https://nitter.kavin.rocks/id:2976459548"),
("https://nitter.unixfox.eu/supernaturepics"),
@@ -309,7 +301,6 @@ class NitterRepliesExtractor(NitterExtractor):
"range": "1-20",
}),
("https://nitter.lacontrevoie.fr/supernaturepics/with_replies"),
- ("https://nitter.pussthecat.org/supernaturepics/with_replies"),
("https://nitter.1d4.us/supernaturepics/with_replies"),
("https://nitter.kavin.rocks/id:2976459548/with_replies"),
("https://nitter.unixfox.eu/i/user/2976459548/with_replies"),
@@ -334,7 +325,6 @@ class NitterMediaExtractor(NitterExtractor):
"range": "1-20",
}),
("https://nitter.lacontrevoie.fr/supernaturepics/media"),
- ("https://nitter.pussthecat.org/supernaturepics/media"),
("https://nitter.1d4.us/supernaturepics/media"),
("https://nitter.unixfox.eu/i/user/2976459548/media"),
)
@@ -353,7 +343,6 @@ class NitterSearchExtractor(NitterExtractor):
"range": "1-20",
}),
("https://nitter.lacontrevoie.fr/supernaturepics/search"),
- ("https://nitter.pussthecat.org/supernaturepics/search"),
("https://nitter.1d4.us/supernaturepics/search"),
("https://nitter.kavin.rocks/id:2976459548/search"),
("https://nitter.unixfox.eu/i/user/2976459548/search"),
@@ -375,7 +364,7 @@ class NitterTweetExtractor(NitterExtractor):
"url": "3f2b64e175bf284aa672c3bb53ed275e470b919a",
"content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
"keyword": {
- "comments": 16,
+ "comments": 19,
"content": "Big Wedeene River, Canada",
"count": 1,
"date": "dt:2015-05-29 17:40:00",
@@ -399,9 +388,9 @@ class NitterTweetExtractor(NitterExtractor):
"url": "9c51b3a4a1114535eb9b168bba97ad95db0d59ff",
}),
# video
- ("https://nitter.pussthecat.org/i/status/1065692031626829824", {
- "pattern": r"ytdl:https://nitter.pussthecat.org/video"
- r"/B875137EDC8FF/https%3A%2F%2Fvideo.twimg.com%2F"
+ ("https://nitter.lacontrevoie.fr/i/status/1065692031626829824", {
+ "pattern": r"ytdl:https://nitter\.lacontrevoie\.fr/video"
+ r"/[0-9A-F]{10,}/https%3A%2F%2Fvideo.twimg.com%2F"
r"ext_tw_video%2F1065691868439007232%2Fpu%2Fpl%2F"
r"nv8hUQC1R0SjhzcZ.m3u8%3Ftag%3D5",
"keyword": {
@@ -446,7 +435,7 @@ class NitterTweetExtractor(NitterExtractor):
"count": 0,
}),
# "Misleading" content
- ("https://nitter.pussthecat.org/i/status/1486373748911575046", {
+ ("https://nitter.lacontrevoie.fr/i/status/1486373748911575046", {
"count": 4,
}),
# age-restricted (#2354)
@@ -468,3 +457,7 @@ class NitterTweetExtractor(NitterExtractor):
quoted["user"] = tweet["user"]
return (tweet, quoted)
return (tweet,)
+
+
+def _retry_on_404(response):
+ return response.status_code == 404
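
The per-file list of retryable status codes becomes a predicate: _http_retry receives the failed response and decides. Presumably the HTTP downloader consumes it along these lines (assumed consumer-side sketch, not code from this patch):

    def should_retry(response, kwdict):
        retry = kwdict.get("_http_retry")
        if retry is not None:
            return retry(response)  # e.g. _retry_on_404 above
        return response.status_code in kwdict.get("_http_retry_codes", ())

A callable can also inspect headers or the body, which a static code list cannot.
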
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 9270f33..ec46ca3 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -9,13 +9,12 @@
"""Utility classes to setup OAuth and link accounts to gallery-dl"""
from .common import Extractor, Message
-from . import deviantart, flickr, mastodon, pixiv, reddit, smugmug, tumblr
from .. import text, oauth, util, config, exception
from ..output import stdout_write
from ..cache import cache
import urllib.parse
+import binascii
import hashlib
-import base64
REDIRECT_URI_LOCALHOST = "http://localhost:6414/"
REDIRECT_URI_HTTPS = "https://mikf.github.io/gallery-dl/oauth-redirect.html"
@@ -76,7 +75,8 @@ class OAuthBase(Extractor):
browser = webbrowser.get()
if browser and browser.open(url):
- self.log.info("Opening URL in %s:", browser.name.capitalize())
+ name = getattr(browser, "name", "Browser")
+ self.log.info("Opening URL in %s:", name.capitalize())
else:
self.log.info("Please open this URL in your browser:")
@@ -242,6 +242,7 @@ class OAuthFlickr(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import flickr
self._oauth1_authorization_flow(
flickr.FlickrAPI.API_KEY,
@@ -258,6 +259,7 @@ class OAuthSmugmug(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import smugmug
self._oauth1_authorization_flow(
smugmug.SmugmugAPI.API_KEY,
@@ -274,6 +276,7 @@ class OAuthTumblr(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import tumblr
self._oauth1_authorization_flow(
tumblr.TumblrAPI.API_KEY,
@@ -294,6 +297,7 @@ class OAuthDeviantart(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import deviantart
self._oauth2_authorization_code_grant(
self.oauth_config("client-id"),
@@ -313,6 +317,7 @@ class OAuthReddit(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import reddit
self.session.headers["User-Agent"] = reddit.RedditAPI.USER_AGENT
self._oauth2_authorization_code_grant(
@@ -337,6 +342,7 @@ class OAuthMastodon(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import mastodon
for application in mastodon.INSTANCES.values():
if self.instance == application["root"].partition("://")[2]:
@@ -389,11 +395,12 @@ class OAuthPixiv(OAuthBase):
def items(self):
yield Message.Version, 1
+ from . import pixiv
code_verifier = util.generate_token(32)
- digest = hashlib.sha256(code_verifier.encode("ascii")).digest()
- code_challenge = base64.urlsafe_b64encode(
- digest).rstrip(b"=").decode("ascii")
+ digest = hashlib.sha256(code_verifier.encode()).digest()
+ code_challenge = binascii.b2a_base64(
+ digest)[:-2].decode().replace("+", "-").replace("/", "_")
url = "https://app-api.pixiv.net/web/v1/login"
params = {
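
The PKCE change swaps base64.urlsafe_b64encode for binascii plus two replace() calls; both spellings produce the same unpadded base64url S256 code challenge. A standalone check (verifier value is a stand-in for util.generate_token(32)):

    import base64
    import binascii
    import hashlib

    verifier = "0" * 32
    digest = hashlib.sha256(verifier.encode()).digest()

    a = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    b = (binascii.b2a_base64(digest)[:-2].decode()
         .replace("+", "-").replace("/", "_"))
    assert a == b

The [:-2] slice drops the trailing "=\n" that b2a_base64 appends for a 32-byte digest.
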
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 1f520c3..e4bfa2a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,11 +9,10 @@
"""Extractors for https://www.patreon.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import memcache
import collections
import itertools
-import json
class PatreonExtractor(Extractor):
@@ -251,7 +250,7 @@ class PatreonExtractor(Extractor):
return [genmap[ft] for ft in filetypes]
def _extract_bootstrap(self, page):
- return json.loads(text.extr(
+ return util.json_loads(text.extr(
page, "window.patreon.bootstrap,", "\n});") + "}")
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index 375b5e3..6234e6a 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
-import base64
+import binascii
import json
@@ -168,7 +168,7 @@ class PhotobucketImageExtractor(Extractor):
image["titleOrFilename"] = image["title"] or name
image["tags"] = image.pop("clarifaiTagList", [])
- mtype, _, mid = base64.b64decode(image["id"]).partition(b":")
+ mtype, _, mid = binascii.a2b_base64(image["id"]).partition(b":")
image["pictureId"] = mid.decode() if mtype == b"mediaId" else ""
yield Message.Directory, image
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 63b16ce..31ddbcc 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import itertools
-import json
BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
@@ -309,7 +308,7 @@ class PinterestSearchExtractor(PinterestExtractor):
def __init__(self, match):
PinterestExtractor.__init__(self, match)
- self.search = match.group(1)
+ self.search = text.unquote(match.group(1))
def metadata(self):
return {"search": self.search}
@@ -504,7 +503,10 @@ class PinterestAPI():
"username_or_email": username,
"password" : password,
}
- data = {"data": json.dumps({"options": options}), "source_url": ""}
+ data = {
+ "data" : util.json_dumps({"options": options}),
+ "source_url": "",
+ }
try:
response = self.extractor.request(
@@ -523,7 +525,10 @@ class PinterestAPI():
def _call(self, resource, options):
url = "{}/resource/{}Resource/get/".format(self.root, resource)
- params = {"data": json.dumps({"options": options}), "source_url": ""}
+ params = {
+ "data" : util.json_dumps({"options": options}),
+ "source_url": "",
+ }
response = self.extractor.request(
url, params=params, headers=self.headers,
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 535fae9..4135259 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,8 @@
"""Extractors for https://www.plurk.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
import datetime
-import time
-import json
import re
@@ -20,6 +18,7 @@ class PlurkExtractor(Extractor):
"""Base class for plurk extractors"""
category = "plurk"
root = "https://www.plurk.com"
+ request_interval = 1.0
def items(self):
urls = self._urls_ex if self.config("comments", False) else self._urls
@@ -59,14 +58,13 @@ class PlurkExtractor(Extractor):
return
elif info["has_newer"] < 200:
del data["count"]
- time.sleep(1)
data["from_response_id"] = info["responses"][-1]["id"] + 1
@staticmethod
def _load(data):
if not data:
raise exception.NotFoundError("user")
- return json.loads(re.sub(r"new Date\(([^)]+)\)", r"\1", data))
+ return util.json_loads(re.sub(r"new Date\(([^)]+)\)", r"\1", data))
class PlurkTimelineExtractor(PlurkExtractor):
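
The manual time.sleep(1) between response pages is gone in favor of a declarative request_interval = 1.0 on the class, presumably honored centrally by the base Extractor for every request it makes; the new pornpics and soundgasm extractors below use the (min, max) range form. An assumed sketch of how such a value could be applied:

    import random
    import time

    def wait(interval):
        # fixed delay for a number, randomized delay for a (min, max) range
        if not interval:
            return
        if isinstance(interval, (tuple, list)):
            interval = random.uniform(*interval)
        time.sleep(interval)

Declaring the interval once also rate-limits code paths that never reached the old sleep call.
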
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index c35ee74..49da9ce 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -79,7 +79,7 @@ class PoipikuExtractor(Extractor):
page = self.request(
url, method="POST", headers=headers, data=data).json()["html"]
- if page.startswith("You need to"):
+ if page.startswith(("You need to", "Password is incorrect")):
self.log.warning("'%s'", page)
for thumb in text.extract_iter(
diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py
new file mode 100644
index 0000000..783f3da
--- /dev/null
+++ b/gallery_dl/extractor/pornpics.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.pornpics.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?pornpics\.com(?:/\w\w)?"
+
+
+class PornpicsExtractor(Extractor):
+ """Base class for pornpics extractors"""
+ category = "pornpics"
+ root = "https://www.pornpics.com"
+ request_interval = (0.5, 1.5)
+
+ def __init__(self, match):
+ super().__init__(match)
+ self.item = match.group(1)
+ self.session.headers["Referer"] = self.root
+
+ def items(self):
+ for gallery in self.galleries():
+ gallery["_extractor"] = PornpicsGalleryExtractor
+ yield Message.Queue, gallery["g_url"], gallery
+
+ def _pagination(self, url, params=None):
+ if params is None:
+ # fetch first 20 galleries from HTML
+ # since '"offset": 0' does not return a JSON response
+ page = self.request(url).text
+ for path in text.extract_iter(
+ page, 'class="rel-link" href="', '"'):
+ yield {"g_url": self.root + path}
+ del page
+ params = {"offset": 20}
+
+ limit = params["limit"] = 20
+
+ headers = {
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "Referer": url if params["offset"] else self.root + "/",
+ "X-Requested-With": "XMLHttpRequest",
+ }
+
+ while True:
+ galleries = self.request(
+ url, params=params, headers=headers).json()
+ yield from galleries
+
+ if len(galleries) < limit:
+ return
+ params["offset"] += limit
+
+
+class PornpicsGalleryExtractor(PornpicsExtractor, GalleryExtractor):
+ """Extractor for pornpics galleries"""
+ pattern = BASE_PATTERN + r"(/galleries/(?:[^/?#]+-)?(\d+))"
+ test = (
+ (("https://www.pornpics.com/galleries/british-beauty-danielle-flashes-"
+ "hot-breasts-ass-and-snatch-in-the-forest-62610699/"), {
+ "pattern": r"https://cdni\.pornpics\.com/1280/7/160/62610699"
+ r"/62610699_\d+_[0-9a-f]{4}\.jpg",
+ "keyword": {
+ "categories": ["MILF", "Amateur", "Sexy", "Outdoor"],
+ "channel": "FTV MILFs",
+ "count": 17,
+ "gallery_id": 62610699,
+ "models": ["Danielle"],
+ "num": int,
+ "slug": "british-beauty-danielle-flashes-"
+ "hot-breasts-ass-and-snatch-in-the-forest",
+ "tags": ["Amateur MILF", "Sexy MILF"],
+ "title": "British beauty Danielle flashes "
+ "hot breasts, ass and snatch in the forest",
+ "views": int,
+ },
+ }),
+ ("https://pornpics.com/es/galleries/62610699", {
+ "keyword": {
+ "slug": "british-beauty-danielle-flashes-"
+ "hot-breasts-ass-and-snatch-in-the-forest",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ PornpicsExtractor.__init__(self, match)
+ self.gallery_id = match.group(2)
+
+ items = GalleryExtractor.items
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+
+ return {
+ "gallery_id": text.parse_int(self.gallery_id),
+ "slug" : extr("/galleries/", "/").rpartition("-")[0],
+ "title" : text.unescape(extr("<h1>", "<")),
+ "channel" : extr('>Channel:', '</a>').rpartition(">")[2],
+ "models" : text.split_html(extr(
+ ">Models:", '<span class="suggest')),
+ "categories": text.split_html(extr(
+ ">Categories:", '<span class="suggest')),
+ "tags" : text.split_html(extr(
+ ">Tags List:", ' </div>')),
+ "views" : text.parse_int(extr(">Views:", "<").replace(",", "")),
+ }
+
+ def images(self, page):
+ return [
+ (url, None)
+ for url in text.extract_iter(page, "class='rel-link' href='", "'")
+ ]
+
+
+class PornpicsTagExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics tag searches"""
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"/tags/([^/?#]+)"
+ test = (
+ ("https://www.pornpics.com/tags/summer-dress/", {
+ "pattern": PornpicsGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://pornpics.com/fr/tags/summer-dress"),
+ )
+
+ def galleries(self):
+ url = "{}/tags/{}/".format(self.root, self.item)
+ return self._pagination(url)
+
+
+class PornpicsSearchExtractor(PornpicsExtractor):
+ """Extractor for galleries from pornpics search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/(?:\?q=|pornstars/|channels/)([^/&#]+)"
+ test = (
+ ("https://www.pornpics.com/?q=nature", {
+ "pattern": PornpicsGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://www.pornpics.com/channels/femjoy/", {
+ "pattern": PornpicsGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://www.pornpics.com/pornstars/emma-brown/", {
+ "pattern": PornpicsGalleryExtractor.pattern,
+ "range": "1-50",
+ "count": 50,
+ }),
+ ("https://pornpics.com/jp/?q=nature"),
+ ("https://pornpics.com/it/channels/femjoy"),
+ ("https://pornpics.com/pt/pornstars/emma-brown"),
+ )
+
+ def galleries(self):
+ url = self.root + "/search/srch.php"
+ params = {
+ "q" : self.item.replace("-", " "),
+ "lang" : "en",
+ "offset": 0,
+ }
+ return self._pagination(url, params)
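
PornpicsExtractor._pagination() bootstraps from HTML because the JSON endpoint returns nothing at offset 0, then pages the JSON API in steps of 20 and stops on a short page. The stopping rule in isolation ('fetch' is a hypothetical callable returning one page of gallery dicts):

    def offset_pages(fetch, offset=20, limit=20):
        while True:
            items = fetch(offset, limit)
            yield from items
            if len(items) < limit:  # short page means the last page
                return
            offset += limit
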
diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py
index 7e266cc..32567f6 100644
--- a/gallery_dl/extractor/pururin.py
+++ b/gallery_dl/extractor/pururin.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import GalleryExtractor
from .. import text, util
import binascii
-import json
class PururinGalleryExtractor(GalleryExtractor):
@@ -73,7 +72,7 @@ class PururinGalleryExtractor(GalleryExtractor):
url = "{}/read/{}/01/x".format(self.root, self.gallery_id)
page = self.request(url).text
- info = json.loads(binascii.a2b_base64(text.extr(
+ info = util.json_loads(binascii.a2b_base64(text.extr(
page, '<gallery-read encoded="', '"')).decode())
self._ext = info["image_extension"]
self._cnt = info["total_pages"]
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 8b5b6b6..1800b68 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,9 +9,8 @@
"""Generic extractors for *reactor sites"""
from .common import BaseExtractor, Message
-from .. import text
+from .. import text, util
import urllib.parse
-import json
class ReactorExtractor(BaseExtractor):
@@ -84,13 +83,13 @@ class ReactorExtractor(BaseExtractor):
script = script[:script.index("</")].strip()
try:
- data = json.loads(script)
+ data = util.json_loads(script)
except ValueError:
try:
# remove control characters and escape backslashes
mapping = dict.fromkeys(range(32))
script = script.translate(mapping).replace("\\", "\\\\")
- data = json.loads(script)
+ data = util.json_loads(script)
except ValueError as exc:
self.log.warning("Unable to parse JSON data: %s", exc)
return
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 204562e..305de2a 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -29,7 +29,14 @@ class RedditExtractor(Extractor):
parentdir = self.config("parent-directory")
max_depth = self.config("recursion", 0)
+
videos = self.config("videos", True)
+ if videos:
+ if videos == "ytdl":
+ self._extract_video = self._extract_video_ytdl
+ elif videos == "dash":
+ self._extract_video = self._extract_video_dash
+ videos = True
submissions = self.submissions()
visited = set()
@@ -62,19 +69,8 @@ class RedditExtractor(Extractor):
elif submission["is_video"]:
if videos:
text.nameext_from_url(url, submission)
- if videos == "ytdl":
- url = "https://www.reddit.com" + \
- submission["permalink"]
- else:
- submission["_ytdl_extra"] = {
- "title": submission["title"],
- }
- try:
- url = (submission["secure_media"]
- ["reddit_video"]["dash_url"])
- except (KeyError, TypeError):
- pass
- yield Message.Url, "ytdl:" + url, submission
+ url = "ytdl:" + self._extract_video(submission)
+ yield Message.Url, url, submission
elif not submission["is_self"]:
urls.append((url, submission))
@@ -145,6 +141,21 @@ class RedditExtractor(Extractor):
submission["id"], item["media_id"])
self.log.debug(src)
+ def _extract_video_ytdl(self, submission):
+ return "https://www.reddit.com" + submission["permalink"]
+
+ def _extract_video_dash(self, submission):
+ submission["_ytdl_extra"] = {"title": submission["title"]}
+ try:
+ return (submission["secure_media"]["reddit_video"]["dash_url"] +
+ "#__youtubedl_smuggle=%7B%22to_generic%22%3A+1%7D")
+ except Exception:
+ return submission["url"]
+
+ def _extract_video(self, submission):
+ submission["_ytdl_extra"] = {"title": submission["title"]}
+ return submission["url"]
+
class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""
@@ -233,6 +244,25 @@ class RedditSubmissionExtractor(RedditExtractor):
"content": "1e7dde4ee7d5f4c4b45749abfd15b2dbfa27df3f",
"count": 3,
}),
+ # video
+ ("https://www.reddit.com/r/aww/comments/90bu6w/", {
+ "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11",
+ "count": 1,
+ }),
+ # video (ytdl)
+ ("https://www.reddit.com/r/aww/comments/90bu6w/", {
+ "options": (("videos", "ytdl"),),
+ "pattern": r"ytdl:https://www.reddit.com/r/aww/comments/90bu6w"
+ r"/heat_index_was_110_degrees_so_we_offered_him_a/",
+ "count": 1,
+ }),
+ # video (dash)
+ ("https://www.reddit.com/r/aww/comments/90bu6w/", {
+ "options": (("videos", "dash"),),
+ "pattern": r"ytdl:https://v.redd.it/gyh95hiqc0b11"
+ r"/DASHPlaylist.mpd\?a=",
+ "count": 1,
+ }),
# deleted gallery (#953)
("https://www.reddit.com/gallery/icfgzv", {
"count": 0,
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index ad4282c..eaaef7d 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -23,6 +23,7 @@ class RedgifsExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1)
+ self.api = RedgifsAPI(self)
formats = self.config("format")
if formats is None:
@@ -69,30 +70,89 @@ class RedgifsUserExtractor(RedgifsExtractor):
"""Extractor for redgifs user profiles"""
subcategory = "user"
directory_fmt = ("{category}", "{userName}")
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)"
- test = ("https://www.redgifs.com/users/Natalifiction", {
- "pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4",
- "count": ">= 100",
- })
+ pattern = r"(?:https?://)?(?:\w+\.)?redgifs\.com/users/([^/?#]+)/?$"
+ test = (
+ ("https://www.redgifs.com/users/Natalifiction", {
+ "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.mp4",
+ "count": ">= 100",
+ }),
+ ("https://v3.redgifs.com/users/lamsinka89", {
+ "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.(mp4|jpg)",
+ "count": ">= 100",
+ }),
+ )
def metadata(self):
return {"userName": self.key}
def gifs(self):
- return RedgifsAPI(self).user(self.key)
+ return self.api.user(self.key)
+
+
+class RedgifsCollectionExtractor(RedgifsExtractor):
+ """Extractor for an individual user collection"""
+ subcategory = "collection"
+ directory_fmt = ("{category}", "{userName}", "{folderName}")
+ archive_fmt = "{folderId}_{id}"
+ pattern = (r"(?:https?://)?(?:www\.)?redgifs\.com/users"
+ r"/([^/?#]+)/collections/([^/?#]+)")
+ test = (
+ ("https://www.redgifs.com/users/boombah123/collections/2631326bbd", {
+ "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.mp4",
+ "range": "1-20",
+ "count": 20,
+ }),
+ ("https://www.redgifs.com/users/boombah123/collections/9e6f7dd41f", {
+ "pattern": r"https://\w+\.redgifs\.com/[\w-]+\.mp4",
+ "range": "1-20",
+ "count": 20,
+ }),
+ )
+
+ def __init__(self, match):
+ RedgifsExtractor.__init__(self, match)
+ self.collection_id = match.group(2)
+
+ def metadata(self):
+ data = {"userName": self.key}
+ data.update(self.api.collection_info(self.key, self.collection_id))
+ return data
+
+ def gifs(self):
+ return self.api.collection(self.key, self.collection_id)
+
+
+class RedgifsCollectionsExtractor(RedgifsExtractor):
+ """Extractor for redgifs user collections"""
+ subcategory = "collections"
+ pattern = (r"(?:https?://)?(?:www\.)?redgifs\.com/users"
+ r"/([^/?#]+)/collections/?$")
+ test = ("https://www.redgifs.com/users/boombah123/collections", {
+ "pattern": (r"https://www\.redgifs\.com/users"
+ r"/boombah123/collections/\w+"),
+ "count": ">= 3",
+ })
+
+ def items(self):
+ for collection in self.api.collections(self.key):
+ url = "{}/users/{}/collections/{}".format(
+ self.root, self.key, collection["folderId"])
+ collection["_extractor"] = RedgifsCollectionExtractor
+ yield Message.Queue, url, collection
class RedgifsSearchExtractor(RedgifsExtractor):
"""Extractor for redgifs search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)"
+ pattern = r"(?:https?://)?(?:\w+\.)?redgifs\.com/browse/?\?([^#]+)"
test = (
("https://www.redgifs.com/browse?tags=JAV", {
"pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
"range": "1-10",
"count": 10,
}),
+ ("https://v3.redgifs.com/browse?tags=JAV"),
("https://www.redgifs.com/browse?type=i&verified=y&order=top7"),
)
@@ -102,14 +162,14 @@ class RedgifsSearchExtractor(RedgifsExtractor):
return {"search": search}
def gifs(self):
- return RedgifsAPI(self).search(self.params)
+ return self.api.search(self.params)
class RedgifsImageExtractor(RedgifsExtractor):
"""Extractor for individual gifs from redgifs.com"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:"
- r"(?:www\.)?redgifs\.com/(?:watch|ifr)|"
+ r"(?:\w+\.)?redgifs\.com/(?:watch|ifr)|"
r"(?:www\.)?gifdeliverynetwork\.com|"
r"i\.redgifs\.com/i)/([A-Za-z]+)")
test = (
@@ -121,13 +181,16 @@ class RedgifsImageExtractor(RedgifsExtractor):
("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"),
("https://i.redgifs.com/i/FoolishForkedAbyssiniancat"),
("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
+ ("https://v3.redgifs.com/watch/FoolishForkedAbyssiniancat"),
)
def gifs(self):
- return (RedgifsAPI(self).gif(self.key),)
+ return (self.api.gif(self.key),)
class RedgifsAPI():
+ """https://api.redgifs.com/docs/index.html"""
+
API_ROOT = "https://api.redgifs.com"
def __init__(self, extractor):
@@ -149,6 +212,19 @@ class RedgifsAPI():
params = {"order": order}
return self._pagination(endpoint, params)
+ def collection(self, user, collection_id):
+ endpoint = "/v2/users/{}/collections/{}/gifs".format(
+ user, collection_id)
+ return self._pagination(endpoint)
+
+ def collection_info(self, user, collection_id):
+ endpoint = "/v2/users/{}/collections/{}".format(user, collection_id)
+ return self._call(endpoint)
+
+ def collections(self, user):
+ endpoint = "/v2/users/{}/collections".format(user)
+ return self._pagination(endpoint, key="collections")
+
def search(self, params):
endpoint = "/v2/gifs/search"
params["search_text"] = params.pop("tags", None)
@@ -161,12 +237,14 @@ class RedgifsAPI():
return self.extractor.request(
url, params=params, headers=self.headers).json()
- def _pagination(self, endpoint, params):
+ def _pagination(self, endpoint, params=None, key="gifs"):
+ if params is None:
+ params = {}
params["page"] = 1
while True:
data = self._call(endpoint, params)
- yield from data["gifs"]
+ yield from data[key]
if params["page"] >= data["pages"]:
return
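
Parameterizing the response key lets one pagination loop serve both the gif endpoints ("gifs") and collections() ("collections"). A generic standalone version of the same page-numbered loop (the final page increment falls outside this hunk's context and is assumed):

    def paginate(call, endpoint, params=None, key="gifs"):
        params = dict(params) if params else {}
        params["page"] = 1
        while True:
            data = call(endpoint, params)
            yield from data[key]
            if params["page"] >= data["pages"]:  # response reports total pages
                return
            params["page"] += 1
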
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index f2bf3cb..278ad14 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -63,6 +63,10 @@ BASE_PATTERN = ShopifyExtractor.update({
"root": "https://modcloth.com",
"pattern": r"modcloth\.com",
},
+ "ohpolly": {
+ "root": "https://www.ohpolly.com",
+ "pattern": r"(?:www\.)?ohpolly\.com",
+ },
"omgmiamiswimwear": {
"root": "https://www.omgmiamiswimwear.com",
"pattern": r"(?:www\.)?omgmiamiswimwear\.com",
@@ -102,6 +106,7 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
("https://loungeunderwear.com/collections/apparel"),
("https://michaels.com.au/collections/microphones"),
("https://modcloth.com/collections/shoes"),
+ ("https://www.ohpolly.com/collections/dresses-mini-dresses"),
("https://www.omgmiamiswimwear.com/collections/fajas"),
("https://pinupgirlclothing.com/collections/evening"),
("https://www.raidlondon.com/collections/flats"),
@@ -141,6 +146,8 @@ class ShopifyProductExtractor(ShopifyExtractor):
("https://michaels.com.au/collections/audio/products"
"/boya-by-wm4-pro-k5-2-4ghz-mic-android-1-1-101281"),
("https://modcloth.com/collections/shoes/products/heidii-brn"),
+ (("https://www.ohpolly.com/products/edonia-ruched-triangle-cup"
+ "-a-line-mini-dress-brown")),
("https://www.omgmiamiswimwear.com/products/la-medusa-maxi-dress", {
"pattern": r"https://cdn\.shopify\.com/s/files/1/1819/6171/",
"count": 5,
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 506db26..bea457f 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann, Leonardo Taccari
+# Copyright 2016-2023 Mike Fährmann, Leonardo Taccari
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://www.slideshare.net/"""
from .common import GalleryExtractor
-from .. import text
-import json
+from .. import text, util
class SlidesharePresentationExtractor(GalleryExtractor):
@@ -97,7 +96,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
@staticmethod
def images(page):
- data = json.loads(text.extract(
+ data = util.json_loads(text.extract(
page, "xtend(true, slideshare_object.slideshow_config, ", ");")[0])
# using 'stripped_title' here is technically wrong, but it works all
diff --git a/gallery_dl/extractor/soundgasm.py b/gallery_dl/extractor/soundgasm.py
index 1afb92c..236f94f 100644
--- a/gallery_dl/extractor/soundgasm.py
+++ b/gallery_dl/extractor/soundgasm.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2022 Mike Fährmann
+# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,17 +11,46 @@
from .common import Extractor, Message
from .. import text
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?soundgasm\.net/u(?:ser)?"
-class SoundgasmAudioExtractor(Extractor):
- """Extractor for audio clips from soundgasm.net"""
+
+class SoundgasmExtractor(Extractor):
+ """Base class for soundgasm extractors"""
category = "soundgasm"
- subcategory = "audio"
root = "https://soundgasm.net"
+ request_interval = (0.5, 1.5)
directory_fmt = ("{category}", "{user}")
filename_fmt = "{title}.{extension}"
archive_fmt = "{user}_{slug}"
- pattern = (r"(?:https?://)?(?:www\.)?soundgasm\.net"
- r"/u(?:ser)?/([^/?#]+)/([^/?#]+)")
+
+ def items(self):
+ for sound in map(self._extract_sound, self.sounds()):
+ url = sound["url"]
+ yield Message.Directory, sound
+ yield Message.Url, url, text.nameext_from_url(url, sound)
+
+ def _extract_sound(self, url):
+ extr = text.extract_from(self.request(url).text)
+
+ _, user, slug = url.rstrip("/").rsplit("/", 2)
+ data = {
+ "user" : user,
+ "slug" : slug,
+ "title": text.unescape(extr('aria-label="title">', "<")),
+ "description": text.unescape(text.remove_html(extr(
+ 'class="jp-description">', '</div>'))),
+ }
+
+ formats = extr('"setMedia", {', '}')
+ data["url"] = text.extr(formats, ': "', '"')
+
+ return data
+
+
+class SoundgasmAudioExtractor(SoundgasmExtractor):
+ """Extractor for audio clips from soundgasm.net"""
+ subcategory = "audio"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)"
test = (
(("https://soundgasm.net/u/ClassWarAndPuppies2"
"/687-Otto-von-Toontown-12822"), {
@@ -47,47 +76,39 @@ class SoundgasmAudioExtractor(Extractor):
)
def __init__(self, match):
- Extractor.__init__(self, match)
+ SoundgasmExtractor.__init__(self, match)
self.user, self.slug = match.groups()
- def items(self):
- url = "{}/u/{}/{}".format(self.root, self.user, self.slug)
- extr = text.extract_from(self.request(url).text)
+ def sounds(self):
+ return ("{}/u/{}/{}".format(self.root, self.user, self.slug),)
- data = {
- "user" : self.user,
- "slug" : self.slug,
- "title": text.unescape(extr('aria-label="title">', "<")),
- "description": text.unescape(text.remove_html(extr(
- 'class="jp-description">', '</div>'))),
- }
-
- formats = extr('"setMedia", {', '}')
- url = text.extr(formats, ': "', '"')
-
- yield Message.Directory, data
- yield Message.Url, url, text.nameext_from_url(url, data)
-
-class SoundgasmUserExtractor(Extractor):
+class SoundgasmUserExtractor(SoundgasmExtractor):
"""Extractor for all sounds from a soundgasm user"""
- category = "soundgasm"
subcategory = "user"
- root = "https://soundgasm.net"
- pattern = (r"(?:https?://)?(?:www\.)?soundgasm\.net"
- r"/u(?:ser)?/([^/?#]+)/?$")
+ pattern = BASE_PATTERN + r"/([^/?#]+)/?$"
test = ("https://soundgasm.net/u/fierce-aphrodite", {
- "pattern": SoundgasmAudioExtractor.pattern,
+ "pattern": r"https://media\.soundgasm\.net/sounds/[0-9a-f]{40}\.m4a",
"count" : ">= 15",
+ "keyword": {
+ "description": str,
+ "extension": "m4a",
+ "filename": "re:^[0-9a-f]{40}$",
+ "slug": str,
+ "title": str,
+ "url": str,
+ "user": "fierce-aphrodite"
+ },
})
def __init__(self, match):
- Extractor.__init__(self, match)
+ SoundgasmExtractor.__init__(self, match)
self.user = match.group(1)
- def items(self):
+ def sounds(self):
page = self.request(self.root + "/user/" + self.user).text
- data = {"_extractor": SoundgasmAudioExtractor}
- for sound in text.extract_iter(
- page, 'class="sound-details">', "</a>"):
- yield Message.Queue, text.extr(sound, '<a href="', '"'), data
+ return [
+ text.extr(sound, '<a href="', '"')
+ for sound in text.extract_iter(
+ page, 'class="sound-details">', "</a>")
+ ]
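
The refactor above is a template-method split: SoundgasmExtractor.items()
owns per-sound extraction, and subclasses only implement sounds() to
enumerate page URLs (one URL for /audio, a scraped list for /user). A
minimal sketch of the same shape, with stand-in names:

    class Base:
        def items(self):
            for url in self.sounds():         # subclass hook
                yield {"url": url}            # stands in for _extract_sound()

    class Single(Base):
        def __init__(self, url):
            self.url = url
        def sounds(self):
            return (self.url,)

    class User(Base):
        def __init__(self, urls):
            self.urls = urls
        def sounds(self):
            return list(self.urls)

    print(list(Single("https://soundgasm.net/u/a/b").items()))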
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index ea39c5e..4de7e9b 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2022 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,9 +9,8 @@
"""Extractors for https://www.subscribestar.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
-import json
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
@@ -92,7 +91,7 @@ class SubscribestarExtractor(Extractor):
gallery = text.extr(html, 'data-gallery="', '"')
if gallery:
media.extend(
- item for item in json.loads(text.unescape(gallery))
+ item for item in util.json_loads(text.unescape(gallery))
if "/previews/" not in item["url"]
)
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
new file mode 100644
index 0000000..4b15b14
--- /dev/null
+++ b/gallery_dl/extractor/szurubooru.py
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for szurubooru instances"""
+
+from . import booru
+from .. import text
+
+import collections
+import binascii
+
+
+class SzurubooruExtractor(booru.BooruExtractor):
+ basecategory = "szurubooru"
+ filename_fmt = "{id}_{version}_{checksumMD5}.{extension}"
+ per_page = 100
+
+ def __init__(self, match):
+ booru.BooruExtractor.__init__(self, match)
+ self.headers = {
+ "Accept": "application/json",
+ "Content-Type": "application/json",
+ }
+
+ username = self.config("username")
+ if username:
+ token = self.config("token")
+ if token:
+ value = username + ":" + token
+ self.headers["Authorization"] = "Token " + \
+ binascii.b2a_base64(value.encode())[:-1].decode()
+
+ def _api_request(self, endpoint, params=None):
+ url = self.root + "/api" + endpoint
+ return self.request(url, headers=self.headers, params=params).json()
+
+ def _pagination(self, endpoint, params):
+ params["offset"] = 0
+ params["limit"] = self.per_page
+
+ while True:
+ data = self._api_request(endpoint, params)
+ results = data["results"]
+
+ yield from results
+
+ if len(results) < self.per_page:
+ return
+ params["offset"] += len(results)
+
+ def _file_url(self, post):
+ url = post["contentUrl"]
+ if not url.startswith("http"):
+ url = self.root + "/" + url
+ return url
+
+ @staticmethod
+ def _prepare(post):
+ post["date"] = text.parse_datetime(
+ post["creationTime"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ tags = []
+ append = tags.append
+ tags_categories = collections.defaultdict(list)
+
+ for tag in post["tags"]:
+ tag_type = tag["category"].rpartition("_")[2]
+ tag_name = tag["names"][0]
+ tags_categories[tag_type].append(tag_name)
+ append(tag_name)
+
+ post["tags"] = tags
+ for category, tags in tags_categories.items():
+ post["tags_" + category] = tags
+
+
+BASE_PATTERN = SzurubooruExtractor.update({
+ "foalcon": {
+ "root": "https://booru.foalcon.com",
+ "pattern": r"booru\.foalcon\.com",
+ },
+ "bcbnsfw": {
+ "root": "https://booru.bcbnsfw.space",
+ "pattern": r"booru\.bcbnsfw\.space",
+ },
+})
+
+
+class SzurubooruTagExtractor(SzurubooruExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}_{version}"
+ pattern = BASE_PATTERN + r"/posts/query=([^/?#]+)"
+ test = (
+ ("https://booru.foalcon.com/posts/query=simple_background", {
+ "pattern": r"https://booru\.foalcon\.com/data/posts"
+ r"/\d+_[0-9a-f]{16}\.\w+",
+ "range": "1-150",
+ "count": 150,
+ }),
+ ("https://booru.bcbnsfw.space/posts/query=simple_background"),
+ )
+
+ def __init__(self, match):
+ SzurubooruExtractor.__init__(self, match)
+ query = match.group(match.lastindex)
+ self.query = text.unquote(query.replace("+", " "))
+
+ def metadata(self):
+ return {"search_tags": self.query}
+
+ def posts(self):
+ return self._pagination("/posts/", {"query": self.query})
+
+
+class SzurubooruPostExtractor(SzurubooruExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}_{version}"
+ pattern = BASE_PATTERN + r"/post/(\d+)"
+ test = (
+ ("https://booru.foalcon.com/post/30092", {
+ "pattern": r"https://booru\.foalcon\.com/data/posts"
+ r"/30092_b7d56e941888b624\.png",
+ "url": "dad4d4c67d87cd9a4ac429b3414747c27a95d5cb",
+ "content": "86d1514c0ca8197950cc4b74e7a59b2dc76ebf9c",
+ }),
+ ("https://booru.bcbnsfw.space/post/1599", {
+ "pattern": r"https://booru\.bcbnsfw\.space/data/posts"
+ r"/1599_53784518e92086bd\.png",
+ "content": "0c38fc612ba1f03950fad31c4f80a1fccdab1096",
+ }),
+ )
+
+ def __init__(self, match):
+ SzurubooruExtractor.__init__(self, match)
+ self.post_id = match.group(match.lastindex)
+
+ def posts(self):
+ return (self._api_request("/post/" + self.post_id),)
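
The optional authentication built in SzurubooruExtractor.__init__() is a
'Token' scheme over base64("username:token"); binascii.b2a_base64 appends
a newline, hence the [:-1] slice. Reproduced as a standalone sketch:

    import binascii

    def szurubooru_auth(username, token):
        value = username + ":" + token
        return "Token " + binascii.b2a_base64(value.encode())[:-1].decode()

    print(szurubooru_auth("alice", "secret"))
    # Token YWxpY2U6c2VjcmV0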
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index 5996268..116f3af 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -68,6 +68,21 @@ class TelegraphGalleryExtractor(GalleryExtractor):
"title": "Всё о друзьях моей сестрицы",
},
}),
+ ("https://telegra.ph/Disharmonica---Saber-Nero-02-21", {
+ "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.(jpg|png)",
+ "keyword": {
+ "author": "cosmos",
+ "caption": "",
+ "count": 89,
+ "date": "dt:2022-02-21 05:57:39",
+ "description": "",
+ "num_formatted": r"re:^\d{2}$",
+ "post_url": "https://telegra.ph"
+ "/Disharmonica---Saber-Nero-02-21",
+ "slug": "Disharmonica---Saber-Nero-02-21",
+ "title": "Disharmonica - Saber Nero",
+ },
+ }),
)
def metadata(self, page):
@@ -89,7 +104,8 @@ class TelegraphGalleryExtractor(GalleryExtractor):
return data
def images(self, page):
- figures = tuple(text.extract_iter(page, "<figure>", "</figure>"))
+ figures = (tuple(text.extract_iter(page, "<figure>", "</figure>")) or
+ tuple(text.extract_iter(page, "<img", ">")))
num_zeroes = len(str(len(figures)))
num = 0
@@ -105,7 +121,7 @@ class TelegraphGalleryExtractor(GalleryExtractor):
result.append((url, {
"url" : url,
- "caption" : text.unescape(caption),
+ "caption" : text.unescape(caption) if caption else "",
"num" : num,
"num_formatted": str(num).zfill(num_zeroes),
}))
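
The images() change falls back to bare <img> tags when a page contains no
<figure> markup; the 'or' works because the first tuple is empty in that
case. Sketch with a simplified stand-in for text.extract_iter:

    def extract_iter(page, begin, end):
        pos = 0
        while True:
            first = page.find(begin, pos)
            if first < 0:
                return
            first += len(begin)
            last = page.find(end, first)
            if last < 0:
                return
            pos = last + len(end)
            yield page[first:last]

    page = '<img src="/file/a.jpg"><img src="/file/b.jpg">'
    figures = (tuple(extract_iter(page, "<figure>", "</figure>")) or
               tuple(extract_iter(page, "<img", ">")))
    print(figures)   # (' src="/file/a.jpg"', ' src="/file/b.jpg"')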
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index c75952a..155db1e 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -286,7 +286,11 @@ class TumblrUserExtractor(TumblrExtractor):
"count": 3,
"options": (("posts", "all"), ("external", True))
}),
- ("https://mikf123-hidden.tumblr.com/", { # dashbord-only
+ ("https://mikf123-hidden.tumblr.com/", { # dashboard-only
+ "options": (("access-token", None),),
+ "exception": exception.AuthorizationError,
+ }),
+ ("https://mikf123-hidden.tumblr.com/", { # dashboard-only
"count": 2,
"keyword": {"tags": ["test", "hidden"]},
}),
@@ -498,12 +502,24 @@ class TumblrAPI(oauth.OAuth1API):
if 200 <= status < 400:
return data["response"]
+ self.log.debug(data)
if status == 403:
raise exception.AuthorizationError()
+
elif status == 404:
+ try:
+ error = data["errors"][0]["detail"]
+ board = ("only viewable within the Tumblr dashboard" in error)
+ except Exception:
+ board = False
+
+ if board:
+ self.log.info("Run 'gallery-dl oauth:tumblr' "
+ "to access dashboard-only blogs")
+ raise exception.AuthorizationError(error)
raise exception.NotFoundError("user or post")
- elif status == 429:
+ elif status == 429:
# daily rate limit
if response.headers.get("x-ratelimit-perday-remaining") == "0":
self.log.info("Daily API rate limit exceeded")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 17a2202..29b4ac3 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -41,6 +41,10 @@ class TwitterExtractor(Extractor):
self.cards = self.config("cards", False)
self.cards_blacklist = self.config("cards-blacklist")
self.syndication = self.config("syndication")
+
+ if not self.config("transform", True):
+ self._transform_user = util.identity
+ self._transform_tweet = util.identity
self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -212,7 +216,7 @@ class TwitterExtractor(Extractor):
files.append(value)
return
elif name == "unified_card":
- data = json.loads(bvals["unified_card"]["string_value"])
+ data = util.json_loads(bvals["unified_card"]["string_value"])
self._extract_media(tweet, data["media_entities"].values(), files)
return
@@ -1436,6 +1440,8 @@ class TwitterAPI():
if "retweeted_status_result" in legacy:
retweet = legacy["retweeted_status_result"]["result"]
+ if "tweet" in retweet:
+ retweet = retweet["tweet"]
if original_retweets:
try:
retweet["legacy"]["retweeted_status_id_str"] = \
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 00389fa..053a799 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,8 +9,7 @@
"""Extractors for https://vsco.co/"""
from .common import Extractor, Message
-from .. import text
-import json
+from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)"
@@ -69,7 +68,7 @@ class VscoExtractor(Extractor):
def _extract_preload_state(self, url):
page = self.request(url, notfound=self.subcategory).text
- return json.loads(text.extr(page, "__PRELOADED_STATE__ = ", "<"))
+ return util.json_loads(text.extr(page, "__PRELOADED_STATE__ = ", "<"))
def _pagination(self, url, params, token, key, extra=None):
headers = {
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index ab05c48..68bd136 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,10 +9,9 @@
"""Extractors for https://www.weibo.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
import random
-import json
BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?weibo\.c(?:om|n)"
USER_PATTERN = BASE_PATTERN + r"/(?:(u|n|p(?:rofile)?)/)?([^/?#]+)(?:/home)?"
@@ -179,7 +178,7 @@ class WeiboExtractor(Extractor):
page = Extractor.request(
self, passport_url, method="POST", headers=headers, data=data).text
- data = json.loads(text.extr(page, "(", ");"))["data"]
+ data = util.json_loads(text.extr(page, "(", ");"))["data"]
passport_url = "https://passport.weibo.com/visitor/visitor"
params = {
diff --git a/gallery_dl/extractor/wikifeet.py b/gallery_dl/extractor/wikifeet.py
index 70e9646..662e08b 100644
--- a/gallery_dl/extractor/wikifeet.py
+++ b/gallery_dl/extractor/wikifeet.py
@@ -7,8 +7,7 @@
"""Extractors for https://www.wikifeet.com/"""
from .common import GalleryExtractor
-from .. import text
-import json
+from .. import text, util
class WikifeetGalleryExtractor(GalleryExtractor):
@@ -114,5 +113,5 @@ class WikifeetGalleryExtractor(GalleryExtractor):
"height": data["ph"],
"tags" : [tagmap[tag] for tag in data["tags"]],
})
- for data in json.loads(text.extr(page, "['gdata'] = ", ";"))
+ for data in util.json_loads(text.extr(page, "['gdata'] = ", ";"))
]
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 0125739..b308e74 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -9,9 +9,7 @@
"""Extractors for https://xhamster.com/"""
from .common import Extractor, Message
-from .. import text
-import json
-
+from .. import text, util
BASE_PATTERN = (r"(?:https?://)?((?:[\w-]+\.)?xhamster"
r"(?:\d?\.(?:com|one|desi)|\.porncache\.net))")
@@ -144,7 +142,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
def _data(self, url):
page = self.request(url).text
- return json.loads(text.extr(
+ return util.json_loads(text.extr(
page, "window.initials=", "</script>").rstrip("\n\r;"))
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 10de439..46ea074 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -9,8 +9,7 @@
"""Extractors for https://www.xvideos.com/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text
-import json
+from .. import text, util
class XvideosBase():
@@ -113,7 +112,7 @@ class XvideosUserExtractor(XvideosBase, Extractor):
def items(self):
url = "{}/profiles/{}".format(self.root, self.user)
page = self.request(url, notfound=self.subcategory).text
- data = json.loads(text.extr(
+ data = util.json_loads(text.extr(
page, "xv.conf=", ";</script>"))["data"]
if not isinstance(data["galleries"], dict):
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 58bf48d..2c5bd11 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -9,13 +9,11 @@
"""String formatters"""
import os
-import json
import time
import string
import _string
import datetime
import operator
-import functools
from . import text, util
NONE = util.NONE
@@ -228,7 +226,7 @@ class FStringFormatter():
"""Generate text by evaluating an f-string literal"""
def __init__(self, fstring, default=NONE, fmt=None):
- self.format_map = util.compile_expression("f'''" + fstring + "'''")
+ self.format_map = util.compile_expression('f"""' + fstring + '"""')
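
The quote swap matters for format strings that end in a quote character:
wrapped in ''', a trailing single quote yields four quotes in a row and a
SyntaxError, which double quotes avoid (the reverse edge case applies to a
trailing "). Illustration with a hypothetical input:

    fstring = "{title}'"                  # ends with a single quote
    bad  = "f'''" + fstring + "'''"       # f'''{title}'''' -> SyntaxError
    good = 'f"""' + fstring + '"""'       # f"""{title}'""" -> compiles
    compile(good, "<expr>", "eval")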
def parse_field_name(field_name):
@@ -399,7 +397,7 @@ _CONVERSIONS = {
"u": str.upper,
"c": str.capitalize,
"C": string.capwords,
- "j": functools.partial(json.dumps, default=str),
+ "j": util.json_dumps,
"t": str.strip,
"T": util.datetime_to_timestamp_string,
"d": text.parse_timestamp,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index f7d84f0..a64c040 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -7,7 +7,6 @@
# published by the Free Software Foundation.
import sys
-import json
import errno
import logging
import functools
@@ -33,6 +32,12 @@ class Job():
self.kwdict = {}
self.status = 0
+ actions = extr.config("actions")
+ if actions:
+ from .actions import parse
+ self._logger_actions = parse(actions)
+ self._wrap_logger = self._wrap_logger_actions
+
path_proxy = output.PathfmtProxy(self)
self._logger_extra = {
"job" : self,
@@ -68,7 +73,7 @@ class Job():
if version_info:
self.kwdict[version_info] = {
"version" : version.__version__,
- "is_executable" : getattr(sys, "frozen", False),
+ "is_executable" : util.EXECUTABLE,
"current_git_head": util.git_head()
}
@@ -94,7 +99,7 @@ class Job():
if exc.message:
log.error(exc.message)
self.status |= exc.code
- except exception.TerminateExtraction:
+ except (exception.TerminateExtraction, exception.RestartExtraction):
raise
except exception.GalleryDLException as exc:
log.error("%s: %s", exc.__class__.__name__, exc)
@@ -201,7 +206,10 @@ class Job():
return self._wrap_logger(logging.getLogger(name))
def _wrap_logger(self, logger):
- return output.LoggerAdapter(logger, self._logger_extra)
+ return output.LoggerAdapter(logger, self)
+
+ def _wrap_logger_actions(self, logger):
+ return output.LoggerAdapterActions(logger, self)
def _write_unsupported(self, url):
if self.ulog:
@@ -344,12 +352,18 @@ class DownloadJob(Job):
if kwdict:
job.kwdict.update(kwdict)
- if pextr.config("parent-skip"):
- job._skipcnt = self._skipcnt
- self.status |= job.run()
- self._skipcnt = job._skipcnt
- else:
- self.status |= job.run()
+ while True:
+ try:
+ if pextr.config("parent-skip"):
+ job._skipcnt = self._skipcnt
+ self.status |= job.run()
+ self._skipcnt = job._skipcnt
+ else:
+ self.status |= job.run()
+ break
+ except exception.RestartExtraction:
+ pass
+
else:
self._write_unsupported(url)
@@ -436,10 +450,12 @@ class DownloadJob(Job):
archive = util.expand_path(archive)
archive_format = (cfg("archive-prefix", extr.category) +
cfg("archive-format", extr.archive_fmt))
+            archive_pragma = cfg("archive-pragma")
try:
if "{" in archive:
archive = formatter.parse(archive).format_map(kwdict)
- self.archive = util.DownloadArchive(archive, archive_format)
+ self.archive = util.DownloadArchive(
+ archive, archive_format, archive_pragma)
except Exception as exc:
extr.log.warning(
"Failed to open download archive at '%s' ('%s: %s')",
@@ -709,17 +725,19 @@ class InfoJob(Job):
def _print_multi(self, title, *values):
stdout_write("{}\n {}\n\n".format(
- title, " / ".join(json.dumps(v) for v in values)))
+ title, " / ".join(map(util.json_dumps, values))))
def _print_config(self, title, optname, value):
optval = self.extractor.config(optname, util.SENTINEL)
if optval is not util.SENTINEL:
stdout_write(
"{} (custom):\n {}\n{} (default):\n {}\n\n".format(
- title, json.dumps(optval), title, json.dumps(value)))
+ title, util.json_dumps(optval),
+ title, util.json_dumps(value)))
elif value:
stdout_write(
- "{} (default):\n {}\n\n".format(title, json.dumps(value)))
+ "{} (default):\n {}\n\n".format(
+ title, util.json_dumps(value)))
class DataJob(Job):
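
Child jobs are now restartable: a RestartExtraction raised inside run()
(reachable via the new 'restart' expression in util.GLOBALS, see below)
re-enters the same job instead of aborting it. The loop, reduced to its
core:

    class RestartExtraction(Exception):
        pass

    def run_with_restart(run):
        while True:
            try:
                return run()
            except RestartExtraction:
                continue                  # start the same extractor over

    attempts = []
    def job():
        attempts.append(1)
        if len(attempts) < 3:
            raise RestartExtraction()
        return "done after %d attempts" % len(attempts)

    print(run_with_restart(job))          # done after 3 attempts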
diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py
index e9dfff0..ac38c4d 100644
--- a/gallery_dl/oauth.py
+++ b/gallery_dl/oauth.py
@@ -10,10 +10,10 @@
import hmac
import time
-import base64
import random
import string
import hashlib
+import binascii
import urllib.parse
import requests
@@ -100,7 +100,7 @@ class OAuth1Client(requests.auth.AuthBase):
key = concat(self.consumer_secret, self.token_secret or "").encode()
signature = hmac.new(key, message, hashlib.sha1).digest()
- return quote(base64.b64encode(signature).decode())
+ return quote(binascii.b2a_base64(signature)[:-1].decode())
class OAuth1API():
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 213cd2d..aad307f 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -10,9 +10,8 @@
import argparse
import logging
-import json
import sys
-from . import job, version
+from . import job, util, version
class ConfigAction(argparse.Action):
@@ -62,24 +61,21 @@ class OptionAction(argparse.Action):
class Formatter(argparse.HelpFormatter):
"""Custom HelpFormatter class to customize help output"""
- def __init__(self, *args, **kwargs):
- super().__init__(max_help_position=30, *args, **kwargs)
+ def __init__(self, prog):
+ argparse.HelpFormatter.__init__(self, prog, max_help_position=30)
- def _format_action_invocation(self, action):
- opts = action.option_strings[:]
- if opts:
- if action.nargs != 0:
- args_string = self._format_args(action, "ARG")
- opts[-1] += " " + args_string
- return ', '.join(opts)
- else:
- return self._metavar_formatter(action, action.dest)(1)[0]
+ def _format_action_invocation(self, action, join=", ".join):
+ opts = action.option_strings
+ if action.metavar:
+ opts = opts.copy()
+ opts[-1] += " " + action.metavar
+ return join(opts)
def _parse_option(opt):
key, _, value = opt.partition("=")
try:
- value = json.loads(value)
+ value = util.json_loads(value)
except ValueError:
pass
return key, value
@@ -111,6 +107,12 @@ def build_parser():
"More than one --input-file can be specified"),
)
general.add_argument(
+ "-f", "--filename",
+ dest="filename", metavar="FORMAT",
+ help=("Filename format string for downloaded files "
+ "('/O' for \"original\" filenames)"),
+ )
+ general.add_argument(
"-d", "--destination",
dest="base-directory", metavar="PATH", action=ConfigAction,
help="Target location for file downloads",
@@ -121,10 +123,9 @@ def build_parser():
help="Exact location for file downloads",
)
general.add_argument(
- "-f", "--filename",
- dest="filename", metavar="FORMAT",
- help=("Filename format string for downloaded files "
- "('/O' for \"original\" filenames)"),
+ "-X", "--extractors",
+ dest="extractor_sources", metavar="PATH", action="append",
+ help="Load external extractors from PATH",
)
general.add_argument(
"--proxy",
@@ -320,25 +321,41 @@ def build_parser():
configuration = parser.add_argument_group("Configuration Options")
configuration.add_argument(
+ "-o", "--option",
+ dest="options", metavar="KEY=VALUE", action=ParseAction, default=[],
+ help=("Additional options. "
+ "Example: -o browser=firefox") ,
+ )
+ configuration.add_argument(
"-c", "--config",
- dest="cfgfiles", metavar="FILE", action="append",
+ dest="configs_json", metavar="FILE", action="append",
help="Additional configuration files",
)
configuration.add_argument(
"--config-yaml",
- dest="yamlfiles", metavar="FILE", action="append",
- help=argparse.SUPPRESS,
+ dest="configs_yaml", metavar="FILE", action="append",
+ help="Additional configuration files in YAML format",
)
configuration.add_argument(
- "-o", "--option",
- dest="options", metavar="OPT", action=ParseAction, default=[],
- help="Additional '<key>=<value>' option values",
+ "--config-toml",
+ dest="configs_toml", metavar="FILE", action="append",
+ help="Additional configuration files in TOML format",
)
configuration.add_argument(
- "--ignore-config",
- dest="load_config", action="store_false",
+ "--config-create",
+ dest="config_init", action="store_true",
+ help="Create a basic configuration file",
+ )
+ configuration.add_argument(
+ "--config-ignore",
+ dest="config_load", action="store_false",
help="Do not read default configuration files",
)
+ configuration.add_argument(
+ "--ignore-config",
+ dest="config_load", action="store_false",
+ help=argparse.SUPPRESS,
+ )
authentication = parser.add_argument_group("Authentication Options")
authentication.add_argument(
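
A hypothetical smoke test of the reorganized parser: the new -X/--extractors
and --config-toml options land in the renamed dest fields visible above
(extractor_sources, configs_toml). Only attributes shown in this diff are
asserted:

    from gallery_dl import option

    parser = option.build_parser()
    args = parser.parse_args([
        "-X", "~/extractors/custom.py",
        "--config-toml", "extra.toml",
    ])
    print(args.extractor_sources)   # ['~/extractors/custom.py']
    print(args.configs_toml)        # ['extra.toml']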
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 3017f85..1d53851 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@ import os
import sys
import shutil
import logging
+import functools
import unicodedata
from . import config, util, formatter
@@ -23,7 +24,7 @@ LOG_LEVEL = logging.INFO
class Logger(logging.Logger):
- """Custom logger that includes extra info in log records"""
+ """Custom Logger that includes extra info in log records"""
def makeRecord(self, name, level, fn, lno, msg, args, exc_info,
func=None, extra=None, sinfo=None,
@@ -38,9 +39,9 @@ class LoggerAdapter():
"""Trimmed-down version of logging.LoggingAdapter"""
__slots__ = ("logger", "extra")
- def __init__(self, logger, extra):
+ def __init__(self, logger, job):
self.logger = logger
- self.extra = extra
+ self.extra = job._logger_extra
def debug(self, msg, *args, **kwargs):
if self.logger.isEnabledFor(logging.DEBUG):
@@ -63,6 +64,38 @@ class LoggerAdapter():
self.logger._log(logging.ERROR, msg, args, **kwargs)
+class LoggerAdapterActions():
+
+ def __init__(self, logger, job):
+ self.logger = logger
+ self.extra = job._logger_extra
+ self.actions = job._logger_actions
+
+ self.debug = functools.partial(self.log, logging.DEBUG)
+ self.info = functools.partial(self.log, logging.INFO)
+ self.warning = functools.partial(self.log, logging.WARNING)
+ self.error = functools.partial(self.log, logging.ERROR)
+
+ def log(self, level, msg, *args, **kwargs):
+ if args:
+ msg = msg % args
+
+ actions = self.actions[level]
+ if actions:
+ args = self.extra.copy()
+ args["level"] = level
+
+ for cond, action in actions:
+ if cond(msg):
+ action(args)
+
+ level = args["level"]
+
+ if self.logger.isEnabledFor(level):
+ kwargs["extra"] = self.extra
+ self.logger._log(level, msg, (), **kwargs)
+
+
class PathfmtProxy():
__slots__ = ("job",)
@@ -235,16 +268,32 @@ else:
stderr_write = stderr_write_flush
-def replace_std_streams(errors="replace"):
- """Replace standard streams and set their error handlers to 'errors'"""
- for name in ("stdout", "stdin", "stderr"):
- stream = getattr(sys, name)
- if stream:
+def configure_standard_streams():
+ for name in ("stdout", "stderr", "stdin"):
+ options = config.get(("output",), name)
+ if not options:
+ continue
+
+ stream = getattr(sys, name, None)
+ if not stream:
+ continue
+
+ if isinstance(options, str):
+ options = {"encoding": options, "errors": "replace"}
+ elif not options.get("errors"):
+ options["errors"] = "replace"
+
+ try:
+ stream.reconfigure(**options)
+ except AttributeError:
+ # no 'reconfigure' support
+ oget = options.get
setattr(sys, name, stream.__class__(
stream.buffer,
- errors=errors,
- newline=stream.newlines,
- line_buffering=stream.line_buffering,
+ encoding=oget("encoding", stream.encoding),
+ errors=oget("errors", "replace"),
+ newline=oget("newline", stream.newlines),
+ line_buffering=oget("line_buffering", stream.line_buffering),
))
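
configure_standard_streams() reads per-stream settings from the "output"
config section, where a plain string is shorthand for an encoding. Its
effect corresponds roughly to this sketch:

    import sys

    options = {"encoding": "utf-8", "errors": "replace"}
    try:
        sys.stdout.reconfigure(**options)   # Python 3.7+
    except AttributeError:
        pass   # older interpreters: rewrap stream.buffer as shown above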
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 3b360e9..a14562a 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -73,10 +73,15 @@ class PathFormat():
raise exception.DirectoryFormatError(exc)
self.kwdict = {}
- self.directory = self.realdirectory = \
- self.filename = self.extension = self.prefix = \
- self.path = self.realpath = self.temppath = ""
self.delete = False
+ self.prefix = ""
+ self.filename = ""
+ self.extension = ""
+ self.directory = ""
+ self.realdirectory = ""
+ self.path = ""
+ self.realpath = ""
+ self.temppath = ""
extension_map = config("extension-map")
if extension_map is None:
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index ef211e6..c28d060 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,13 +8,42 @@
"""Common classes and constants used by postprocessor modules."""
+from .. import util, formatter
+
class PostProcessor():
"""Base class for postprocessors"""
def __init__(self, job):
- name = self.__class__.__name__[:-2].lower()
- self.log = job.get_logger("postprocessor." + name)
+ self.name = self.__class__.__name__[:-2].lower()
+ self.log = job.get_logger("postprocessor." + self.name)
def __repr__(self):
return self.__class__.__name__
+
+ def _init_archive(self, job, options, prefix=None):
+ archive = options.get("archive")
+ if archive:
+ extr = job.extractor
+ archive = util.expand_path(archive)
+ if not prefix:
+ prefix = "_" + self.name.upper() + "_"
+ archive_format = (
+ options.get("archive-prefix", extr.category) +
+ options.get("archive-format", prefix + extr.archive_fmt))
+ try:
+ if "{" in archive:
+ archive = formatter.parse(archive).format_map(
+ job.pathfmt.kwdict)
+ self.archive = util.DownloadArchive(
+ archive, archive_format,
+ options.get("archive-pragma"),
+ "_archive_" + self.name)
+ except Exception as exc:
+ self.log.warning(
+ "Failed to open %s archive at '%s' ('%s: %s')",
+ self.name, archive, exc.__class__.__name__, exc)
+ else:
+ self.log.debug("Using %s archive '%s'", self.name, archive)
+ else:
+ self.archive = None
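
Subclasses now get archive support in two lines; a sketch of the intended
call pattern (SamplePP and its run() hook are illustrative):

    class SamplePP(PostProcessor):
        def __init__(self, job, options):
            PostProcessor.__init__(self, job)
            self._init_archive(job, options)    # sets self.archive or None

        def run(self, pathfmt):
            if self.archive and self.archive.check(pathfmt.kwdict):
                return                          # already processed
            # ... do the actual work ...
            if self.archive:
                self.archive.add(pathfmt.kwdict)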
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index cc217c3..e81c6cf 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -43,11 +43,18 @@ class ExecPP(PostProcessor):
events = events.split(",")
job.register_hooks({event: execute for event in events}, options)
+ self._init_archive(job, options)
+
def exec_list(self, pathfmt, status=None):
if status:
return
+ archive = self.archive
kwdict = pathfmt.kwdict
+
+ if archive and archive.check(kwdict):
+ return
+
kwdict["_directory"] = pathfmt.realdirectory
kwdict["_filename"] = pathfmt.filename
kwdict["_path"] = pathfmt.realpath
@@ -55,10 +62,17 @@ class ExecPP(PostProcessor):
args = [arg.format_map(kwdict) for arg in self.args]
self._exec(args, False)
+ if archive:
+ archive.add(kwdict)
+
def exec_string(self, pathfmt, status=None):
if status:
return
+ archive = self.archive
+ if archive and archive.check(pathfmt.kwdict):
+ return
+
if status is None and pathfmt.realpath:
args = self.args.replace("{}", quote(pathfmt.realpath))
else:
@@ -66,6 +80,9 @@ class ExecPP(PostProcessor):
self._exec(args, True)
+ if archive:
+ archive.add(pathfmt.kwdict)
+
def _exec(self, args, shell):
self.log.debug("Running '%s'", args)
retcode = subprocess.Popen(args, shell=shell).wait()
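
With these hooks, an exec postprocessor becomes once-per-file when given an
archive. A hypothetical configuration through the config module ("command"
is the existing exec option; only "archive" is new here):

    from gallery_dl import config

    config.set((), "postprocessors", [{
        "name"   : "exec",
        "command": ["echo", "{_path}"],
        "archive": "~/gallery-dl/exec-archive.sqlite3",
    }])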
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 2ee1cf8..9667a41 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@
from .common import PostProcessor
from .. import util, formatter
+import json
import sys
import os
@@ -46,14 +47,12 @@ class MetadataPP(PostProcessor):
ext = "txt"
elif mode == "jsonl":
self.write = self._write_json
- self.indent = None
- self.ascii = options.get("ascii", False)
+ self._json_encode = self._make_encoder(options).encode
omode = "a"
filename = "data.jsonl"
else:
self.write = self._write_json
- self.indent = options.get("indent", 4)
- self.ascii = options.get("ascii", False)
+ self._json_encode = self._make_encoder(options, 4).encode
ext = "json"
directory = options.get("directory")
@@ -83,28 +82,7 @@ class MetadataPP(PostProcessor):
events = events.split(",")
job.register_hooks({event: self.run for event in events}, options)
- archive = options.get("archive")
- if archive:
- extr = job.extractor
- archive = util.expand_path(archive)
- archive_format = (
- options.get("archive-prefix", extr.category) +
- options.get("archive-format", "_MD_" + extr.archive_fmt))
- try:
- if "{" in archive:
- archive = formatter.parse(archive).format_map(
- job.pathfmt.kwdict)
- self.archive = util.DownloadArchive(
- archive, archive_format, "_archive_metadata")
- except Exception as exc:
- self.log.warning(
- "Failed to open download archive at '%s' ('%s: %s')",
- archive, exc.__class__.__name__, exc)
- else:
- self.log.debug("Using download archive '%s'", archive)
- else:
- self.archive = None
-
+ self._init_archive(job, options, "_MD_")
self.mtime = options.get("mtime")
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
@@ -206,13 +184,30 @@ class MetadataPP(PostProcessor):
for taglist in taglists:
extend(taglist)
tags.sort()
+ elif all(isinstance(e, dict) for e in tags):
+ taglists = tags
+ tags = []
+ extend = tags.extend
+ for tagdict in taglists:
+ extend([x for x in tagdict.values() if x is not None])
+ tags.sort()
fp.write("\n".join(tags) + "\n")
def _write_json(self, fp, kwdict):
if not self.private:
kwdict = util.filter_dict(kwdict)
- util.dump_json(kwdict, fp, self.ascii, self.indent)
+ fp.write(self._json_encode(kwdict) + "\n")
+
+ @staticmethod
+ def _make_encoder(options, indent=None):
+ return json.JSONEncoder(
+ ensure_ascii=options.get("ascii", False),
+ sort_keys=options.get("sort", False),
+ separators=options.get("separators"),
+ indent=options.get("indent", indent),
+ check_circular=False, default=str,
+ )
__postprocessor__ = MetadataPP
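
_make_encoder() maps the postprocessor options directly onto
json.JSONEncoder; building one by hand shows what each option controls:

    import json

    options = {"ascii": False, "sort": True, "separators": (",", ":")}
    encode = json.JSONEncoder(
        ensure_ascii=options.get("ascii", False),
        sort_keys=options.get("sort", False),
        separators=options.get("separators"),
        indent=options.get("indent"),          # None -> single line (jsonl)
        check_circular=False, default=str,
    ).encode

    print(encode({"b": 2, "a": 1}))   # {"a":1,"b":2}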
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 6b9c457..bf67a64 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -14,6 +14,7 @@ import sys
import json
import time
import random
+import hashlib
import sqlite3
import binascii
import datetime
@@ -23,7 +24,7 @@ import subprocess
import urllib.parse
from http.cookiejar import Cookie
from email.utils import mktime_tz, parsedate_tz
-from . import text, exception
+from . import text, version, exception
def bencode(num, alphabet="0123456789"):
@@ -112,6 +113,24 @@ def noop():
"""Does nothing"""
+def md5(s):
+ """Generate MD5 hexdigest of 's'"""
+ if not s:
+ s = b""
+ elif isinstance(s, str):
+ s = s.encode()
+ return hashlib.md5(s).hexdigest()
+
+
+def sha1(s):
+ """Generate SHA1 hexdigest of 's'"""
+ if not s:
+ s = b""
+ elif isinstance(s, str):
+ s = s.encode()
+ return hashlib.sha1(s).hexdigest()
+
+
def generate_token(size=16):
"""Generate a random token with hexadecimal digits"""
data = random.getrandbits(size * 8).to_bytes(size, "big")
@@ -204,6 +223,10 @@ def datetime_to_timestamp_string(dt):
return ""
+json_loads = json._default_decoder.decode
+json_dumps = json.JSONEncoder(default=str).encode
+
+
def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4):
"""Serialize 'obj' as JSON and write it to 'fp'"""
json.dump(
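
The new module-level bindings reuse a single decoder/encoder instead of
letting every json.loads()/dumps() call rebuild one, and bake in
default=str so non-JSON types serialize as their string form:

    import json
    import datetime

    json_loads = json._default_decoder.decode
    json_dumps = json.JSONEncoder(default=str).encode

    print(json_dumps({"when": datetime.date(2023, 3, 13)}))
    # {"when": "2023-03-13"}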
@@ -513,7 +536,7 @@ def parse_inputfile(file, log):
continue
try:
- value = json.loads(value.strip())
+ value = json_loads(value.strip())
except ValueError as exc:
log.warning("input file: unable to parse '%s': %s", value, exc)
continue
@@ -579,6 +602,8 @@ EPOCH = datetime.datetime(1970, 1, 1)
SECOND = datetime.timedelta(0, 1)
WINDOWS = (os.name == "nt")
SENTINEL = object()
+USERAGENT = "gallery-dl/" + version.__version__
+EXECUTABLE = getattr(sys, "frozen", False)
SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
GLOBALS = {
"contains" : contains,
@@ -588,13 +613,35 @@ GLOBALS = {
"timedelta": datetime.timedelta,
"abort" : raises(exception.StopExtraction),
"terminate": raises(exception.TerminateExtraction),
+ "restart" : raises(exception.RestartExtraction),
+ "hash_sha1": sha1,
+ "hash_md5" : md5,
"re" : re,
}
-def compile_expression(expr, name="<expr>", globals=GLOBALS):
+def compile_expression(expr, name="<expr>", globals=None):
code_object = compile(expr, name, "eval")
- return functools.partial(eval, code_object, globals)
+ return functools.partial(eval, code_object, globals or GLOBALS)
+
+
+def import_file(path):
+ """Import a Python module from a filesystem path"""
+ path, name = os.path.split(path)
+
+ name, sep, ext = name.rpartition(".")
+ if not sep:
+ name = ext
+
+ if path:
+ path = expand_path(path)
+ sys.path.insert(0, path)
+ try:
+ return __import__(name)
+ finally:
+ del sys.path[0]
+ else:
+ return __import__(name)
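
import_file() backs the new -X/--extractors option: it imports a module
from an arbitrary path by temporarily prepending the directory to
sys.path. Usage sketch (the path is hypothetical):

    from gallery_dl import util

    module = util.import_file("~/extractors/myextractor.py")
    # roughly: sys.path.insert(0, <expanded dir>); __import__("myextractor")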
def build_duration_func(duration, min=0.0):
@@ -733,7 +780,8 @@ class RangePredicate():
self.lower = min(r.start for r in ranges)
self.upper = max(r.stop for r in ranges) - 1
else:
- self.lower = self.upper = 0
+ self.lower = 0
+ self.upper = 0
def __call__(self, _url, _kwdict):
self.index = index = self.index + 1
@@ -831,7 +879,8 @@ class ExtendedUrl():
class DownloadArchive():
- def __init__(self, path, format_string, cache_key="_archive_key"):
+ def __init__(self, path, format_string, pragma=None,
+ cache_key="_archive_key"):
try:
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
except sqlite3.OperationalError:
@@ -839,20 +888,23 @@ class DownloadArchive():
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
con.isolation_level = None
- self.close = con.close
- self.cursor = con.cursor()
-
from . import formatter
self.keygen = formatter.parse(format_string).format_map
+ self.close = con.close
+ self.cursor = cursor = con.cursor()
self._cache_key = cache_key
+ if pragma:
+ for stmt in pragma:
+ cursor.execute("PRAGMA " + stmt)
+
try:
- self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
+ cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
except sqlite3.OperationalError:
# fallback for missing WITHOUT ROWID support (#553)
- self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry TEXT PRIMARY KEY)")
+ cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ "(entry TEXT PRIMARY KEY)")
def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 6b52610..494b7f5 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.24.5"
+__version__ = "1.25.0"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index 7b71349..b4638b7 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -237,139 +237,13 @@ def parse_command_line(module, argv):
getattr(opts, "sponsorblock_mark", None) or set()
opts.sponsorblock_remove = \
getattr(opts, "sponsorblock_remove", None) or set()
- sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
opts.remove_chapters = getattr(opts, "remove_chapters", None) or ()
- # PostProcessors
- postprocessors = []
- if opts.metafromtitle:
- postprocessors.append({
- "key": "MetadataFromTitle",
- "titleformat": opts.metafromtitle,
- })
- if getattr(opts, "add_postprocessors", None):
- postprocessors += list(opts.add_postprocessors)
- if sponsorblock_query:
- postprocessors.append({
- "key": "SponsorBlock",
- "categories": sponsorblock_query,
- "api": opts.sponsorblock_api,
- "when": "pre_process",
- })
- if opts.parse_metadata:
- postprocessors.append({
- "key": "MetadataParser",
- "actions": opts.parse_metadata,
- "when": "pre_process",
- })
- if opts.convertsubtitles:
- pp = {"key": "FFmpegSubtitlesConvertor",
- "format": opts.convertsubtitles}
- if ytdlp:
- pp["when"] = "before_dl"
- postprocessors.append(pp)
- if getattr(opts, "convertthumbnails", None):
- postprocessors.append({
- "key": "FFmpegThumbnailsConvertor",
- "format": opts.convertthumbnails,
- "when": "before_dl",
- })
- if getattr(opts, "exec_before_dl_cmd", None):
- postprocessors.append({
- "key": "Exec",
- "exec_cmd": opts.exec_before_dl_cmd,
- "when": "before_dl",
- })
- if opts.extractaudio:
- postprocessors.append({
- "key": "FFmpegExtractAudio",
- "preferredcodec": opts.audioformat,
- "preferredquality": opts.audioquality,
- "nopostoverwrites": opts.nopostoverwrites,
- })
- if getattr(opts, "remuxvideo", None):
- postprocessors.append({
- "key": "FFmpegVideoRemuxer",
- "preferedformat": opts.remuxvideo,
- })
- if opts.recodevideo:
- postprocessors.append({
- "key": "FFmpegVideoConvertor",
- "preferedformat": opts.recodevideo,
- })
- if opts.embedsubtitles:
- pp = {"key": "FFmpegEmbedSubtitle"}
- if ytdlp:
- pp["already_have_subtitle"] = (
- opts.writesubtitles and "no-keep-subs" not in compat_opts)
- postprocessors.append(pp)
- if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts:
- opts.writesubtitles = True
- if opts.allsubtitles and not opts.writeautomaticsub:
- opts.writesubtitles = True
- remove_chapters_patterns, remove_ranges = [], []
- for regex in opts.remove_chapters:
- if regex.startswith("*"):
- dur = list(map(module.parse_duration, regex[1:].split("-")))
- if len(dur) == 2 and all(t is not None for t in dur):
- remove_ranges.append(tuple(dur))
- continue
- remove_chapters_patterns.append(re.compile(regex))
- if opts.remove_chapters or sponsorblock_query:
- postprocessors.append({
- "key": "ModifyChapters",
- "remove_chapters_patterns": remove_chapters_patterns,
- "remove_sponsor_segments": opts.sponsorblock_remove,
- "remove_ranges": remove_ranges,
- "sponsorblock_chapter_title": opts.sponsorblock_chapter_title,
- "force_keyframes": opts.force_keyframes_at_cuts,
- })
- addchapters = getattr(opts, "addchapters", None)
- embed_infojson = getattr(opts, "embed_infojson", None)
- if opts.addmetadata or addchapters or embed_infojson:
- pp = {"key": "FFmpegMetadata"}
- if ytdlp:
- if embed_infojson is None:
- embed_infojson = "if_exists"
- pp["add_metadata"] = opts.addmetadata
- pp["add_chapters"] = addchapters
- pp["add_infojson"] = embed_infojson
-
- postprocessors.append(pp)
- if getattr(opts, "sponskrub", False) is not False:
- postprocessors.append({
- "key": "SponSkrub",
- "path": opts.sponskrub_path,
- "args": opts.sponskrub_args,
- "cut": opts.sponskrub_cut,
- "force": opts.sponskrub_force,
- "ignoreerror": opts.sponskrub is None,
- "_from_cli": True,
- })
- if opts.embedthumbnail:
- already_have_thumbnail = (opts.writethumbnail or
- getattr(opts, "write_all_thumbnails", False))
- postprocessors.append({
- "key": "EmbedThumbnail",
- "already_have_thumbnail": already_have_thumbnail,
- })
- if not already_have_thumbnail:
- opts.writethumbnail = True
- if isinstance(opts.outtmpl, dict):
- opts.outtmpl["pl_thumbnail"] = ""
- if getattr(opts, "split_chapters", None):
- postprocessors.append({
- "key": "FFmpegSplitChapters",
- "force_keyframes": opts.force_keyframes_at_cuts,
- })
- if opts.xattrs:
- postprocessors.append({"key": "XAttrMetadata"})
- if opts.exec_cmd:
- postprocessors.append({
- "key": "Exec",
- "exec_cmd": opts.exec_cmd,
- "when": "after_move",
- })
+ try:
+ postprocessors = list(module.get_postprocessors(opts))
+ except AttributeError:
+ postprocessors = legacy_postprocessors(
+ opts, module, ytdlp, compat_opts)
match_filter = (
None if opts.match_filter is None
@@ -546,3 +420,139 @@ def parse_retries(retries, name=""):
if retries in ("inf", "infinite"):
return float("inf")
return int(retries)
+
+
+def legacy_postprocessors(opts, module, ytdlp, compat_opts):
+ postprocessors = []
+
+ sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
+ if opts.metafromtitle:
+ postprocessors.append({
+ "key": "MetadataFromTitle",
+ "titleformat": opts.metafromtitle,
+ })
+ if getattr(opts, "add_postprocessors", None):
+ postprocessors += list(opts.add_postprocessors)
+ if sponsorblock_query:
+ postprocessors.append({
+ "key": "SponsorBlock",
+ "categories": sponsorblock_query,
+ "api": opts.sponsorblock_api,
+ "when": "pre_process",
+ })
+ if opts.parse_metadata:
+ postprocessors.append({
+ "key": "MetadataParser",
+ "actions": opts.parse_metadata,
+ "when": "pre_process",
+ })
+ if opts.convertsubtitles:
+ pp = {"key": "FFmpegSubtitlesConvertor",
+ "format": opts.convertsubtitles}
+ if ytdlp:
+ pp["when"] = "before_dl"
+ postprocessors.append(pp)
+ if getattr(opts, "convertthumbnails", None):
+ postprocessors.append({
+ "key": "FFmpegThumbnailsConvertor",
+ "format": opts.convertthumbnails,
+ "when": "before_dl",
+ })
+ if getattr(opts, "exec_before_dl_cmd", None):
+ postprocessors.append({
+ "key": "Exec",
+ "exec_cmd": opts.exec_before_dl_cmd,
+ "when": "before_dl",
+ })
+ if opts.extractaudio:
+ postprocessors.append({
+ "key": "FFmpegExtractAudio",
+ "preferredcodec": opts.audioformat,
+ "preferredquality": opts.audioquality,
+ "nopostoverwrites": opts.nopostoverwrites,
+ })
+ if getattr(opts, "remuxvideo", None):
+ postprocessors.append({
+ "key": "FFmpegVideoRemuxer",
+ "preferedformat": opts.remuxvideo,
+ })
+ if opts.recodevideo:
+ postprocessors.append({
+ "key": "FFmpegVideoConvertor",
+ "preferedformat": opts.recodevideo,
+ })
+ if opts.embedsubtitles:
+ pp = {"key": "FFmpegEmbedSubtitle"}
+ if ytdlp:
+ pp["already_have_subtitle"] = (
+ opts.writesubtitles and "no-keep-subs" not in compat_opts)
+ postprocessors.append(pp)
+ if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts:
+ opts.writesubtitles = True
+ if opts.allsubtitles and not opts.writeautomaticsub:
+ opts.writesubtitles = True
+ remove_chapters_patterns, remove_ranges = [], []
+ for regex in opts.remove_chapters:
+ if regex.startswith("*"):
+ dur = list(map(module.parse_duration, regex[1:].split("-")))
+ if len(dur) == 2 and all(t is not None for t in dur):
+ remove_ranges.append(tuple(dur))
+ continue
+ remove_chapters_patterns.append(re.compile(regex))
+ if opts.remove_chapters or sponsorblock_query:
+ postprocessors.append({
+ "key": "ModifyChapters",
+ "remove_chapters_patterns": remove_chapters_patterns,
+ "remove_sponsor_segments": opts.sponsorblock_remove,
+ "remove_ranges": remove_ranges,
+ "sponsorblock_chapter_title": opts.sponsorblock_chapter_title,
+ "force_keyframes": opts.force_keyframes_at_cuts,
+ })
+ addchapters = getattr(opts, "addchapters", None)
+ embed_infojson = getattr(opts, "embed_infojson", None)
+ if opts.addmetadata or addchapters or embed_infojson:
+ pp = {"key": "FFmpegMetadata"}
+ if ytdlp:
+ if embed_infojson is None:
+ embed_infojson = "if_exists"
+ pp["add_metadata"] = opts.addmetadata
+ pp["add_chapters"] = addchapters
+ pp["add_infojson"] = embed_infojson
+
+ postprocessors.append(pp)
+ if getattr(opts, "sponskrub", False) is not False:
+ postprocessors.append({
+ "key": "SponSkrub",
+ "path": opts.sponskrub_path,
+ "args": opts.sponskrub_args,
+ "cut": opts.sponskrub_cut,
+ "force": opts.sponskrub_force,
+ "ignoreerror": opts.sponskrub is None,
+ "_from_cli": True,
+ })
+ if opts.embedthumbnail:
+ already_have_thumbnail = (opts.writethumbnail or
+ getattr(opts, "write_all_thumbnails", False))
+ postprocessors.append({
+ "key": "EmbedThumbnail",
+ "already_have_thumbnail": already_have_thumbnail,
+ })
+ if not already_have_thumbnail:
+ opts.writethumbnail = True
+ if isinstance(opts.outtmpl, dict):
+ opts.outtmpl["pl_thumbnail"] = ""
+ if getattr(opts, "split_chapters", None):
+ postprocessors.append({
+ "key": "FFmpegSplitChapters",
+ "force_keyframes": opts.force_keyframes_at_cuts,
+ })
+ if opts.xattrs:
+ postprocessors.append({"key": "XAttrMetadata"})
+ if opts.exec_cmd:
+ postprocessors.append({
+ "key": "Exec",
+ "exec_cmd": opts.exec_cmd,
+ "when": "after_move",
+ })
+
+ return postprocessors
diff --git a/test/test_config.py b/test/test_config.py
index 7cbb12b..859faf5 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,12 +11,11 @@ import os
import sys
import unittest
-import json
import tempfile
ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOTDIR)
-from gallery_dl import config # noqa E402
+from gallery_dl import config, util # noqa E402
class TestConfig(unittest.TestCase):
@@ -209,8 +208,8 @@ class TestConfigFiles(unittest.TestCase):
def _load(name):
path = os.path.join(ROOTDIR, "docs", name)
try:
- with open(path) as fp:
- return json.load(fp)
+ with open(path) as file:
+ return util.json_loads(file.read())
except FileNotFoundError:
raise unittest.SkipTest(path + " not available")
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 144c6f9..6516fa8 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -46,7 +46,7 @@ class TestExtractorModule(unittest.TestCase):
def setUp(self):
extractor._cache.clear()
- extractor._module_iter = iter(extractor.modules)
+ extractor._module_iter = extractor._modules_internal()
extractor._list_classes = _list_classes
def test_find(self):
diff --git a/test/test_oauth.py b/test/test_oauth.py
index 7455928..0082419 100644
--- a/test/test_oauth.py
+++ b/test/test_oauth.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,6 +10,7 @@
import os
import sys
import unittest
+from unittest.mock import patch
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import oauth, text # noqa E402
@@ -66,6 +67,53 @@ class TestOAuthSession(unittest.TestCase):
self.assertTrue(len(quoted) >= 3)
self.assertEqual(quoted_hex.upper(), quoted_hex)
+ def test_generate_signature(self):
+ client = oauth.OAuth1Client(
+ CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+
+ request = MockRequest()
+ params = []
+ self.assertEqual(
+ client.generate_signature(request, params),
+ "Wt2xo49dM5pkL4gsnCakNdHaVUo%3D")
+
+ request = MockRequest("https://example.org/")
+ params = [("hello", "world"), ("foo", "bar")]
+ self.assertEqual(
+ client.generate_signature(request, params),
+ "ay2269%2F8uKpZqKJR1doTtpv%2Bzn0%3D")
+
+ request = MockRequest("https://example.org/index.html"
+ "?hello=world&foo=bar", method="POST")
+ params = [("oauth_signature_method", "HMAC-SHA1")]
+ self.assertEqual(
+ client.generate_signature(request, params),
+ "yVZWb1ts4smdMmXxMlhaXrkoOng%3D")
+
+ def test_dunder_call(self):
+ client = oauth.OAuth1Client(
+ CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
+ request = MockRequest("https://example.org/")
+
+ with patch("time.time") as tmock, \
+ patch("gallery_dl.oauth.nonce") as nmock:
+ tmock.return_value = 123456789.123
+ nmock.return_value = "abcdefghijklmno"
+
+ client(request)
+
+ self.assertEqual(
+ request.headers["Authorization"],
+ """OAuth \
+oauth_consumer_key="key",\
+oauth_nonce="abcdefghijklmno",\
+oauth_signature_method="HMAC-SHA1",\
+oauth_timestamp="123456789",\
+oauth_version="1.0",\
+oauth_token="accesskey",\
+oauth_signature="DjtTk5j5P3BDZFnstZ%2FtEYcwD6c%3D"\
+""")
+
def test_request_token(self):
response = self._oauth_request(
"/request_token.php", {})
@@ -110,5 +158,13 @@ class TestOAuthSession(unittest.TestCase):
raise unittest.SkipTest()
+class MockRequest():
+
+ def __init__(self, url="", method="GET"):
+ self.url = url
+ self.method = method
+ self.headers = {}
+
+
if __name__ == "__main__":
unittest.main(warnings="ignore")
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 7da2089..650bf59 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -171,39 +171,71 @@ class MetadataTest(BasePostprocessorTest):
# default arguments
self.assertEqual(pp.write , pp._write_json)
- self.assertEqual(pp.ascii , False)
- self.assertEqual(pp.indent , 4)
self.assertEqual(pp.extension, "json")
+ self.assertTrue(callable(pp._json_encode))
def test_metadata_json(self):
pp = self._create({
- "mode" : "json",
- "ascii" : True,
- "indent" : 2,
- "extension": "JSON",
+ "mode" : "json",
+ "extension" : "JSON",
}, {
- "public" : "hello",
- "_private" : "world",
+ "public" : "hello ワールド",
+ "_private" : "foo バー",
})
self.assertEqual(pp.write , pp._write_json)
- self.assertEqual(pp.ascii , True)
- self.assertEqual(pp.indent , 2)
self.assertEqual(pp.extension, "JSON")
+ self.assertTrue(callable(pp._json_encode))
with patch("builtins.open", mock_open()) as m:
self._trigger()
path = self.pathfmt.realpath + ".JSON"
m.assert_called_once_with(path, "w", encoding="utf-8")
- self.assertEqual(self._output(m), """{
- "category": "test",
- "extension": "ext",
- "filename": "file",
- "public": "hello"
+
+ if sys.hexversion >= 0x3060000:
+ # python 3.4 & 3.5 have random order without 'sort: True'
+ self.assertEqual(self._output(m), """{
+ "category": "test",
+ "filename": "file",
+ "extension": "ext",
+ "public": "hello ワールド"
}
""")
+ def test_metadata_json_options(self):
+ pp = self._create({
+ "mode" : "json",
+ "ascii" : True,
+ "sort" : True,
+ "separators": [",", " : "],
+ "private" : True,
+ "indent" : None,
+ "open" : "a",
+ "encoding" : "UTF-8",
+ "extension" : "JSON",
+ }, {
+ "public" : "hello ワールド",
+ "_private" : "foo バー",
+ })
+
+ self.assertEqual(pp.write , pp._write_json)
+ self.assertEqual(pp.extension, "JSON")
+ self.assertTrue(callable(pp._json_encode))
+
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+
+ path = self.pathfmt.realpath + ".JSON"
+ m.assert_called_once_with(path, "a", encoding="UTF-8")
+ self.assertEqual(self._output(m), """{\
+"_private" : "foo \\u30d0\\u30fc",\
+"category" : "test",\
+"extension" : "ext",\
+"filename" : "file",\
+"public" : "hello \\u30ef\\u30fc\\u30eb\\u30c9"}
+""")
+
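The expected output here is consistent with the options being passed through to json.dumps, where presumably "ascii" maps to ensure_ascii and "sort" to sort_keys. A quick standalone check of that assumption:

    import json

    metadata = {
        "category" : "test",
        "filename" : "file",
        "extension": "ext",
        "public"   : "hello ワールド",
        "_private" : "foo バー",
    }

    # separators (",", " : ") and indent=None put everything on one line;
    # ensure_ascii escapes the katakana, sort_keys puts "_private" first
    print(json.dumps(metadata, ensure_ascii=True, sort_keys=True,
                     separators=(",", " : "), indent=None))
    # {"_private" : "foo \u30d0\u30fc","category" : "test","extension" : "ext",
    #  "filename" : "file","public" : "hello \u30ef\u30fc\u30eb\u30c9"}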
def test_metadata_tags(self):
pp = self._create(
{"mode": "tags"},
@@ -255,6 +287,18 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n")
+ def test_metadata_tags_list_of_dict(self):
+ self._create(
+ {"mode": "tags"},
+ {"tags": [
+ {"g": "foobar1", "m": "foobar2"},
+ {"g": None, "m": "foobarbaz"}
+ ]},
+ )
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+ self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n")
+
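The fixture suggests that a list of tag dicts is flattened into the dicts' values, with None entries dropped. A sketch of that behavior (the actual postprocessor may differ in details):

    def flatten_tags(tags):
        # keep plain strings as-is; for dicts, keep every non-None value
        result = []
        for entry in tags:
            if isinstance(entry, dict):
                result.extend(v for v in entry.values() if v is not None)
            else:
                result.append(entry)
        return result

    flatten_tags([{"g": "foobar1", "m": "foobar2"},
                  {"g": None, "m": "foobarbaz"}])
    # -> ["foobar1", "foobar2", "foobarbaz"]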
def test_metadata_custom(self):
def test(pp_info):
pp = self._create(pp_info, {"foo": "bar"})
@@ -334,7 +378,7 @@ class MetadataTest(BasePostprocessorTest):
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_stdout(self):
- self._create({"filename": "-", "indent": None})
+ self._create({"filename": "-", "indent": None, "sort": True})
with patch("sys.stdout", Mock()) as m:
self._trigger()
diff --git a/test/test_results.py b/test/test_results.py
index a42de09..d28496b 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -322,9 +322,9 @@ def setup_test_config():
config.set(("extractor", "mangoxo") , "username", "LiQiang3")
config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
- for category in ("danbooru", "instagram", "twitter", "subscribestar",
- "e621", "atfbooru", "inkbunny", "tapas", "pillowfort",
- "mangadex", "aibooru"):
+ for category in ("danbooru", "atfbooru", "aibooru", "e621", "e926",
+ "instagram", "twitter", "subscribestar", "deviantart",
+ "inkbunny", "tapas", "pillowfort", "mangadex"):
config.set(("extractor", category), "username", None)
config.set(("extractor", "mastodon.social"), "access-token",
diff --git a/test/test_util.py b/test/test_util.py
index 67fdf60..0813a0b 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -15,6 +15,7 @@ import io
import random
import string
import datetime
+import tempfile
import itertools
import http.cookiejar
@@ -394,6 +395,46 @@ class TestOther(unittest.TestCase):
def test_noop(self):
self.assertEqual(util.noop(), None)

+ def test_md5(self):
+ self.assertEqual(util.md5(b""),
+ "d41d8cd98f00b204e9800998ecf8427e")
+ self.assertEqual(util.md5(b"hello"),
+ "5d41402abc4b2a76b9719d911017c592")
+
+ self.assertEqual(util.md5(""),
+ "d41d8cd98f00b204e9800998ecf8427e")
+ self.assertEqual(util.md5("hello"),
+ "5d41402abc4b2a76b9719d911017c592")
+ self.assertEqual(util.md5("ワルド"),
+ "051f29cd6c942cf110a0ccc5729871d2")
+
+ self.assertEqual(util.md5(0),
+ "d41d8cd98f00b204e9800998ecf8427e")
+ self.assertEqual(util.md5(()),
+ "d41d8cd98f00b204e9800998ecf8427e")
+ self.assertEqual(util.md5(None),
+ "d41d8cd98f00b204e9800998ecf8427e")
+
+ def test_sha1(self):
+ self.assertEqual(util.sha1(b""),
+ "da39a3ee5e6b4b0d3255bfef95601890afd80709")
+ self.assertEqual(util.sha1(b"hello"),
+ "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d")
+
+ self.assertEqual(util.sha1(""),
+ "da39a3ee5e6b4b0d3255bfef95601890afd80709")
+ self.assertEqual(util.sha1("hello"),
+ "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d")
+ self.assertEqual(util.sha1("ワルド"),
+ "0cbe319081aa0e9298448ec2bb16df8c494aa04e")
+
+ self.assertEqual(util.sha1(0),
+ "da39a3ee5e6b4b0d3255bfef95601890afd80709")
+ self.assertEqual(util.sha1(()),
+ "da39a3ee5e6b4b0d3255bfef95601890afd80709")
+ self.assertEqual(util.sha1(None),
+ "da39a3ee5e6b4b0d3255bfef95601890afd80709")
+
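Judging by these cases, util.md5 and util.sha1 accept bytes directly, encode str input, and treat falsy values (0, (), None) like the empty string. A sketch consistent with the expected digests:

    import hashlib

    def sha1(value):
        # falsy input (0, (), None, "") hashes like b""; str is UTF-8 encoded
        if not value:
            value = b""
        elif isinstance(value, str):
            value = value.encode()
        return hashlib.sha1(value).hexdigest()

    sha1(b"hello")  # -> "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
    sha1(None)      # -> "da39a3ee5e6b4b0d3255bfef95601890afd80709"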
def test_compile_expression(self):
expr = util.compile_expression("1 + 2 * 3")
self.assertEqual(expr(), 7)
@@ -418,6 +459,56 @@ class TestOther(unittest.TestCase):
with self.assertRaises(exception.StopExtraction):
expr()

+ def test_import_file(self):
+ module = util.import_file("datetime")
+ self.assertIs(module, datetime)
+
+ with tempfile.TemporaryDirectory() as path:
+ file = path + "/module_test.py"
+ with open(file, "w") as fp:
+ fp.write("""
+import datetime
+key = "foobar"
+value = 123
+""")
+ module = util.import_file(file)
+
+ self.assertEqual(module.__name__, "module_test")
+ self.assertEqual(module.key, "foobar")
+ self.assertEqual(module.value, 123)
+ self.assertIs(module.datetime, datetime)
+
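util.import_file apparently handles both cases exercised here: a bare module name falls back to the regular import machinery, while a filesystem path is loaded through a module spec. A stdlib-only sketch of that split (names illustrative):

    import importlib
    import importlib.util
    import os.path

    def import_file(path):
        name, ext = os.path.splitext(os.path.basename(path))
        if not ext:
            # no file extension: treat it as a regular module name
            return importlib.import_module(path)
        # otherwise load the file itself, named after its basename
        spec = importlib.util.spec_from_file_location(name, path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module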
+ def test_custom_globals(self):
+ value = {"v": "foobar"}
+ result = "8843d7f92416211de9ebb963ff4ce28125932878"
+
+ expr = util.compile_expression("hash_sha1(v)")
+ self.assertEqual(expr(value), result)
+
+ expr = util.compile_expression("hs(v)", globals={"hs": util.sha1})
+ self.assertEqual(expr(value), result)
+
+ with tempfile.TemporaryDirectory() as path:
+ file = path + "/module_sha1.py"
+ with open(file, "w") as fp:
+ fp.write("""
+import hashlib
+def hash(value):
+ return hashlib.sha1(value.encode()).hexdigest()
+""")
+ module = util.import_file(file)
+
+ expr = util.compile_expression("hash(v)", globals=module.__dict__)
+ self.assertEqual(expr(value), result)
+
+ GLOBALS_ORIG = util.GLOBALS
+ try:
+ util.GLOBALS = module.__dict__
+ expr = util.compile_expression("hash(v)")
+ finally:
+ util.GLOBALS = GLOBALS_ORIG
+ self.assertEqual(expr(value), result)
+
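The GLOBALS swap at the end only works if the globals mapping is bound when the expression is compiled, not when it is evaluated. A plausible reading of compile_expression along those lines (a sketch with illustrative names; _sha1 stands in for the real hash_sha1 helper):

    import functools
    import hashlib

    def _sha1(value):
        return hashlib.sha1(value.encode()).hexdigest()

    GLOBALS = {"hash_sha1": _sha1}  # stand-in for util.GLOBALS

    def compile_expression(expr, name="<expr>", globals=None):
        code_object = compile(expr, name, "eval")
        # the mapping is captured here, so a later GLOBALS swap does not
        # affect expressions that were already compiled
        return functools.partial(eval, code_object, globals or GLOBALS)

    expr = compile_expression("hash_sha1(v)")
    expr({"v": "foobar"})  # -> "8843d7f92416211de9ebb963ff4ce28125932878"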
def test_build_duration_func(self, f=util.build_duration_func):

def test_single(df, v):
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index a273604..7b82a0f 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -166,7 +166,7 @@ class Test_CommandlineArguments(unittest.TestCase):
subs["already_have_subtitle"] = False
opts = self._(["--embed-subs", "--embed-thumbnail"])
- self.assertEqual(opts["postprocessors"], [subs, thumb])
+ self.assertEqual(opts["postprocessors"][:2], [subs, thumb])
thumb["already_have_thumbnail"] = True
if self.module_name == "yt_dlp":
@@ -179,7 +179,7 @@ class Test_CommandlineArguments(unittest.TestCase):
"--write-sub",
"--write-all-thumbnails",
])
- self.assertEqual(opts["postprocessors"], [subs, thumb])
+ self.assertEqual(opts["postprocessors"][:2], [subs, thumb])
def test_metadata(self):
opts = self._("--add-metadata")
@@ -262,21 +262,11 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
def test_metadata_from_title(self):
opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
-
- try:
- legacy = (self.module.version.__version__ < "2023.01.01")
- except AttributeError:
- legacy = True
-
- actions = [self.module.MetadataFromFieldPP.to_action(
- "title:%(artist)s - %(title)s")]
- if not legacy:
- actions = {"pre_process": actions}
-
self.assertEqual(opts["postprocessors"][0], {
"key" : "MetadataParser",
"when" : "pre_process",
- "actions": actions,
+ "actions": [self.module.MetadataFromFieldPP.to_action(
+ "title:%(artist)s - %(title)s")],
})