From 10987f08f8b6c510ba64f4b42d95ba67eec6e5b0 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 13 Mar 2023 02:07:49 -0400 Subject: New upstream version 1.25.0. --- CHANGELOG.md | 67 ++++++++ PKG-INFO | 6 +- README.rst | 4 +- data/completion/_gallery-dl | 12 +- data/completion/gallery-dl | 4 +- data/completion/gallery-dl.fish | 12 +- data/man/gallery-dl.1 | 24 ++- data/man/gallery-dl.conf.5 | 313 +++++++++++++++++++++++++++++++--- docs/gallery-dl-example.conf | 3 +- docs/gallery-dl.conf | 4 + gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl.egg-info/SOURCES.txt | 5 + gallery_dl/__init__.py | 93 +++++++--- gallery_dl/actions.py | 112 ++++++++++++ gallery_dl/config.py | 60 +++++-- gallery_dl/cookies.py | 14 +- gallery_dl/downloader/http.py | 17 +- gallery_dl/downloader/ytdl.py | 2 + gallery_dl/exception.py | 8 +- gallery_dl/extractor/500px.py | 6 +- gallery_dl/extractor/8muses.py | 7 +- gallery_dl/extractor/__init__.py | 40 ++++- gallery_dl/extractor/bbc.py | 5 +- gallery_dl/extractor/bcy.py | 9 +- gallery_dl/extractor/behance.py | 5 +- gallery_dl/extractor/blogger.py | 7 +- gallery_dl/extractor/bunkr.py | 92 +++++----- gallery_dl/extractor/catbox.py | 27 ++- gallery_dl/extractor/common.py | 15 +- gallery_dl/extractor/danbooru.py | 178 ++++--------------- gallery_dl/extractor/deviantart.py | 259 ++++++++++++++++++++++------ gallery_dl/extractor/directlink.py | 5 + gallery_dl/extractor/dynastyscans.py | 7 +- gallery_dl/extractor/e621.py | 254 +++++++++++++++++++++++++++ gallery_dl/extractor/erome.py | 3 +- gallery_dl/extractor/fallenangels.py | 5 +- gallery_dl/extractor/fanbox.py | 2 +- gallery_dl/extractor/fantia.py | 5 +- gallery_dl/extractor/foolslide.py | 5 +- gallery_dl/extractor/gelbooru.py | 51 +++--- gallery_dl/extractor/generic.py | 24 ++- gallery_dl/extractor/hbrowse.py | 9 +- gallery_dl/extractor/hentai2read.py | 5 +- gallery_dl/extractor/hentaifox.py | 7 +- gallery_dl/extractor/hentaihand.py | 5 +- gallery_dl/extractor/hentaihere.py | 7 +- gallery_dl/extractor/hitomi.py | 5 +- gallery_dl/extractor/imagefap.py | 37 ++-- gallery_dl/extractor/imagehosts.py | 19 ++- gallery_dl/extractor/imgbb.py | 5 +- gallery_dl/extractor/instagram.py | 20 +++ gallery_dl/extractor/issuu.py | 7 +- gallery_dl/extractor/lightroom.py | 7 +- gallery_dl/extractor/mangadex.py | 8 +- gallery_dl/extractor/manganelo.py | 124 +++++++------- gallery_dl/extractor/mangapark.py | 7 +- gallery_dl/extractor/mangasee.py | 53 +++++- gallery_dl/extractor/misskey.py | 202 ++++++++++++++++++++++ gallery_dl/extractor/nana.py | 5 +- gallery_dl/extractor/newgrounds.py | 13 +- gallery_dl/extractor/nhentai.py | 5 +- gallery_dl/extractor/nitter.py | 33 ++-- gallery_dl/extractor/oauth.py | 19 ++- gallery_dl/extractor/patreon.py | 7 +- gallery_dl/extractor/photobucket.py | 4 +- gallery_dl/extractor/pinterest.py | 13 +- gallery_dl/extractor/plurk.py | 10 +- gallery_dl/extractor/poipiku.py | 2 +- gallery_dl/extractor/pornpics.py | 173 +++++++++++++++++++ gallery_dl/extractor/pururin.py | 5 +- gallery_dl/extractor/reactor.py | 9 +- gallery_dl/extractor/reddit.py | 58 +++++-- gallery_dl/extractor/redgifs.py | 102 +++++++++-- gallery_dl/extractor/shopify.py | 7 + gallery_dl/extractor/slideshare.py | 7 +- gallery_dl/extractor/soundgasm.py | 93 ++++++---- gallery_dl/extractor/subscribestar.py | 7 +- gallery_dl/extractor/szurubooru.py | 144 ++++++++++++++++ gallery_dl/extractor/telegraph.py | 20 ++- gallery_dl/extractor/tumblr.py | 20 ++- gallery_dl/extractor/twitter.py | 8 +- gallery_dl/extractor/vsco.py | 7 +- 
gallery_dl/extractor/weibo.py | 7 +-
gallery_dl/extractor/wikifeet.py | 5 +-
gallery_dl/extractor/xhamster.py | 6 +-
gallery_dl/extractor/xvideos.py | 5 +-
gallery_dl/formatter.py | 8 +-
gallery_dl/job.py | 46 +++--
gallery_dl/oauth.py | 4 +-
gallery_dl/option.py | 69 +++++---
gallery_dl/output.py | 73 ++++++--
gallery_dl/path.py | 13 +-
gallery_dl/postprocessor/common.py | 35 +++-
gallery_dl/postprocessor/exec.py | 19 ++-
gallery_dl/postprocessor/metadata.py | 51 +++---
gallery_dl/util.py | 80 +++++++--
gallery_dl/version.py | 2 +-
gallery_dl/ytdl.py | 272 +++++++++++++++--------------
test/test_config.py | 9 +-
test/test_extractor.py | 2 +-
test/test_oauth.py | 58 ++++++-
test/test_postprocessor.py | 78 +++++++--
test/test_results.py | 6 +-
test/test_util.py | 93 +++++++++-
test/test_ytdl.py | 18 +-
105 files changed, 3048 insertions(+), 986 deletions(-)
create mode 100644 gallery_dl/actions.py
create mode 100644 gallery_dl/extractor/e621.py
create mode 100644 gallery_dl/extractor/misskey.py
create mode 100644 gallery_dl/extractor/pornpics.py
create mode 100644 gallery_dl/extractor/szurubooru.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3beecbb..5d805c2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,72 @@
# Changelog
+## 1.25.0 - 2023-03-11
+### Changes
+- [e621] split `e621` extractors from `danbooru` module ([#3425](https://github.com/mikf/gallery-dl/issues/3425))
+- [deviantart] remove mature scraps warning ([#3691](https://github.com/mikf/gallery-dl/issues/3691))
+- [deviantart] use `/collections/all` endpoint for favorites ([#3666](https://github.com/mikf/gallery-dl/issues/3666), [#3668](https://github.com/mikf/gallery-dl/issues/3668))
+- [newgrounds] update default image and audio archive IDs to prevent ID overlap ([#3681](https://github.com/mikf/gallery-dl/issues/3681))
+- rename `--ignore-config` to `--config-ignore`
+### Extractors
+- [catbox] add `file` extractor ([#3570](https://github.com/mikf/gallery-dl/issues/3570))
+- [deviantart] add `search` extractor ([#538](https://github.com/mikf/gallery-dl/issues/538), [#1264](https://github.com/mikf/gallery-dl/issues/1264), [#2954](https://github.com/mikf/gallery-dl/issues/2954), [#2970](https://github.com/mikf/gallery-dl/issues/2970), [#3577](https://github.com/mikf/gallery-dl/issues/3577))
+- [deviantart] add `gallery-search` extractor ([#1695](https://github.com/mikf/gallery-dl/issues/1695))
+- [deviantart] support `fxdeviantart.com` URLs ([#3740](https://github.com/mikf/gallery-dl/issues/3740))
+- [e621] implement `notes` and `pools` metadata extraction ([#3425](https://github.com/mikf/gallery-dl/issues/3425))
+- [gelbooru] add `favorite` extractor ([#3704](https://github.com/mikf/gallery-dl/issues/3704))
+- [imagetwist] support `phun.imagetwist.com` and `imagehaha.com` domains ([#3622](https://github.com/mikf/gallery-dl/issues/3622))
+- [instagram] add `user` metadata field ([#3107](https://github.com/mikf/gallery-dl/issues/3107))
+- [manganelo] update and fix metadata extraction
+- [manganelo] support mobile-only chapters
+- [mangasee] extract `author` and `genre` metadata ([#3703](https://github.com/mikf/gallery-dl/issues/3703))
+- [misskey] add `misskey` extractors ([#3717](https://github.com/mikf/gallery-dl/issues/3717))
+- [pornpics] add `gallery` and `search` extractors ([#263](https://github.com/mikf/gallery-dl/issues/263), [#3544](https://github.com/mikf/gallery-dl/issues/3544), [#3654](https://github.com/mikf/gallery-dl/issues/3654))
+- [redgifs] support v3 URLs ([#3588](https://github.com/mikf/gallery-dl/issues/3588), [#3589](https://github.com/mikf/gallery-dl/issues/3589))
+- [redgifs] add `collection` extractors ([#3427](https://github.com/mikf/gallery-dl/issues/3427), [#3662](https://github.com/mikf/gallery-dl/issues/3662))
+- [shopify] support ohpolly.com ([#440](https://github.com/mikf/gallery-dl/issues/440), [#3596](https://github.com/mikf/gallery-dl/issues/3596))
+- [szurubooru] add `tag` and `post` extractors ([#3583](https://github.com/mikf/gallery-dl/issues/3583), [#3713](https://github.com/mikf/gallery-dl/issues/3713))
+- [twitter] add `transform` option
+### Options
+- [postprocessor:metadata] add `sort` and `separators` options
+- [postprocessor:exec] implement archive options ([#3584](https://github.com/mikf/gallery-dl/issues/3584))
+- add `--config-create` command-line option ([#2333](https://github.com/mikf/gallery-dl/issues/2333))
+- add `--config-toml` command-line option to load config files in TOML format
+- add `output.stdout`, `output.stdin`, and `output.stderr` options ([#1621](https://github.com/mikf/gallery-dl/issues/1621), [#2152](https://github.com/mikf/gallery-dl/issues/2152), [#2529](https://github.com/mikf/gallery-dl/issues/2529))
+- add `hash_md5` and `hash_sha1` functions ([#3679](https://github.com/mikf/gallery-dl/issues/3679))
+- implement `globals` option to enable defining custom functions for `eval` statements
+- implement `archive-pragma` option to use SQLite PRAGMA statements
+- implement `actions` to trigger events on logging messages ([#3338](https://github.com/mikf/gallery-dl/issues/3338), [#3630](https://github.com/mikf/gallery-dl/issues/3630))
+- implement ability to load external extractor classes
+ - `-X/--extractors` command-line options
+ - `extractor.module-sources` config option
+### Fixes
+- [bunkr] fix extraction ([#3636](https://github.com/mikf/gallery-dl/issues/3636), [#3655](https://github.com/mikf/gallery-dl/issues/3655))
+- [danbooru] send gallery-dl User-Agent ([#3665](https://github.com/mikf/gallery-dl/issues/3665))
+- [deviantart] fix crash when handling deleted deviations in status updates ([#3656](https://github.com/mikf/gallery-dl/issues/3656))
+- [fanbox] fix crash with missing images ([#3673](https://github.com/mikf/gallery-dl/issues/3673))
+- [imagefap] update `gallery` URLs ([#3595](https://github.com/mikf/gallery-dl/issues/3595))
+- [imagefap] fix infinite pagination loop ([#3594](https://github.com/mikf/gallery-dl/issues/3594))
+- [imagefap] fix metadata extraction
+- [oauth] use default name for browsers without `name` attribute
+- [pinterest] unescape search terms ([#3621](https://github.com/mikf/gallery-dl/issues/3621))
+- [pixiv] fix `--write-tags` for `"tags": "original"` ([#3675](https://github.com/mikf/gallery-dl/issues/3675))
+- [poipiku] warn about incorrect passwords ([#3646](https://github.com/mikf/gallery-dl/issues/3646))
+- [reddit] update `videos` option ([#3712](https://github.com/mikf/gallery-dl/issues/3712))
+- [soundgasm] rewrite ([#3578](https://github.com/mikf/gallery-dl/issues/3578))
+- [telegraph] fix extraction when images are not in `<figure>
` elements ([#3590](https://github.com/mikf/gallery-dl/issues/3590)) +- [tumblr] raise more detailed errors for dashboard-only blogs ([#3628](https://github.com/mikf/gallery-dl/issues/3628)) +- [twitter] fix some `original` retweets not downloading ([#3744](https://github.com/mikf/gallery-dl/issues/3744)) +- [ytdl] fix `--parse-metadata` ([#3663](https://github.com/mikf/gallery-dl/issues/3663)) +- [downloader:ytdl] prevent exception on empty results +### Improvements +- [downloader:http] use `time.monotonic()` +- [downloader:http] update `_http_retry` to accept a Python function ([#3569](https://github.com/mikf/gallery-dl/issues/3569)) +- [postprocessor:metadata] speed up JSON encoding +- replace `json.loads/dumps` with direct calls to `JSONDecoder.decode/JSONEncoder.encode` +- improve `option.Formatter` performance +### Removals +- [nitter] remove `nitter.pussthecat.org` + ## 1.24.5 - 2023-01-28 ### Additions - [booru] add `url` option diff --git a/PKG-INFO b/PKG-INFO index 9165899..43aacb4 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.24.5 +Version: 1.25.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -106,9 +106,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index ed4afa5..c980bce 100644 --- a/README.rst +++ b/README.rst @@ -69,9 +69,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 06e8556..a82db8a 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -8,9 +8,10 @@ _arguments -C -S \ {-h,--help}'[Print this help message and exit]' \ --version'[Print program version and exit]' \ {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'':_files \ +{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'' \ {-d,--destination}'[Target location for file downloads]':'' \ {-D,--directory}'[Exact location for file downloads]':'' \ -{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'' \ +{-X,--extractors}'[Load external extractors from PATH]':'' \ --proxy'[Use the specified proxy]':'' \ --source-address'[Client-side IP address to bind to]':'' \ --user-agent'[User-Agent request header]':'' \ @@ -45,10 +46,13 @@ _arguments -C -S \ --no-download'[Do not download any files]' \ --no-postprocessors'[Do not run any post processors]' \ --no-check-certificate'[Disable HTTPS certificate validation]' \ +{-o,--option}'[Additional options. 
Example: -o browser=firefox]':'' \ {-c,--config}'[Additional configuration files]':'':_files \ ---config-yaml'[==SUPPRESS==]':'':_files \ -{-o,--option}'[Additional "=" option values]':'' \ ---ignore-config'[Do not read default configuration files]' \ +--config-yaml'[Additional configuration files in YAML format]':'':_files \ +--config-toml'[Additional configuration files in TOML format]':'':_files \ +--config-create'[Create a basic configuration file]' \ +--config-ignore'[Do not read default configuration files]' \ +--ignore-config'[==SUPPRESS==]' \ {-u,--username}'[Username to login with]':'' \ {-p,--password}'[Password belonging to the given username]':'' \ --netrc'[Enable .netrc authentication data]' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 203c87d..1750af8 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -5,12 +5,12 @@ _gallery_dl() cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" - if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--download-archive)$ ]]; then + if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|--download-archive)$ ]]; then COMPREPLY=( $(compgen -f -- "${cur}") ) elif [[ "${prev}" =~ ^()$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --input-file --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") ) fi } diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index e2a7e6d..d764543 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ 
-2,9 +2,10 @@ complete -c gallery-dl -x complete -c gallery-dl -s 'h' -l 'help' -d 'Print this help message and exit' complete -c gallery-dl -l 'version' -d 'Print program version and exit' complete -c gallery-dl -r -F -s 'i' -l 'input-file' -d 'Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified' +complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for downloaded files ("/O" for "original" filenames)' complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'd' -l 'destination' -d 'Target location for file downloads' complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'D' -l 'directory' -d 'Exact location for file downloads' -complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for downloaded files ("/O" for "original" filenames)' +complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'X' -l 'extractors' -d 'Load external extractors from PATH' complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy' complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to' complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header' @@ -39,10 +40,13 @@ complete -c gallery-dl -l 'no-mtime' -d 'Do not set file modification times acco complete -c gallery-dl -l 'no-download' -d 'Do not download any files' complete -c gallery-dl -l 'no-postprocessors' -d 'Do not run any post processors' complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate validation' +complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional options. Example: -o browser=firefox' complete -c gallery-dl -r -F -s 'c' -l 'config' -d 'Additional configuration files' -complete -c gallery-dl -r -F -l 'config-yaml' -d '==SUPPRESS==' -complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional "=" option values' -complete -c gallery-dl -l 'ignore-config' -d 'Do not read default configuration files' +complete -c gallery-dl -r -F -l 'config-yaml' -d 'Additional configuration files in YAML format' +complete -c gallery-dl -r -F -l 'config-toml' -d 'Additional configuration files in TOML format' +complete -c gallery-dl -l 'config-create' -d 'Create a basic configuration file' +complete -c gallery-dl -l 'config-ignore' -d 'Do not read default configuration files' +complete -c gallery-dl -l 'ignore-config' -d '==SUPPRESS==' complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with' complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username' complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 024ddb3..27d3a09 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2023-01-28" "1.24.5" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2023-03-11" "1.25.0" "gallery-dl Manual" .\" disable hyphenation .nh @@ -26,14 +26,17 @@ Print program version and exit .B "\-i, \-\-input\-file" \f[I]FILE\f[] Download URLs found in FILE ('-' for stdin). 
More than one --input-file can be specified
.TP
+.B "\-f, \-\-filename" \f[I]FORMAT\f[]
+Filename format string for downloaded files ('/O' for "original" filenames)
+.TP
.B "\-d, \-\-destination" \f[I]PATH\f[]
Target location for file downloads
.TP
.B "\-D, \-\-directory" \f[I]PATH\f[]
Exact location for file downloads
.TP
-.B "\-f, \-\-filename" \f[I]FORMAT\f[]
-Filename format string for downloaded files ('/O' for "original" filenames)
+.B "\-X, \-\-extractors" \f[I]PATH\f[]
+Load external extractors from PATH
.TP
.B "\-\-proxy" \f[I]URL\f[]
Use the specified proxy
@@ -137,13 +140,22 @@ Do not run any post processors
.B "\-\-no\-check\-certificate"
Disable HTTPS certificate validation
.TP
+.B "\-o, \-\-option" \f[I]KEY=VALUE\f[]
+Additional options. Example: -o browser=firefox
+.TP
.B "\-c, \-\-config" \f[I]FILE\f[]
Additional configuration files
.TP
-.B "\-o, \-\-option" \f[I]OPT\f[]
-Additional '=' option values
+.B "\-\-config\-yaml" \f[I]FILE\f[]
+Additional configuration files in YAML format
+.TP
+.B "\-\-config\-toml" \f[I]FILE\f[]
+Additional configuration files in TOML format
+.TP
+.B "\-\-config\-create"
+Create a basic configuration file
.TP
-.B "\-\-ignore\-config"
+.B "\-\-config\-ignore"
Do not read default configuration files
.TP
.B "\-u, \-\-username" \f[I]USER\f[]
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 6b11835..a0fd629 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-01-28" "1.24.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-03-11" "1.25.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -456,6 +456,8 @@ and optional for
.br
* \f[I]e621\f[] (*)
.br
+* \f[I]e926\f[] (*)
+.br
* \f[I]exhentai\f[]
.br
* \f[I]idolcomplex\f[]
@@ -897,6 +899,20 @@ An alternative \f[I]format string\f[] to build archive IDs with.
Prefix for archive IDs.
+.SS extractor.*.archive-pragma
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+["journal_mode=WAL", "synchronous=NORMAL"]
+
+.IP "Description:" 4
+A list of SQLite \f[I]PRAGMA\f[] statements to run during archive initialization.
+
+See \f[I]https://www.sqlite.org/pragma.html\f[]
+for available \f[I]PRAGMA\f[] statements and further details.
+
+
.SS extractor.*.postprocessors
.IP "Type:" 6
\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
@@ -1288,7 +1304,23 @@ For unavailable or restricted posts,
follow the \f[I]source\f[] and download from there if possible.
-.SS extractor.danbooru.metadata
+.SS extractor.danbooru.ugoira
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls the download target for Ugoira posts.
+
+.br
+* \f[I]true\f[]: Original ZIP archives
+.br
+* \f[I]false\f[]: Converted video files
+
+
+.SS extractor.[Danbooru].metadata
.IP "Type:" 6
.br
* \f[I]bool\f[]
@@ -1317,7 +1349,7 @@ for possible field names.
\f[I]aibooru\f[] also supports \f[I]ai_metadata\f[].
Note: This requires 1 additional HTTP request per post.
-.SS extractor.danbooru.threshold
+.SS extractor.[Danbooru].threshold
.IP "Type:" 6
.br
* \f[I]string\f[]
.br
* \f[I]integer\f[]
.IP "Default:" 9
\f[I]"auto"\f[]
.IP "Description:" 4
Stop paginating over API results if the length of a batch of returned
posts is less than the specified number. Defaults to the per-page limit
-of the current instance, which is 320 for \f[I]e621\f[] and 200 for
-everything else.
+of the current instance, which is 200.
Note: Changing this setting is normally not necessary. When the value is
greater than the per-page limit, gallery-dl will stop after the first
batch. The value cannot be less than 1.
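A minimal configuration sketch exercising the \f[I][Danbooru]\f[] options documented above (the option names come from the sections above; the values are illustrative placeholders only, not recommendations):

.. code:: json

{
    "extractor": {
        "danbooru": {
            "metadata": ["notes", "parent"],
            "ugoira": true,
            "threshold": "auto"
        }
    }
}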
-.SS extractor.danbooru.ugoira
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Controls the download target for Ugoira posts.
-
-.br
-* \f[I]true\f[]: Original ZIP archives
-.br
-* \f[I]false\f[]: Converted video files
-
-
.SS extractor.derpibooru.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -1617,6 +1632,50 @@ or whenever your \f[I]cache file\f[] is deleted or cleared.
Minimum wait time in seconds before API requests.
+.SS extractor.[E621].metadata
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Example:" 4
+.br
+* notes,pools
+.br
+* ["notes", "pools"]
+
+.IP "Description:" 4
+Extract additional metadata (notes, pool metadata) if available.
+
+Note: This requires 0-2 additional HTTP requests per post.
+
+
+.SS extractor.[E621].threshold
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Stop paginating over API results if the length of a batch of returned
+posts is less than the specified number. Defaults to the per-page limit
+of the current instance, which is 320.
+
+Note: Changing this setting is normally not necessary. When the value is
+greater than the per-page limit, gallery-dl will stop after the first
+batch. The value cannot be less than 1.
+
+
.SS extractor.exhentai.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -2302,6 +2361,28 @@ Fetch media from replies to other posts.
Also emit metadata for text-only posts without media content.
+.SS extractor.[misskey].renotes
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch media from renoted notes.
+
+
+.SS extractor.[misskey].replies
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Fetch media from replies to other notes.
+
+
.SS extractor.nana.favkey
.IP "Type:" 6
\f[I]string\f[]
@@ -2884,8 +2965,16 @@ HLS and DASH manifests
* \f[I]"ytdl"\f[]: Download videos and let \f[I]youtube-dl\f[]
handle all of video extraction and download
.br
+* \f[I]"dash"\f[]: Extract DASH manifest URLs and use \f[I]youtube-dl\f[]
+to download and merge them. (*)
+.br
* \f[I]false\f[]: Ignore videos
+(*)
+This saves 1 HTTP request per video
+and might potentially be able to download otherwise deleted videos,
+but it will not always get the best video quality available.
+
.SS extractor.redgifs.format
.IP "Type:" 6
@@ -3003,6 +3092,17 @@ Filters used during searches.
Download video files.
+.SS extractor.[szurubooru].username & .token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Description:" 4
+Username and login token of your account to access private resources.
+
+To generate a token, visit \f[I]/user/USERNAME/list-tokens\f[]
+and click \f[I]Create Token\f[].
+
+
.SS extractor.tumblr.avatar
.IP "Type:" 6
\f[I]bool\f[]
@@ -3282,6 +3382,17 @@ Age-restricted replies cannot be expanded when using the
\f[I]syndication\f[] API.
+.SS extractor.twitter.transform
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Transform Tweet and User metadata into a simpler, uniform format.
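As a sketch, the \f[I]transform\f[] option described above is set like any other extractor option in a gallery-dl configuration file (the surrounding keys follow the standard config layout; the value here is only an example):

.. code:: json

{
    "extractor": {
        "twitter": {
            "transform": false
        }
    }
}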
+
+
.SS extractor.twitter.size
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -4316,6 +4427,50 @@ For these format strings
* \f[I]{3}\f[] is percent of bytes downloaded to total bytes
+.SS output.stdout & .stdin & .stderr
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]object\f[]
+
+.IP "Example:" 4
+.. code:: json
+
+"utf-8"
+
+.. code:: json
+
+{
+"encoding": "utf-8",
+"errors": "replace",
+"line_buffering": true
+}
+
+
+.IP "Description:" 4
+\f[I]Reconfigure\f[]
+a \f[I]standard stream\f[].
+
+Possible options are
+
+.br
+* \f[I]encoding\f[]
+.br
+* \f[I]errors\f[]
+.br
+* \f[I]newline\f[]
+.br
+* \f[I]line_buffering\f[]
+.br
+* \f[I]write_through\f[]
+
+When this option is specified as a simple \f[I]string\f[],
+it is interpreted as \f[I]{"encoding": "<string>", "errors": "replace"}\f[]
+
+Note: \f[I]errors\f[] always defaults to \f[I]"replace"\f[]
+
+
.SS output.shorten
.IP "Type:" 6
\f[I]bool\f[]
@@ -4547,6 +4702,21 @@ after \f[I]N\f[] consecutive files compared as equal.
Only compare file sizes. Do not read and compare their content.
+.SS exec.archive
+.IP "Type:" 6
+\f[I]Path\f[]
+
+.IP "Description:" 4
+File to store IDs of executed commands in,
+similar to \f[I]extractor.*.archive\f[].
+
+\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
+akin to
+\f[I]extractor.*.archive-format\f[],
+\f[I]extractor.*.archive-prefix\f[], and
+\f[I]extractor.*.archive-pragma\f[], are supported as well.
+
+
.SS exec.async
.IP "Type:" 6
\f[I]bool\f[]
@@ -4775,6 +4945,21 @@ Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
+.SS metadata.ascii
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Escape all non-ASCII characters.
+
+See the \f[I]ensure_ascii\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
+
+
.SS metadata.indent
.IP "Type:" 6
.br
@@ -4793,6 +4978,37 @@ See the \f[I]indent\f[] argument of \f[I]json.dump()\f[] for further details.
Note: Only applies for \f[I]"mode": "json"\f[].
+.SS metadata.separators
+.IP "Type:" 6
+\f[I]list\f[] with two \f[I]string\f[] elements
+
+.IP "Default:" 9
+\f[I][", ", ": "]\f[]
+
+.IP "Description:" 4
+\f[I]<item separator>\f[] - \f[I]<key-value separator>\f[] pair
+to separate JSON keys and values with.
+
+See the \f[I]separators\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
+
+
+.SS metadata.sort
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Sort output by key.
+
+See the \f[I]sort_keys\f[] argument of \f[I]json.dump()\f[] for further details.
+
+Note: Only applies for \f[I]"mode": "json"\f[] and \f[I]"jsonl"\f[].
+
+
.SS metadata.open
.IP "Type:" 6
\f[I]string\f[]
@@ -4843,9 +5059,11 @@ i.e. fields whose name starts with an underscore.
File to store IDs of generated metadata files in,
similar to \f[I]extractor.*.archive\f[].
-\f[I]archive-format\f[] and \f[I]archive-prefix\f[] options,
-akin to \f[I]extractor.*.archive-format\f[] and \f[I]extractor.*.archive-prefix\f[],
-are supported as well.
+\f[I]archive-format\f[], \f[I]archive-prefix\f[], and \f[I]archive-pragma\f[] options,
+akin to
+\f[I]extractor.*.archive-format\f[],
+\f[I]extractor.*.archive-prefix\f[], and
+\f[I]extractor.*.archive-pragma\f[], are supported as well.
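A hedged sketch of a metadata post processor entry combining the options documented above with the new archive options (all keys are documented above; the path and values are placeholders):

.. code:: json

{
    "extractor": {
        "postprocessors": [
            {
                "name": "metadata",
                "mode": "json",
                "sort": true,
                "separators": [", ", ": "],
                "archive": "~/gallery-dl/metadata.sqlite3",
                "archive-pragma": ["journal_mode=WAL"]
            }
        ]
    }
}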
.SS metadata.mtime @@ -5152,10 +5370,55 @@ The \f[I]modules\f[] list in ["reddit", "danbooru", "mangadex"] .IP "Description:" 4 -The list of modules to load when searching for a suitable +List of internal modules to load when searching for a suitable extractor class. Useful to reduce startup time and memory usage. +.SS extractor.module-sources +.IP "Type:" 6 +\f[I]list\f[] of \f[I]Path\f[] instances + +.IP "Example:" 4 +["~/.config/gallery-dl/modules", null] + +.IP "Description:" 4 +List of directories to load external extractor modules from. + +Any file in a specified directory with a \f[I].py\f[] filename extension +gets \f[I]imported\f[] +and searched for potential extractors, +i.e. classes with a \f[I]pattern\f[] attribute. + +Note: \f[I]null\f[] references internal extractors defined in +\f[I]extractor/__init__.py\f[] +or by \f[I]extractor.modules\f[]. + + +.SS globals +.IP "Type:" 6 +.br +* \f[I]Path\f[] +.br +* \f[I]string\f[] + +.IP "Example:" 4 +.br +* "~/.local/share/gdl-globals.py" +.br +* "gdl-globals" + +.IP "Default:" 9 +The \f[I]GLOBALS\f[] dict in +\f[I]util.py\f[] + +.IP "Description:" 4 +Path to or name of an +\f[I]importable\f[] +Python module whose namespace gets used as an alternative +\f[I]globals parameter\f[] +for compiled Python expressions. + + .SS cache.file .IP "Type:" 6 \f[I]Path\f[] diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf index 92509b5..ef7b3b5 100644 --- a/docs/gallery-dl-example.conf +++ b/docs/gallery-dl-example.conf @@ -5,6 +5,7 @@ "#": "set global archive file for all extractors", "archive": "~/gallery-dl/archive.sqlite3", + "archive-pragma": ["journal_mode=WAL", "synchronous=NORMAL"], "#": "add two custom keywords into the metadata dictionary", "#": "these can be used to further refine your output directories or filenames", @@ -36,7 +37,7 @@ "pixiv": { - "#": "override global archive setting for pixiv", + "#": "override global archive path for pixiv", "archive": "~/gallery-dl/archive-pixiv.sqlite3", "#": "set custom directory and filename format strings for all pixiv downloads", diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 3012e71..7564e5b 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -186,6 +186,10 @@ "username": null, "password": null }, + "misskey": { + "renotes": false, + "replies": true + }, "newgrounds": { "username": null, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 3fe1b55..d4e660a 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.24.5 +Version: 1.25.0 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -106,9 +106,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index eab1881..9827944 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -13,6 +13,7 @@ docs/gallery-dl-example.conf docs/gallery-dl.conf gallery_dl/__init__.py gallery_dl/__main__.py +gallery_dl/actions.py gallery_dl/aes.py gallery_dl/cache.py gallery_dl/config.py @@ -68,6 +69,7 @@ 
gallery_dl/extractor/desktopography.py gallery_dl/extractor/deviantart.py gallery_dl/extractor/directlink.py gallery_dl/extractor/dynastyscans.py +gallery_dl/extractor/e621.py gallery_dl/extractor/erome.py gallery_dl/extractor/exhentai.py gallery_dl/extractor/fallenangels.py @@ -133,6 +135,7 @@ gallery_dl/extractor/mangoxo.py gallery_dl/extractor/mastodon.py gallery_dl/extractor/mememuseum.py gallery_dl/extractor/message.py +gallery_dl/extractor/misskey.py gallery_dl/extractor/moebooru.py gallery_dl/extractor/myhentaigallery.py gallery_dl/extractor/myportfolio.py @@ -161,6 +164,7 @@ gallery_dl/extractor/pixnet.py gallery_dl/extractor/plurk.py gallery_dl/extractor/poipiku.py gallery_dl/extractor/pornhub.py +gallery_dl/extractor/pornpics.py gallery_dl/extractor/pururin.py gallery_dl/extractor/reactor.py gallery_dl/extractor/readcomiconline.py @@ -182,6 +186,7 @@ gallery_dl/extractor/smugmug.py gallery_dl/extractor/soundgasm.py gallery_dl/extractor/speakerdeck.py gallery_dl/extractor/subscribestar.py +gallery_dl/extractor/szurubooru.py gallery_dl/extractor/tapas.py gallery_dl/extractor/tcbscans.py gallery_dl/extractor/telegraph.py diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 245dbf8..116ca5d 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -11,7 +11,7 @@ import logging from . import version, config, option, output, extractor, job, util, exception __author__ = "Mike Fährmann" -__copyright__ = "Copyright 2014-2022 Mike Fährmann" +__copyright__ = "Copyright 2014-2023 Mike Fährmann" __license__ = "GPLv2" __maintainer__ = "Mike Fährmann" __email__ = "mike_faehrmann@web.de" @@ -33,20 +33,24 @@ def progress(urls, pformat): def main(): try: - if sys.stdout and sys.stdout.encoding.lower() != "utf-8": - output.replace_std_streams() - parser = option.build_parser() args = parser.parse_args() log = output.initialize_logging(args.loglevel) # configuration - if args.load_config: + if args.config_load: config.load() - if args.cfgfiles: - config.load(args.cfgfiles, strict=True) - if args.yamlfiles: - config.load(args.yamlfiles, strict=True, fmt="yaml") + if args.configs_json: + config.load(args.configs_json, strict=True) + if args.configs_yaml: + import yaml + config.load(args.configs_yaml, strict=True, load=yaml.safe_load) + if args.configs_toml: + try: + import tomllib as toml + except ImportError: + import toml + config.load(args.configs_toml, strict=True, load=toml.loads) if args.filename: filename = args.filename if filename == "/O": @@ -77,6 +81,8 @@ def main(): for opts in args.options: config.set(*opts) + output.configure_standard_streams() + # signals signals = config.get((), "signals-ignore") if signals: @@ -105,20 +111,17 @@ def main(): output.ANSI = True - # extractor modules - modules = config.get(("extractor",), "modules") - if modules is not None: - if isinstance(modules, str): - modules = modules.split(",") - extractor.modules = modules - extractor._module_iter = iter(modules) - # format string separator separator = config.get((), "format-separator") if separator: from . 
import formatter formatter._SEPARATOR = separator + # eval globals + path = config.get((), "globals") + if path: + util.GLOBALS = util.import_file(path).__dict__ + # loglevels output.configure_logging(args.loglevel) if args.loglevel >= logging.ERROR: @@ -128,7 +131,7 @@ def main(): import requests extra = "" - if getattr(sys, "frozen", False): + if util.EXECUTABLE: extra = " - Executable" else: git_head = util.git_head() @@ -147,6 +150,44 @@ def main(): log.debug("Configuration Files %s", config._files) + # extractor modules + modules = config.get(("extractor",), "modules") + if modules is not None: + if isinstance(modules, str): + modules = modules.split(",") + extractor.modules = modules + + # external modules + if args.extractor_sources: + sources = args.extractor_sources + sources.append(None) + else: + sources = config.get(("extractor",), "module-sources") + + if sources: + import os + modules = [] + + for source in sources: + if source: + path = util.expand_path(source) + try: + files = os.listdir(path) + modules.append(extractor._modules_path(path, files)) + except Exception as exc: + log.warning("Unable to load modules from %s (%s: %s)", + path, exc.__class__.__name__, exc) + else: + modules.append(extractor._modules_internal()) + + if len(modules) > 1: + import itertools + extractor._module_iter = itertools.chain(*modules) + elif not modules: + extractor._module_iter = () + else: + extractor._module_iter = iter(modules[0]) + if args.list_modules: extractor.modules.append("") sys.stdout.write("\n".join(extractor.modules)) @@ -177,6 +218,10 @@ def main(): "Deleted %d %s from '%s'", cnt, "entry" if cnt == 1 else "entries", cache._path(), ) + + elif args.config_init: + return config.initialize() + else: if not args.urls and not args.inputfiles: parser.error( @@ -220,9 +265,13 @@ def main(): pformat = config.get(("output",), "progress", True) if pformat and len(urls) > 1 and args.loglevel < logging.ERROR: urls = progress(urls, pformat) + else: + urls = iter(urls) retval = 0 - for url in urls: + url = next(urls, None) + + while url is not None: try: log.debug("Starting %s for '%s'", jobtype.__name__, url) if isinstance(url, util.ExtendedUrl): @@ -234,9 +283,15 @@ def main(): retval |= jobtype(url).run() except exception.TerminateExtraction: pass + except exception.RestartExtraction: + log.debug("Restarting '%s'", url) + continue except exception.NoExtractorError: log.error("Unsupported URL '%s'", url) retval |= 64 + + url = next(urls, None) + return retval except KeyboardInterrupt: diff --git a/gallery_dl/actions.py b/gallery_dl/actions.py new file mode 100644 index 0000000..15ca31e --- /dev/null +++ b/gallery_dl/actions.py @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +""" """ + +import re +import sys +import logging +import operator +from . 
import util, exception + + +def parse(actionspec): + if isinstance(actionspec, dict): + actionspec = actionspec.items() + + actions = {} + actions[logging.DEBUG] = actions_d = [] + actions[logging.INFO] = actions_i = [] + actions[logging.WARNING] = actions_w = [] + actions[logging.ERROR] = actions_e = [] + + for event, spec in actionspec: + level, _, pattern = event.partition(":") + type, _, args = spec.partition(" ") + action = (re.compile(pattern).search, ACTIONS[type](args)) + + level = level.strip() + if not level or level == "*": + actions_d.append(action) + actions_i.append(action) + actions_w.append(action) + actions_e.append(action) + else: + + actions[_level_to_int(level)].append(action) + + return actions + + +def _level_to_int(level): + try: + return logging._nameToLevel[level] + except KeyError: + return int(level) + + +def action_print(opts): + def _print(_): + print(opts) + return _print + + +def action_status(opts): + op, value = re.match(r"\s*([&|^=])=?\s*(\d+)", opts).groups() + + op = { + "&": operator.and_, + "|": operator.or_, + "^": operator.xor, + "=": lambda x, y: y, + }[op] + + value = int(value) + + def _status(args): + args["job"].status = op(args["job"].status, value) + return _status + + +def action_level(opts): + level = _level_to_int(opts.lstrip(" ~=")) + + def _level(args): + args["level"] = level + return _level + + +def action_wait(opts): + def _wait(args): + input("Press Enter to continue") + return _wait + + +def action_restart(opts): + return util.raises(exception.RestartExtraction) + + +def action_exit(opts): + try: + opts = int(opts) + except ValueError: + pass + + def _exit(args): + sys.exit(opts) + return _exit + + +ACTIONS = { + "print" : action_print, + "status" : action_status, + "level" : action_level, + "restart": action_restart, + "wait" : action_wait, + "exit" : action_exit, +} diff --git a/gallery_dl/config.py b/gallery_dl/config.py index 0f2d1f1..d014293 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2021 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,6 @@ """Global configuration module""" import sys -import json import os.path import logging from . 
import util @@ -39,7 +38,7 @@ else: ] -if getattr(sys, "frozen", False): +if util.EXECUTABLE: # look for config file in PyInstaller executable directory (#682) _default_configs.append(os.path.join( os.path.dirname(sys.executable), @@ -50,23 +49,54 @@ if getattr(sys, "frozen", False): # -------------------------------------------------------------------- # public interface -def load(files=None, strict=False, fmt="json"): - """Load JSON configuration files""" - if fmt == "yaml": + +def initialize(): + paths = list(map(util.expand_path, _default_configs)) + + for path in paths: + if os.access(path, os.R_OK | os.W_OK): + log.error("There is already a configuration file at '%s'", path) + return 1 + + for path in paths: try: - import yaml - parsefunc = yaml.safe_load - except ImportError: - log.error("Could not import 'yaml' module") - return + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "x", encoding="utf-8") as fp: + fp.write("""\ +{ + "extractor": { + + }, + "downloader": { + + }, + "output": { + + }, + "postprocessor": { + + } +} +""") + break + except OSError as exc: + log.debug("%s: %s", exc.__class__.__name__, exc) else: - parsefunc = json.load + log.error("Unable to create a new configuration file " + "at any of the default paths") + return 1 + log.info("Created a basic configuration file at '%s'", path) + return 0 + + +def load(files=None, strict=False, load=util.json_loads): + """Load JSON configuration files""" for pathfmt in files or _default_configs: path = util.expand_path(pathfmt) try: with open(path, encoding="utf-8") as file: - confdict = parsefunc(file) + conf = load(file.read()) except OSError as exc: if strict: log.error(exc) @@ -77,9 +107,9 @@ def load(files=None, strict=False, fmt="json"): sys.exit(2) else: if not _config: - _config.update(confdict) + _config.update(conf) else: - util.combine_dict(_config, confdict) + util.combine_dict(_config, conf) _files.append(pathfmt) diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index f18cc47..3d715a7 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -12,7 +12,6 @@ import binascii import contextlib import ctypes -import json import logging import os import shutil @@ -24,7 +23,7 @@ import tempfile from datetime import datetime, timedelta, timezone from hashlib import pbkdf2_hmac from http.cookiejar import Cookie -from . import aes, text +from . 
import aes, text, util SUPPORTED_BROWSERS_CHROMIUM = { @@ -169,8 +168,8 @@ def _firefox_cookies_database(profile=None, container=None): os.path.dirname(path), "containers.json") try: - with open(containers_path) as containers: - identities = json.load(containers)["identities"] + with open(containers_path) as file: + identities = util.json_loads(file.read())["identities"] except OSError: logger.error("Unable to read Firefox container database at %s", containers_path) @@ -716,8 +715,8 @@ def _get_windows_v10_key(browser_root): logger.error("could not find local state file") return None logger.debug("Found local state file at '%s'", path) - with open(path, encoding="utf8") as f: - data = json.load(f) + with open(path, encoding="utf-8") as file: + data = util.json_loads(file.read()) try: base64_key = data["os_crypt"]["encrypted_key"] except KeyError: @@ -794,7 +793,8 @@ class DatabaseCopy(): def __init__(self, path): self.path = path - self.directory = self.database = None + self.database = None + self.directory = None def __enter__(self): try: diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 6043443..e977320 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -100,13 +100,6 @@ class HttpDownloader(DownloaderBase): adjust_extension = kwdict.get( "_http_adjust_extension", self.adjust_extension) - codes = kwdict.get("_http_retry_codes") - if codes: - retry_codes = list(self.retry_codes) - retry_codes += codes - else: - retry_codes = self.retry_codes - if self.part and not metadata: pathfmt.part_enable(self.partdir) @@ -167,7 +160,10 @@ class HttpDownloader(DownloaderBase): break else: msg = "'{} {}' for '{}'".format(code, response.reason, url) - if code in retry_codes or 500 <= code < 600: + if code in self.retry_codes or 500 <= code < 600: + continue + retry = kwdict.get("_http_retry") + if retry and retry(response): continue self.log.warning(msg) return False @@ -296,11 +292,10 @@ class HttpDownloader(DownloaderBase): progress = self.progress bytes_downloaded = 0 - time_start = time.time() + time_start = time.monotonic() for data in content: - time_current = time.time() - time_elapsed = time_current - time_start + time_elapsed = time.monotonic() - time_start bytes_downloaded += len(data) write(data) diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index c44ea0a..adada75 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -64,6 +64,8 @@ class YoutubeDLDownloader(DownloaderBase): try: info_dict = ytdl_instance.extract_info(url[5:], download=False) except Exception: + pass + if not info_dict: return False if "entries" in info_dict: diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py index 5120039..ef190f2 100644 --- a/gallery_dl/exception.py +++ b/gallery_dl/exception.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -24,6 +24,7 @@ Exception +-- NoExtractorError +-- StopExtraction +-- TerminateExtraction + +-- RestartExtraction """ @@ -115,3 +116,8 @@ class StopExtraction(GalleryDLException): class TerminateExtraction(GalleryDLException): """Terminate data extraction""" code = 0 + + +class RestartExtraction(GalleryDLException): + """Restart data extraction""" + code = 0 diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 
b2ae963..1213194 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,7 @@ """Extractors for https://500px.com/""" from .common import Extractor, Message -import json +from .. import util BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com" @@ -86,7 +86,7 @@ class _500pxExtractor(Extractor): } data = { "operationName": opname, - "variables" : json.dumps(variables), + "variables" : util.json_dumps(variables), "query" : QUERIES[opname], } return self.request( diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index fed4991..26ac8b2 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for https://comics.8muses.com/""" from .common import Extractor, Message -from .. import text -import json +from .. import text, util class _8musesAlbumExtractor(Extractor): @@ -131,7 +130,7 @@ class _8musesAlbumExtractor(Extractor): @staticmethod def _unobfuscate(data): - return json.loads("".join([ + return util.json_loads("".join([ chr(33 + (ord(c) + 14) % 94) if "!" <= c <= "~" else c for c in text.unescape(data.strip("\t\n\r !")) ])) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6140c2c..3968d72 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
+import sys import re modules = [ @@ -34,6 +35,7 @@ modules = [ "desktopography", "deviantart", "dynastyscans", + "e621", "erome", "exhentai", "fallenangels", @@ -92,6 +94,7 @@ modules = [ "mangasee", "mangoxo", "mememuseum", + "misskey", "myhentaigallery", "myportfolio", "nana", @@ -118,6 +121,7 @@ modules = [ "plurk", "poipiku", "pornhub", + "pornpics", "pururin", "reactor", "readcomiconline", @@ -137,6 +141,7 @@ modules = [ "soundgasm", "speakerdeck", "subscribestar", + "szurubooru", "tapas", "tcbscans", "telegraph", @@ -217,20 +222,33 @@ def extractors(): # -------------------------------------------------------------------- # internals -_cache = [] -_module_iter = iter(modules) - def _list_classes(): - """Yield all available extractor classes""" + """Yield available extractor classes""" yield from _cache - globals_ = globals() - for module_name in _module_iter: - module = __import__(module_name, globals_, None, (), 1) + for module in _module_iter: yield from add_module(module) - globals_["_list_classes"] = lambda : _cache + globals()["_list_classes"] = lambda : _cache + + +def _modules_internal(): + globals_ = globals() + for module_name in modules: + yield __import__(module_name, globals_, None, (), 1) + + +def _modules_path(path, files): + sys.path.insert(0, path) + try: + return [ + __import__(name[:-3]) + for name in files + if name.endswith(".py") + ] + finally: + del sys.path[0] def _get_classes(module): @@ -240,3 +258,7 @@ def _get_classes(module): hasattr(cls, "pattern") and cls.__module__ == module.__name__ ) ] + + +_cache = [] +_module_iter = _modules_internal() diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py index 1b49d6a..638fedc 100644 --- a/gallery_dl/extractor/bbc.py +++ b/gallery_dl/extractor/bbc.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,7 +10,6 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, util -import json BASE_PATTERN = r"(?:https?://)?(?:www\.)?bbc\.co\.uk(/programmes/" @@ -38,7 +37,7 @@ class BbcGalleryExtractor(GalleryExtractor): ) def metadata(self, page): - data = json.loads(text.extr( + data = util.json_loads(text.extr( page, '')) return { "programme": self.gallery_url.split("/")[4], diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py index 44d6065..d6adb4e 100644 --- a/gallery_dl/extractor/bcy.py +++ b/gallery_dl/extractor/bcy.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020-2022 Mike Fährmann +# Copyright 2020-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for https://bcy.net/""" from .common import Extractor, Message -from .. import text, exception -import json +from .. 
import text, util, exception import re @@ -100,9 +99,9 @@ class BcyExtractor(Extractor): .replace('\\\\u002F', '/') .replace('\\"', '"')) try: - return json.loads(data)["detail"] + return util.json_loads(data)["detail"] except ValueError: - return json.loads(data.replace('\\"', '"'))["detail"] + return util.json_loads(data.replace('\\"', '"'))["detail"] class BcyUserExtractor(BcyExtractor): diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 6da6175..1469aad 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -9,8 +9,7 @@ """Extractors for https://www.behance.net/""" from .common import Extractor, Message -from .. import text -import json +from .. import text, util class BehanceExtractor(Extractor): @@ -120,7 +119,7 @@ class BehanceGalleryExtractor(BehanceExtractor): } page = self.request(url, cookies=cookies).text - data = json.loads(text.extr( + data = util.json_loads(text.extr( page, 'id="beconfig-store_state">', '')) return self._update(data["project"]["project"]) diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index 8a1a42e..56010c2 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for Blogger blogs""" from .common import Extractor, Message -from .. import text -import json +from .. import text, util import re BASE_PATTERN = ( @@ -61,7 +60,7 @@ class BloggerExtractor(Extractor): page = self.request(post["url"]).text for url in findall_video(page): page = self.request(url).text - video_config = json.loads(text.extr( + video_config = util.json_loads(text.extr( page, 'var VIDEO_CONFIG =', '\n')) files.append(max( video_config["streams"], diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 1c339a9..17d066d 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -6,20 +6,19 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://bunkr.ru/""" +"""Extractors for https://bunkr.su/""" from .lolisafe import LolisafeAlbumExtractor from .. 
import text -import json class BunkrAlbumExtractor(LolisafeAlbumExtractor): - """Extractor for bunkr.ru albums""" + """Extractor for bunkr.su albums""" category = "bunkr" - root = "https://bunkr.ru" - pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:ru|is|to)/a/([^/?#]+)" + root = "https://bunkr.su" + pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u|is|to)/a/([^/?#]+)" test = ( - ("https://bunkr.ru/a/Lktg9Keq", { + ("https://bunkr.su/a/Lktg9Keq", { "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { @@ -33,7 +32,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): }, }), # mp4 (#2239) - ("https://app.bunkr.is/a/ptRHaCn2", { + ("https://app.bunkr.ru/a/ptRHaCn2", { "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4", "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471", }), @@ -41,44 +40,57 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): ("https://bunkr.is/a/iXTTc1o2", { "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/", "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8", + "keyword": { + "album_id": "iXTTc1o2", + "album_name": "test2", + "album_size": "691.1 KB", + "count": 2, + "description": "072022", + "filename": "re:video-wFO9FtxG|image-sZrQUeOx", + "id": "re:wFO9FtxG|sZrQUeOx", + "name": "re:video|image", + "num": int, + }, }), ("https://bunkr.to/a/Lktg9Keq"), ) def fetch_album(self, album_id): - root = self.root + # album metadata + page = self.request(self.root + "/a/" + self.album_id).text + info = text.split_html(text.extr( + page, "").partition(">")[2]) + count, _, size = info[1].split(None, 2) + + # files + cdn = None + files = [] + append = files.append + headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"} - try: - data = json.loads(text.extr( - self.request(root + "/a/" + self.album_id).text, - 'id="__NEXT_DATA__" type="application/json">', '<')) - album = data["props"]["pageProps"]["album"] - files = album["files"] - except Exception as exc: - self.log.debug("%s: %s", exc.__class__.__name__, exc) - self.log.debug("Falling back to lolisafe API") - self.root = root.replace("://", "://app.", 1) - files, data = LolisafeAlbumExtractor.fetch_album(self, album_id) - # fix file URLs (bunkr..ru -> bunkr.ru) (#3481) - for file in files: - file["file"] = file["file"].replace("bunkr..", "bunkr.", 1) - else: - for file in files: - file["file"] = file["cdn"] + "/" + file["name"] - data = { - "album_id" : self.album_id, - "album_name" : text.unescape(album["name"]), - "description": text.unescape(album["description"]), - "count" : len(files), - } + pos = page.index('class="grid-images') + for url in text.extract_iter(page, ' 2 else "", + "count" : len(files), + } diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py index 509108f..7a21d2a 100644 --- a/gallery_dl/extractor/catbox.py +++ b/gallery_dl/extractor/catbox.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2022 Mike Fährmann +# Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,7 @@ """Extractors for https://catbox.moe/""" -from .common import GalleryExtractor +from .common import GalleryExtractor, Extractor, Message from .. 
import text @@ -54,3 +54,26 @@ class CatboxAlbumExtractor(GalleryExtractor): for path in text.extract_iter( page, ">https://files.catbox.moe/", "<") ] + + +class CatboxFileExtractor(Extractor): + """Extractor for catbox files""" + category = "catbox" + subcategory = "file" + archive_fmt = "{filename}" + pattern = r"(?:https?://)?(?:files|litter|de)\.catbox\.moe/([^/?#]+)" + test = ( + ("https://files.catbox.moe/8ih3y7.png", { + "pattern": r"^https://files\.catbox\.moe/8ih3y7\.png$", + "content": "0c8768055e4e20e7c7259608b67799171b691140", + "count": 1, + }), + ("https://litter.catbox.moe/t8v3n9.png"), + ("https://de.catbox.moe/bjdmz1.jpg"), + ) + + def items(self): + url = text.ensure_http_scheme(self.url) + file = text.nameext_from_url(url, {"url": url}) + yield Message.Directory, file + yield Message.Url, url, file diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 4cefa1c..8024be9 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -791,15 +791,21 @@ HTTP_HEADERS = { ("TE", "trailers"), ), "chrome": ( + ("Connection", "keep-alive"), ("Upgrade-Insecure-Requests", "1"), ("User-Agent", "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, " - "like Gecko) Chrome/92.0.4515.131 Safari/537.36"), + "like Gecko) Chrome/111.0.0.0 Safari/537.36"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9," - "image/webp,image/apng,*/*;q=0.8"), + "image/avif,image/webp,image/apng,*/*;q=0.8," + "application/signed-exchange;v=b3;q=0.7"), ("Referer", None), + ("Sec-Fetch-Site", "same-origin"), + ("Sec-Fetch-Mode", "no-cors"), + ("Sec-Fetch-Dest", "empty"), ("Accept-Encoding", None), ("Accept-Language", "en-US,en;q=0.9"), - ("Cookie", None), + ("cookie", None), + ("content-length", None), ), } @@ -838,8 +844,7 @@ SSL_CIPHERS = { "AES128-GCM-SHA256:" "AES256-GCM-SHA384:" "AES128-SHA:" - "AES256-SHA:" - "DES-CBC3-SHA" + "AES256-SHA" ), } diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 7b0e572..f104556 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -9,8 +9,7 @@ """Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" from .common import BaseExtractor, Message -from ..version import __version__ -from .. import text +from .. 
import text, util import datetime @@ -21,36 +20,13 @@ class DanbooruExtractor(BaseExtractor): page_limit = 1000 page_start = None per_page = 200 + request_interval = 1.0 def __init__(self, match): - self._init_category(match) - - instance = INSTANCES.get(self.category) or {} - iget = instance.get - - self.headers = iget("headers") - self.page_limit = iget("page-limit", 1000) - self.page_start = iget("page-start") - self.per_page = iget("per-page", 200) - self.request_interval_min = iget("request-interval-min", 0.0) - self._pools = iget("pools") - self._popular_endpoint = iget("popular", "/explore/posts/popular.json") - BaseExtractor.__init__(self, match) - self.ugoira = self.config("ugoira", False) self.external = self.config("external", False) - metadata = self.config("metadata", False) - if metadata: - if isinstance(metadata, (list, tuple)): - metadata = ",".join(metadata) - elif not isinstance(metadata, str): - metadata = "artist_commentary,children,notes,parent,uploader" - self.metadata_includes = metadata - else: - self.metadata_includes = None - threshold = self.config("threshold") if isinstance(threshold, int): self.threshold = 1 if threshold < 1 else threshold @@ -62,10 +38,6 @@ class DanbooruExtractor(BaseExtractor): self.log.debug("Using HTTP Basic Auth for user '%s'", username) self.session.auth = (username, api_key) - def request(self, url, **kwargs): - kwargs["headers"] = self.headers - return BaseExtractor.request(self, url, **kwargs) - def skip(self, num): pages = num // self.per_page if pages >= self.page_limit: @@ -74,32 +46,28 @@ class DanbooruExtractor(BaseExtractor): return pages * self.per_page def items(self): + self.session.headers["User-Agent"] = util.USERAGENT + + includes = self.config("metadata") + if includes: + if isinstance(includes, (list, tuple)): + includes = ",".join(includes) + elif not isinstance(includes, str): + includes = "artist_commentary,children,notes,parent,uploader" + data = self.metadata() for post in self.posts(): - file = post.get("file") - if file: - url = file["url"] - if not url: - md5 = file["md5"] - url = file["url"] = ( - "https://static1.{}/data/{}/{}/{}.{}".format( - self.root[8:], md5[0:2], md5[2:4], md5, file["ext"] - )) - post["filename"] = file["md5"] - post["extension"] = file["ext"] + try: + url = post["file_url"] + except KeyError: + if self.external and post["source"]: + post.update(data) + yield Message.Directory, post + yield Message.Queue, post["source"], post + continue - else: - try: - url = post["file_url"] - except KeyError: - if self.external and post["source"]: - post.update(data) - yield Message.Directory, post - yield Message.Queue, post["source"], post - continue - - text.nameext_from_url(url, post) + text.nameext_from_url(url, post) if post["extension"] == "zip": if self.ugoira: @@ -109,9 +77,9 @@ class DanbooruExtractor(BaseExtractor): url = post["large_file_url"] post["extension"] = "webm" - if self.metadata_includes: + if includes: meta_url = "{}/posts/{}.json?only={}".format( - self.root, post["id"], self.metadata_includes) + self.root, post["id"], includes) post.update(self.request(meta_url).json()) if url[0] == "/": @@ -127,7 +95,7 @@ class DanbooruExtractor(BaseExtractor): def posts(self): return () - def _pagination(self, endpoint, params, pagenum=False): + def _pagination(self, endpoint, params, pages=False): url = self.root + endpoint params["limit"] = self.per_page params["page"] = self.page_start @@ -141,7 +109,7 @@ class DanbooruExtractor(BaseExtractor): if len(posts) < self.threshold: return - if 
pagenum: + if pages: params["page"] += 1 else: for post in reversed(posts): @@ -163,34 +131,20 @@ class DanbooruExtractor(BaseExtractor): for index, delay in enumerate(delays)] -INSTANCES = { +BASE_PATTERN = DanbooruExtractor.update({ "danbooru": { "root": None, "pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us", }, - "e621": { - "root": None, - "pattern": r"e(?:621|926)\.net", - "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format( - __version__)}, - "pools": "sort", - "popular": "/popular.json", - "page-limit": 750, - "per-page": 320, - "request-interval-min": 1.0, - }, "atfbooru": { "root": "https://booru.allthefallen.moe", "pattern": r"booru\.allthefallen\.moe", - "page-limit": 5000, }, "aibooru": { "root": None, "pattern": r"(?:safe.)?aibooru\.online", } -} - -BASE_PATTERN = DanbooruExtractor.update(INSTANCES) +}) class DanbooruTagExtractor(DanbooruExtractor): @@ -213,10 +167,6 @@ class DanbooruTagExtractor(DanbooruExtractor): "pattern": r"https://i\.pximg\.net/img-original/img" r"/2008/08/28/02/35/48/1476533_p0\.jpg", }), - ("https://e621.net/posts?tags=anry", { - "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba", - "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", - }), ("https://booru.allthefallen.moe/posts?tags=yume_shokunin", { "count": 12, }), @@ -228,7 +178,6 @@ class DanbooruTagExtractor(DanbooruExtractor): ("https://hijiribe.donmai.us/posts?tags=bonocho"), ("https://sonohara.donmai.us/posts?tags=bonocho"), ("https://safebooru.donmai.us/posts?tags=bonocho"), - ("https://e926.net/posts?tags=anry"), ("https://safe.aibooru.online/posts?tags=center_frills"), ) @@ -254,23 +203,17 @@ class DanbooruPoolExtractor(DanbooruExtractor): ("https://danbooru.donmai.us/pools/7659", { "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99", }), - ("https://e621.net/pools/73", { - "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a", - "content": "91abe5d5334425d9787811d7f06d34c77974cd22", - }), ("https://booru.allthefallen.moe/pools/9", { "url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5", "count": 6, }), ("https://aibooru.online/pools/1"), ("https://danbooru.donmai.us/pool/show/7659"), - ("https://e621.net/pool/show/73"), ) def __init__(self, match): DanbooruExtractor.__init__(self, match) self.pool_id = match.group(match.lastindex) - self.post_ids = () def metadata(self): url = "{}/pools/{}.json".format(self.root, self.pool_id) @@ -280,29 +223,8 @@ class DanbooruPoolExtractor(DanbooruExtractor): return {"pool": pool} def posts(self): - if self._pools == "sort": - self.log.info("Fetching posts of pool %s", self.pool_id) - - id_to_post = { - post["id"]: post - for post in self._pagination( - "/posts.json", {"tags": "pool:" + self.pool_id}) - } - - posts = [] - append = posts.append - for num, pid in enumerate(self.post_ids, 1): - if pid in id_to_post: - post = id_to_post[pid] - post["num"] = num - append(post) - else: - self.log.warning("Post %s is unavailable", pid) - return posts - - else: - params = {"tags": "pool:" + self.pool_id} - return self._pagination("/posts.json", params) + params = {"tags": "pool:" + self.pool_id} + return self._pagination("/posts.json", params) class DanbooruPostExtractor(DanbooruExtractor): @@ -318,10 +240,6 @@ class DanbooruPostExtractor(DanbooruExtractor): "pattern": r"https?://.+\.zip$", "options": (("ugoira", True),) }), - ("https://e621.net/posts/535", { - "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529", - "content": "66f46e96a893fba8e694c4e049b23c2acc9af462", - }), ("https://booru.allthefallen.moe/posts/22", { "content": 
"21dda68e1d7e0a554078e62923f537d8e895cac8", }), @@ -329,7 +247,6 @@ class DanbooruPostExtractor(DanbooruExtractor): "content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9", }), ("https://danbooru.donmai.us/post/show/294929"), - ("https://e621.net/post/show/535"), ) def __init__(self, match): @@ -338,8 +255,7 @@ class DanbooruPostExtractor(DanbooruExtractor): def posts(self): url = "{}/posts/{}.json".format(self.root, self.post_id) - post = self.request(url).json() - return (post["post"] if "post" in post else post,) + return (self.request(url).json(),) class DanbooruPopularExtractor(DanbooruExtractor): @@ -355,12 +271,6 @@ class DanbooruPopularExtractor(DanbooruExtractor): "range": "1-120", "count": 120, }), - ("https://e621.net/popular"), - (("https://e621.net/explore/posts/popular" - "?date=2019-06-01&scale=month"), { - "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", - "count": ">= 70", - }), ("https://booru.allthefallen.moe/explore/posts/popular"), ("https://aibooru.online/explore/posts/popular"), ) @@ -385,31 +295,5 @@ class DanbooruPopularExtractor(DanbooruExtractor): def posts(self): if self.page_start is None: self.page_start = 1 - return self._pagination(self._popular_endpoint, self.params, True) - - -class DanbooruFavoriteExtractor(DanbooruExtractor): - """Extractor for e621 favorites""" - subcategory = "favorite" - directory_fmt = ("{category}", "Favorites", "{user_id}") - archive_fmt = "f_{user_id}_{id}" - pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" - test = ( - ("https://e621.net/favorites"), - ("https://e621.net/favorites?page=2&user_id=53275", { - "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", - "count": "> 260", - }), - ) - - def __init__(self, match): - DanbooruExtractor.__init__(self, match) - self.query = text.parse_query(match.group(match.lastindex)) - - def metadata(self): - return {"user_id": self.query.get("user_id", "")} - - def posts(self): - if self.page_start is None: - self.page_start = 1 - return self._pagination("/favorites.json", self.query, True) + return self._pagination( + "/explore/posts/popular.json", self.params, True) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a3187fa..37475df 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.deviantart.com/""" +"""Extractors for https://www.deviantart.com/""" from .common import Extractor, Message from .. 
import text, util, exception @@ -21,29 +21,30 @@ import re BASE_PATTERN = ( r"(?:https?://)?(?:" - r"(?:www\.)?deviantart\.com/(?!watch/)([\w-]+)|" - r"(?!www\.)([\w-]+)\.deviantart\.com)" + r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|" + r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)" ) class DeviantartExtractor(Extractor): """Base class for deviantart extractors""" category = "deviantart" + root = "https://www.deviantart.com" directory_fmt = ("{category}", "{username}") filename_fmt = "{category}_{index}_{title}.{extension}" cookiedomain = None - root = "https://www.deviantart.com" + cookienames = ("auth", "auth_secure", "userinfo") _last_request = 0 def __init__(self, match): Extractor.__init__(self, match) - self.offset = 0 self.flat = self.config("flat", True) self.extra = self.config("extra", False) self.original = self.config("original", True) self.comments = self.config("comments", False) self.user = match.group(1) or match.group(2) self.group = False + self.offset = 0 self.api = None unwatch = self.config("auto-unwatch") @@ -69,6 +70,14 @@ class DeviantartExtractor(Extractor): self.offset += num return num + def login(self): + if not self._check_cookies(self.cookienames): + username, password = self._get_auth_info() + if not username: + return False + self._update_cookies(_login_impl(self, username, password)) + return True + def items(self): self.api = DeviantartOAuthAPI(self) @@ -87,6 +96,13 @@ class DeviantartExtractor(Extractor): yield Message.Queue, url, data continue + if deviation["is_deleted"]: + # prevent crashing in case the deviation really is + # deleted + self.log.debug( + "Skipping %s (deleted)", deviation["deviationid"]) + continue + if "premium_folder_data" in deviation: data = self._fetch_premium(deviation) if not data: @@ -346,9 +362,7 @@ class DeviantartExtractor(Extractor): kwargs["fatal"] = None diff = time.time() - DeviantartExtractor._last_request if diff < 2.0: - delay = 2.0 - diff - self.log.debug("Sleeping %.2f seconds", delay) - time.sleep(delay) + self.sleep(2.0 - diff, "request") while True: response = self.request(url, **kwargs) @@ -406,6 +420,16 @@ class DeviantartExtractor(Extractor): self.log.info("Unwatching %s", username) self.api.user_friends_unwatch(username) + def _eclipse_to_oauth(self, eclipse_api, deviations): + for obj in deviations: + deviation = obj["deviation"] if "deviation" in obj else obj + deviation_uuid = eclipse_api.deviation_extended_fetch( + deviation["deviationId"], + deviation["author"]["username"], + "journal" if deviation["isJournal"] else "art", + )["deviation"]["extended"]["deviationUuid"] + yield self.api.deviation(deviation_uuid) + class DeviantartUserExtractor(DeviantartExtractor): """Extractor for an artist's user profile""" @@ -676,15 +700,9 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): ) def deviations(self): - folders = self.api.collections_folders(self.user) if self.flat: - deviations = itertools.chain.from_iterable( - self.api.collections(self.user, folder["folderid"]) - for folder in folders - ) - if self.offset: - deviations = util.advance(deviations, self.offset) - return deviations + return self.api.collections_all(self.user, self.offset) + folders = self.api.collections_folders(self.user) return self._folder_urls( folders, "favourites", DeviantartCollectionExtractor) @@ -796,6 +814,14 @@ class DeviantartStatusExtractor(DeviantartExtractor): "url" : "re:^https://sta.sh", }, }), + # "deleted" deviations in 'items' + ("https://www.deviantart.com/AndrejSKalin/posts/statuses", { + "options": 
(("journals", "none"), ("original", 0), + ("image-filter", "deviationid[:8] == '147C8B03'")), + "count": 2, + "archive": False, + "keyword": {"deviationid": "147C8B03-7D34-AE93-9241-FA3C6DBBC655"} + }), ("https://www.deviantart.com/justgalym/posts/statuses", { "options": (("journals", "text"),), "url": "c8744f7f733a3029116607b826321233c5ca452d", @@ -861,8 +887,7 @@ class DeviantartPopularExtractor(DeviantartExtractor): "{popular[range]}", "{popular[search]}") archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}" pattern = (r"(?:https?://)?www\.deviantart\.com/(?:" - r"search(?:/deviations)?" - r"|(?:deviations/?)?\?order=(popular-[^/?#]+)" + r"(?:deviations/?)?\?order=(popular-[^/?#]+)" r"|((?:[\w-]+/)*)(popular-[^/?#]+)" r")/?(?:\?([^#]*))?") test = ( @@ -876,8 +901,6 @@ class DeviantartPopularExtractor(DeviantartExtractor): "range": "1-30", "count": 30, }), - ("https://www.deviantart.com/search?q=tree"), - ("https://www.deviantart.com/search/deviations?order=popular-1-week"), ("https://www.deviantart.com/artisan/popular-all-time/?q=tree"), ) @@ -974,7 +997,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): subcategory = "deviation" archive_fmt = "g_{_username}_{index}.{extension}" pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)" - r"|(?:https?://)?(?:www\.)?deviantart\.com/" + r"|(?:https?://)?(?:www\.)?(?:fx)?deviantart\.com/" r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)" r"(\d+)" # bare deviation ID without slug r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36 @@ -1068,6 +1091,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): # old /view/ URLs from the Wayback Machine ("https://www.deviantart.com/view.php?id=14864502"), ("http://www.deviantart.com/view-full.php?id=100842"), + + ("https://www.fxdeviantart.com/zzz/art/zzz-1234567890"), + ("https://www.fxdeviantart.com/view/1234567890"), ) skip = Extractor.skip @@ -1094,6 +1120,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor): subcategory = "scraps" directory_fmt = ("{category}", "{username}", "Scraps") archive_fmt = "s_{_username}_{index}.{extension}" + cookiedomain = ".deviantart.com" pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b" test = ( ("https://www.deviantart.com/shimoda7/gallery/scraps", { @@ -1102,34 +1129,109 @@ class DeviantartScrapsExtractor(DeviantartExtractor): ("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"), ("https://shimoda7.deviantart.com/gallery/?catpath=scraps"), ) + + def deviations(self): + self.login() + + eclipse_api = DeviantartEclipseAPI(self) + return self._eclipse_to_oauth( + eclipse_api, eclipse_api.gallery_scraps(self.user, self.offset)) + + +class DeviantartSearchExtractor(DeviantartExtractor): + """Extractor for deviantart search results""" + subcategory = "search" + directory_fmt = ("{category}", "Search", "{search_tags}") + archive_fmt = "Q_{search_tags}_{index}.{extension}" cookiedomain = ".deviantart.com" - cookienames = ("auth", "auth_secure", "userinfo") - _warning = True + pattern = (r"(?:https?://)?www\.deviantart\.com" + r"/search(?:/deviations)?/?\?([^#]+)") + test = ( + ("https://www.deviantart.com/search?q=tree"), + ("https://www.deviantart.com/search/deviations?order=popular-1-week"), + ) + + skip = Extractor.skip + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.query = text.parse_query(self.user) + self.search = self.query.get("q", "") + self.user = "" + + def deviations(self): + logged_in = self.login() + + eclipse_api = DeviantartEclipseAPI(self) + 
search = (eclipse_api.search_deviations + if logged_in else self._search_html) + return self._eclipse_to_oauth(eclipse_api, search(self.query)) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["search_tags"] = self.search + + def _search_html(self, params): + url = self.root + "/search" + deviation = { + "deviationId": None, + "author": {"username": "u"}, + "isJournal": False, + } + + while True: + page = self.request(url, params=params).text + + items , pos = text.rextract(page, r'\"items\":[', ']') + cursor, pos = text.extract(page, r'\"cursor\":\"', '\\', pos) + + for deviation_id in items.split(","): + deviation["deviationId"] = deviation_id + yield deviation + + if not cursor: + return + params["cursor"] = cursor + + +class DeviantartGallerySearchExtractor(DeviantartExtractor): + """Extractor for deviantart gallery searches""" + subcategory = "gallery-search" + archive_fmt = "g_{_username}_{index}.{extension}" + cookiedomain = ".deviantart.com" + pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)" + test = ( + ("https://www.deviantart.com/shimoda7/gallery?q=memory", { + "options": (("original", 0),), + "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", + }), + ("https://www.deviantart.com/shimoda7/gallery?q=memory&sort=popular"), + ) + + def __init__(self, match): + DeviantartExtractor.__init__(self, match) + self.query = match.group(3) def deviations(self): self.login() eclipse_api = DeviantartEclipseAPI(self) - for obj in eclipse_api.gallery_scraps(self.user, self.offset): - deviation = obj["deviation"] - deviation_uuid = eclipse_api.deviation_extended_fetch( - deviation["deviationId"], - deviation["author"]["username"], - "journal" if deviation["isJournal"] else "art", - )["deviation"]["extended"]["deviationUuid"] + info = eclipse_api.user_info(self.user) - yield self.api.deviation(deviation_uuid) + query = text.parse_query(self.query) + self.search = query["q"] - def login(self): - """Login and obtain session cookies""" - if not self._check_cookies(self.cookienames): - username, password = self._get_auth_info() - if username: - self._update_cookies(_login_impl(self, username, password)) - elif self._warning: - self.log.warning( - "No session cookies set: Unable to fetch mature scraps.") - DeviantartScrapsExtractor._warning = False + return self._eclipse_to_oauth( + eclipse_api, eclipse_api.galleries_search( + info["user"]["userId"], + self.search, + self.offset, + query.get("sort", "most-recent"), + )) + + def prepare(self, deviation): + DeviantartExtractor.prepare(self, deviation) + deviation["search_tags"] = self.search class DeviantartFollowingExtractor(DeviantartExtractor): @@ -1261,6 +1363,13 @@ class DeviantartOAuthAPI(): "mature_content": self.mature} return self._pagination(endpoint, params) + def collections_all(self, username, offset=0): + """Yield all deviations in a user's collection""" + endpoint = "/collections/all" + params = {"username": username, "offset": offset, "limit": 24, + "mature_content": self.mature} + return self._pagination(endpoint, params) + @memcache(keyarg=1) def collections_folders(self, username, offset=0): """Yield all collection folders of a specific user""" @@ -1411,7 +1520,7 @@ class DeviantartOAuthAPI(): while True: if self.delay: - time.sleep(self.delay) + self.extractor.sleep(self.delay, "api") self.authenticate(None if public else self.refresh_token_key) kwargs["headers"] = self.headers @@ -1480,6 +1589,15 @@ class DeviantartOAuthAPI(): self._metadata(results) if self.folders: 
self._folders(results) + else: # attempt to fix "deleted" deviations + for dev in self._shared_content(results): + if not dev["is_deleted"]: + continue + patch = self._call( + "/deviation/" + dev["deviationid"], fatal=False) + if patch: + dev.update(patch) + yield from results if not data["has_more"] and ( @@ -1497,6 +1615,14 @@ class DeviantartOAuthAPI(): return params["offset"] = int(params["offset"]) + len(results) + @staticmethod + def _shared_content(results): + """Return an iterable of shared deviations in 'results'""" + for result in results: + for item in result.get("items") or (): + if "deviation" in item: + yield item["deviation"] + def _pagination_list(self, endpoint, params, key="results"): result = [] result.extend(self._pagination(endpoint, params, False, key=key)) @@ -1585,6 +1711,29 @@ class DeviantartEclipseAPI(): } return self._pagination(endpoint, params) + def galleries_search(self, user_id, query, + offset=None, order="most-recent"): + endpoint = "/shared_api/galleries/search" + params = { + "userid": user_id, + "order" : order, + "q" : query, + "offset": offset, + "limit" : 24, + } + return self._pagination(endpoint, params) + + def search_deviations(self, params): + endpoint = "/da-browse/api/networkbar/search/deviations" + return self._pagination(endpoint, params, key="deviations") + + def user_info(self, user, expand=False): + endpoint = "/shared_api/user/info" + params = {"username": user} + if expand: + params["expand"] = "user.stats,user.profile,user.watch" + return self._call(endpoint, params) + def user_watching(self, user, offset=None): endpoint = "/da-user-profile/api/module/watching" params = { @@ -1611,23 +1760,37 @@ class DeviantartEclipseAPI(): except Exception: return {"error": response.text} - def _pagination(self, endpoint, params): + def _pagination(self, endpoint, params, key="results"): + limit = params.get("limit", 24) + warn = True + while True: data = self._call(endpoint, params) - results = data.get("results") + results = data.get(key) if results is None: return + if len(results) < limit and warn and data.get("hasMore"): + warn = False + self.log.warning( + "Private deviations detected! 
" + "Provide login credentials or session cookies " + "to be able to access them.") yield from results if not data.get("hasMore"): return - next_offset = data.get("nextOffset") - if next_offset: - params["offset"] = next_offset + if "nextCursor" in data: + params["offset"] = None + params["cursor"] = data["nextCursor"] + elif "nextOffset" in data: + params["offset"] = data["nextOffset"] + params["cursor"] = None + elif params.get("offset") is None: + return else: - params["offset"] += params["limit"] + params["offset"] = int(params["offset"]) + len(results) def _module_id_watching(self, user): url = "{}/{}/about".format(self.extractor.root, user) diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 8b90250..e85eb8d 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -44,6 +44,11 @@ class DirectlinkExtractor(Extractor): ("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw" ".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP" "mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"), + # internationalized domain name + ("https://räksmörgås.josefsson.org/raksmorgas.jpg", { + "url": "a65667f670b194afbd1e3ea5e7a78938d36747da", + "keyword": "fd5037fe86eebd4764e176cbaf318caec0f700be", + }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index d78f25b..59e8c90 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for https://dynasty-scans.com/""" from .common import ChapterExtractor, MangaExtractor, Extractor, Message -from .. import text -import json +from .. import text, util import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?dynasty-scans\.com" @@ -86,7 +85,7 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor): data = text.extr(page, "var pages = ", ";\n") return [ (self.root + img["image"], None) - for img in json.loads(data) + for img in util.json_loads(data) ] diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py new file mode 100644 index 0000000..8f2994e --- /dev/null +++ b/gallery_dl/extractor/e621.py @@ -0,0 +1,254 @@ +# -*- coding: utf-8 -*- + +# Copyright 2014-2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://e621.net/ and other e621 instances""" + +from .common import Message +from . import danbooru +from .. 
import text, util + + +class E621Extractor(danbooru.DanbooruExtractor): + """Base class for e621 extractors""" + basecategory = "E621" + page_limit = 750 + page_start = None + per_page = 320 + request_interval_min = 1.0 + + def items(self): + self.session.headers["User-Agent"] = util.USERAGENT + " (by mikf)" + + includes = self.config("metadata") or () + if includes: + if isinstance(includes, str): + includes = includes.split(",") + elif not isinstance(includes, (list, tuple)): + includes = ("notes", "pools") + + notes = ("notes" in includes) + pools = ("pools" in includes) + + data = self.metadata() + for post in self.posts(): + file = post["file"] + + if not file["url"]: + md5 = file["md5"] + file["url"] = "https://static1.{}/data/{}/{}/{}.{}".format( + self.root[8:], md5[0:2], md5[2:4], md5, file["ext"]) + + if notes and post.get("has_notes"): + url = "{}/notes.json?search[post_id]={}".format( + self.root, post["id"]) + post["notes"] = self.request(url).json() + + if pools and post["pools"]: + url = "{}/pools.json?search[id]={}".format( + self.root, ",".join(map(str, post["pools"]))) + post["pools"] = _pools = self.request(url).json() + for pool in _pools: + pool["name"] = pool["name"].replace("_", " ") + + post["filename"] = file["md5"] + post["extension"] = file["ext"] + + post.update(data) + yield Message.Directory, post + yield Message.Url, file["url"], post + + +BASE_PATTERN = E621Extractor.update({ + "e621": { + "root": "https://e621.net", + "pattern": r"e621\.net", + }, + "e926": { + "root": "https://e926.net", + "pattern": r"e926\.net", + }, +}) + + +class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor): + """Extractor for e621 posts from tag searches""" + pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)" + test = ( + ("https://e621.net/posts?tags=anry", { + "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba", + "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", + }), + ("https://e621.net/post/index/1/anry"), + ("https://e621.net/post?tags=anry"), + + ("https://e926.net/posts?tags=anry", { + "url": "12198b275c62ffe2de67cca676c8e64de80c425d", + "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", + }), + ("https://e926.net/post/index/1/anry"), + ("https://e926.net/post?tags=anry"), + ) + + +class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor): + """Extractor for e621 pools""" + pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)" + test = ( + ("https://e621.net/pools/73", { + "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a", + "content": "91abe5d5334425d9787811d7f06d34c77974cd22", + }), + ("https://e621.net/pool/show/73"), + + ("https://e926.net/pools/73", { + "url": "6936f1b6a18c5c25bee7cad700088dbc2503481b", + "content": "91abe5d5334425d9787811d7f06d34c77974cd22", + }), + ("https://e926.net/pool/show/73"), + ) + + def posts(self): + self.log.info("Fetching posts of pool %s", self.pool_id) + + id_to_post = { + post["id"]: post + for post in self._pagination( + "/posts.json", {"tags": "pool:" + self.pool_id}) + } + + posts = [] + append = posts.append + for num, pid in enumerate(self.post_ids, 1): + if pid in id_to_post: + post = id_to_post[pid] + post["num"] = num + append(post) + else: + self.log.warning("Post %s is unavailable", pid) + return posts + + +class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor): + """Extractor for single e621 posts""" + pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)" + test = ( + ("https://e621.net/posts/535", { + "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529", + 
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462", + }), + ("https://e621.net/posts/3181052", { + "options": (("metadata", "notes,pools"),), + "pattern": r"https://static\d\.e621\.net/data/c6/8c" + r"/c68cca0643890b615f75fb2719589bff\.png", + "keyword": { + "notes": [ + { + "body": "Little Legends 2", + "created_at": "2022-05-16T13:58:38.877-04:00", + "creator_id": 517450, + "creator_name": "EeveeCuddler69", + "height": 475, + "id": 321296, + "is_active": True, + "post_id": 3181052, + "updated_at": "2022-05-16T13:59:02.050-04:00", + "version": 3, + "width": 809, + "x": 83, + "y": 117, + }, + ], + "pools": [ + { + "category": "series", + "created_at": "2022-02-17T00:29:22.669-05:00", + "creator_id": 1077440, + "creator_name": "Yeetus90", + "description": "* \"Little Legends\":/pools/27971\r\n" + "* Little Legends 2\r\n" + "* \"Little Legends 3\":/pools/27481", + "id": 27492, + "is_active": False, + "name": "Little Legends 2", + "post_count": 39, + "post_ids": list, + "updated_at": "2022-03-27T06:30:03.382-04:00" + }, + ], + }, + }), + ("https://e621.net/post/show/535"), + + ("https://e926.net/posts/535", { + "url": "17aec8ebd8fab098d321adcb62a2db59dab1f4bf", + "content": "66f46e96a893fba8e694c4e049b23c2acc9af462", + }), + ("https://e926.net/post/show/535"), + ) + + def posts(self): + url = "{}/posts/{}.json".format(self.root, self.post_id) + return (self.request(url).json()["post"],) + + +class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor): + """Extractor for popular images from e621""" + pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?" + test = ( + ("https://e621.net/explore/posts/popular"), + (("https://e621.net/explore/posts/popular" + "?date=2019-06-01&scale=month"), { + "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", + "count": ">= 70", + }), + + ("https://e926.net/explore/posts/popular"), + (("https://e926.net/explore/posts/popular" + "?date=2019-06-01&scale=month"), { + "pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+", + "count": ">= 70", + }), + ) + + def posts(self): + if self.page_start is None: + self.page_start = 1 + return self._pagination("/popular.json", self.params, True) + + +class E621FavoriteExtractor(E621Extractor): + """Extractor for e621 favorites""" + subcategory = "favorite" + directory_fmt = ("{category}", "Favorites", "{user_id}") + archive_fmt = "f_{user_id}_{id}" + pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" + test = ( + ("https://e621.net/favorites"), + ("https://e621.net/favorites?page=2&user_id=53275", { + "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+", + "count": "> 260", + }), + + ("https://e926.net/favorites"), + ("https://e926.net/favorites?page=2&user_id=53275", { + "pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+", + "count": "> 260", + }), + ) + + def __init__(self, match): + E621Extractor.__init__(self, match) + self.query = text.parse_query(match.group(match.lastindex)) + + def metadata(self): + return {"user_id": self.query.get("user_id", "")} + + def posts(self): + if self.page_start is None: + self.page_start = 1 + return self._pagination("/favorites.json", self.query, True) diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py index ad3f16b..03307f8 100644 --- a/gallery_dl/extractor/erome.py +++ b/gallery_dl/extractor/erome.py @@ -12,7 +12,6 @@ from .common import Extractor, Message from .. 
import text, util, exception from ..cache import cache import itertools -import time BASE_PATTERN = r"(?:https?://)?(?:www\.)?erome\.com" @@ -75,7 +74,7 @@ class EromeExtractor(Extractor): if response.content.find( b"Please wait a few moments", 0, 600) < 0: return response - time.sleep(5) + self.sleep(5.0, "check") def _pagination(self, url, params): for params["page"] in itertools.count(1): diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py index 57587b6..0503dcf 100644 --- a/gallery_dl/extractor/fallenangels.py +++ b/gallery_dl/extractor/fallenangels.py @@ -6,11 +6,10 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters from https://www.fascans.com/""" +"""Extractors for https://www.fascans.com/""" from .common import ChapterExtractor, MangaExtractor from .. import text, util -import json class FallenangelsChapterExtractor(ChapterExtractor): @@ -56,7 +55,7 @@ class FallenangelsChapterExtractor(ChapterExtractor): def images(page): return [ (img["page_image"], None) - for img in json.loads( + for img in util.json_loads( text.extr(page, "var pages = ", ";") ) ] diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py index 41431dc..57c4333 100644 --- a/gallery_dl/extractor/fanbox.py +++ b/gallery_dl/extractor/fanbox.py @@ -89,6 +89,7 @@ class FanboxExtractor(Extractor): content_body["imageMap"] = { image_id: image_map[image_id] for image_id in images + if image_id in image_map } post["content"] = "\n".join(content) @@ -256,7 +257,6 @@ class FanboxCreatorExtractor(FanboxExtractor): def posts(self): url = "https://api.fanbox.cc/post.listCreator?creatorId={}&limit=10" - return self._pagination(url.format(self.creator_id)) diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py index 476fdeb..13dfead 100644 --- a/gallery_dl/extractor/fantia.py +++ b/gallery_dl/extractor/fantia.py @@ -7,8 +7,7 @@ """Extractors for https://fantia.jp/""" from .common import Extractor, Message -from .. import text -import json +from .. import text, util class FantiaExtractor(Extractor): @@ -117,7 +116,7 @@ class FantiaExtractor(Extractor): yield self.root+"/"+content["download_uri"], post if content["category"] == "blog" and "comment" in content: - comment_json = json.loads(content["comment"]) + comment_json = util.json_loads(content["comment"]) ops = comment_json.get("ops", ()) # collect blogpost text first diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 2290cc2..4a38fb4 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,7 +10,6 @@ from .common import BaseExtractor, Message from .. 
import text, util
-import json
 
 
 class FoolslideExtractor(BaseExtractor):
@@ -106,7 +105,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
         })
 
     def images(self, page):
-        return json.loads(text.extr(page, "var pages = ", ";"))
+        return util.json_loads(text.extr(page, "var pages = ", ";"))
 
 
 class FoolslideMangaExtractor(FoolslideExtractor):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 8d73949..80b0ae1 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -13,6 +13,8 @@ from . import gelbooru_v02
 from .. import text, exception
 import binascii
 
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
+
 
 class GelbooruBase():
     """Base class for gelbooru extractors"""
@@ -53,6 +55,23 @@ class GelbooruBase():
             del params["pid"]
             params["tags"] = "{} id:<{}".format(self.tags, post["id"])
 
+    def _pagination_html(self, params):
+        url = self.root + "/index.php"
+        params["pid"] = self.page_start * self.per_page
+
+        data = {}
+        while True:
+            num_ids = 0
+            page = self.request(url, params=params).text
+
+            for data["id"] in text.extract_iter(page, '" id="p', '"'):
+                num_ids += 1
+                yield from self._api_request(data)
+
+            if num_ids < self.per_page:
+                return
+            params["pid"] += self.per_page
+
     @staticmethod
     def _file_url(post):
         url = post["file_url"]
@@ -88,8 +107,7 @@ class GelbooruBase():
 class GelbooruTagExtractor(GelbooruBase,
                            gelbooru_v02.GelbooruV02TagExtractor):
     """Extractor for images from gelbooru.com based on search-tags"""
-    pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
-               r"\?page=post&s=list&tags=(?P<tags>[^&#]+)")
+    pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
     test = (
         ("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
             "count": 5,
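
The _pagination_html() helper added above is what the pool and favorite
extractors below call into: it walks gelbooru's HTML listing pages via the
"pid" offset parameter (which counts posts, not pages), collects the post IDs
embedded in the markup as id="p<ID>" attributes, fetches each post through the
API, and stops once a page comes back with less than a full page of IDs. A
rough standalone sketch of that loop; the requests usage and the regex are
illustrative stand-ins, not gallery-dl's actual plumbing:

    import re
    import requests

    def paginate_html(root, params, per_page=45):
        # step through result pages until one comes back short
        params["pid"] = 0
        while True:
            page = requests.get(root + "/index.php", params=params).text
            ids = re.findall(r'" id="p(\d+)"', page)  # IDs in the markup
            yield from ids
            if len(ids) < per_page:
                return
            params["pid"] += per_page
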
@@ -108,8 +126,7 @@ class GelbooruPoolExtractor(GelbooruBase,
                             gelbooru_v02.GelbooruV02PoolExtractor):
     """Extractor for gelbooru pools"""
     per_page = 45
-    pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
-               r"\?page=pool&s=show&id=(?P<pool>\d+)")
+    pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)"
     test = (
         ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
             "count": 6,
@@ -124,9 +141,9 @@
             "id"  : self.pool_id,
             "pid" : self.page_start,
         }
-        self._page = self.request(url, params=self._params).text
+        page = self.request(url, params=self._params).text
 
-        name, pos = text.extract(self._page, "<h4>Now Viewing: ", "</h4>")
+        name, pos = text.extract(page, "<h4>Now Viewing: ", "</h4>")
         if not name:
             raise exception.NotFoundError("pool")
 
@@ -136,29 +153,19 @@
         }
 
     def posts(self):
-        url = self.root + "/index.php"
-        params = self._params
+        return self._pagination_html(self._params)
 
-        page = self._page
-        del self._page
-        data = {}
-
-        while True:
-            num_ids = 0
-            for data["id"] in text.extract_iter(page, '" id="p', '"'):
-                num_ids += 1
-                yield from self._api_request(data)
 
-            if num_ids < self.per_page:
-                return
-            params["pid"] += self.per_page
-            page = self.request(url, params=params).text
+class GelbooruFavoriteExtractor(GelbooruBase,
+                                gelbooru_v02.GelbooruV02FavoriteExtractor):
+    pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
+    test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=12345",)
 
 
 class GelbooruPostExtractor(GelbooruBase,
                             gelbooru_v02.GelbooruV02PostExtractor):
     """Extractor for single images from gelbooru.com"""
-    pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?"
+    pattern = (BASE_PATTERN +
               r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
               r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
               r"(?:[^#]+&)?id=(\d+)")
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 9292da3..9999283 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -32,6 +32,28 @@ class GenericExtractor(Extractor):
         (?:\#(?P<fragment>.*))?     # optional fragment
     """
 
+    test = (
+        ("generic:https://www.nongnu.org/lzip/", {
+            "count": 1,
+            "content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
+            "keyword": {
+                "description": "Lossless data compressor",
+                "imageurl": "https://www.nongnu.org/lzip/lzip.png",
+                "keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
+                            "gzip, data compression, GNU, free software",
+                "pageurl": "https://www.nongnu.org/lzip/",
+            },
+        }),
+        # internationalized domain name
+        ("generic:https://räksmörgås.josefsson.org/", {
+            "count": 2,
+            "pattern": "^https://räksmörgås.josefsson.org/",
+        }),
+        ("generic:https://en.wikipedia.org/Main_Page"),
+        ("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
+        ("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
+    )
+
     def __init__(self, match):
         """Init."""
         Extractor.__init__(self, match)
@@ -56,7 +78,7 @@
         self.root = self.scheme + match.group('domain')
 
     def items(self):
-        """Get page, extract metadata & images, yield them in suitable messages.
+        """Get page, extract metadata & images, yield them in suitable messages
 
         Adapted from common.GalleryExtractor.items()
 
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 43479c6..5b561ea 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -1,16 +1,15 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://www.hbrowse.com/"""
+"""Extractors for https://www.hbrowse.com/"""
 
 from .common import ChapterExtractor, MangaExtractor
-from .. import text, exception
-import json
+from ..
import text, util, exception class HbrowseBase(): @@ -68,7 +67,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor): def images(self, page): base = self.root + "/data" + self.path json_data = text.extract(page, ';list = ', ',"zzz"')[0] + "]" - return [(base + name, None) for name in json.loads(json_data)] + return [(base + name, None) for name in util.json_loads(json_data)] class HbrowseMangaExtractor(HbrowseBase, MangaExtractor): diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index dc4e31d..e771a4f 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -9,8 +9,7 @@ """Extractors for https://hentai2read.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text -import json +from .. import text, util import re @@ -78,7 +77,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor): images = text.extract(page, "'images' : ", ",\n")[0] return [ ("https://hentaicdn.com/hentai" + part, None) - for part in json.loads(images) + for part in util.json_loads(images) ] diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index 0327f56..ed8576f 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for https://hentaifox.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text -import json +from .. import text, util class HentaifoxBase(): @@ -90,7 +89,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): server1 = "https://i.hentaifox.com" server2 = "https://i2.hentaifox.com" - for num, image in json.loads(data).items(): + for num, image in util.json_loads(data).items(): ext, width, height = image.split(",") path = urlfmt(num, extmap[ext]) append((server1 + path, { diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py index bf9e464..0617330 100644 --- a/gallery_dl/extractor/hentaihand.py +++ b/gallery_dl/extractor/hentaihand.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020-2022 Mike Fährmann +# Copyright 2020-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,7 +10,6 @@ from .common import GalleryExtractor, Extractor, Message from .. 
import text, util -import json class HentaihandGalleryExtractor(GalleryExtractor): @@ -46,7 +45,7 @@ class HentaihandGalleryExtractor(GalleryExtractor): GalleryExtractor.__init__(self, match, url) def metadata(self, page): - info = json.loads(page) + info = util.json_loads(page) data = { "gallery_id" : text.parse_int(info["id"]), "title" : info["title"], diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py index 38ec77c..2297cc0 100644 --- a/gallery_dl/extractor/hentaihere.py +++ b/gallery_dl/extractor/hentaihere.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for https://hentaihere.com/""" from .common import ChapterExtractor, MangaExtractor -from .. import text -import json +from .. import text, util import re @@ -80,7 +79,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor): images = text.extr(page, "var rff_imageList = ", ";") return [ ("https://hentaicdn.com/hentai" + part, None) - for part in json.loads(images) + for part in util.json_loads(images) ] diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 44459ce..4e8d1ca 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,7 +13,6 @@ from .nozomi import decode_nozomi from ..cache import memcache from .. import text, util import string -import json import re @@ -75,7 +74,7 @@ class HitomiGalleryExtractor(GalleryExtractor): self.root, gid) def metadata(self, page): - self.info = info = json.loads(page.partition("=")[2]) + self.info = info = util.json_loads(page.partition("=")[2]) iget = info.get language = iget("language") diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 1efbbf0..497f1ef 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -9,8 +9,7 @@ """Extractors for https://www.imagefap.com/""" from .common import Extractor, Message -from .. import text, exception -import json +from .. 
import text, util, exception BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" @@ -47,7 +46,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)" test = ( - ("https://www.imagefap.com/pictures/7102714", { + ("https://www.imagefap.com/gallery/7102714", { "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3", @@ -68,6 +67,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): }, "count": 44, }), + ("https://www.imagefap.com/pictures/7102714"), ("https://www.imagefap.com/gallery.php?gid=7102714"), ("https://beta.imagefap.com/gallery.php?gid=7102714"), ) @@ -78,7 +78,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): self.image_id = "" def items(self): - url = "{}/pictures/{}/".format(self.root, self.gid) + url = "{}/gallery/{}".format(self.root, self.gid) page = self.request(url).text data = self.get_job_metadata(page) yield Message.Directory, data @@ -88,22 +88,21 @@ class ImagefapGalleryExtractor(ImagefapExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" - descr, pos = text.extract( - page, '", "<")), + "count": text.parse_int(extr(' 1 of ', ' pics"')), } + self.image_id = extr('id="img_ed_', '"') + self._count = data["count"] + + return data + def get_images(self): """Collect image-urls and -metadata""" url = "{}/photo/{}/".format(self.root, self.image_id) @@ -128,7 +127,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor): data["image_id"] = text.parse_int(data["filename"]) yield image_url, data - if cnt < 24 and num >= total: + if not cnt or cnt < 24 and num >= total: return params["idx"] += cnt @@ -173,7 +172,7 @@ class ImagefapImageExtractor(ImagefapExtractor): page, 'id="imageid_input" value="', '"', pos) gallery_id, pos = text.extract( page, 'id="galleryid_input" value="', '"', pos) - info = json.loads(info) + info = util.json_loads(info) url = info["contentUrl"] return url, text.nameext_from_url(url, { diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 207562a..d57ec89 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -187,12 +187,19 @@ class ImagevenueImageExtractor(ImagehostImageExtractor): class ImagetwistImageExtractor(ImagehostImageExtractor): """Extractor for single images from imagetwist.com""" category = "imagetwist" - pattern = r"(?:https?://)?((?:www\.)?imagetwist\.com/([a-z0-9]{12}))" - test = ("https://imagetwist.com/f1i2s4vhvbrq/test.png", { - "url": "8d5e168c0bee30211f821c6f3b2116e419d42671", - "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) + pattern = (r"(?:https?://)?((?:www\.|phun\.)?" 
+ r"image(?:twist|haha)\.com/([a-z0-9]{12}))") + test = ( + ("https://imagetwist.com/f1i2s4vhvbrq/test.png", { + "url": "8d5e168c0bee30211f821c6f3b2116e419d42671", + "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef", + "content": "0c8768055e4e20e7c7259608b67799171b691140", + }), + ("https://www.imagetwist.com/f1i2s4vhvbrq/test.png"), + ("https://phun.imagetwist.com/f1i2s4vhvbrq/test.png"), + ("https://imagehaha.com/f1i2s4vhvbrq/test.png"), + ("https://www.imagehaha.com/f1i2s4vhvbrq/test.png"), + ) @property @memcache(maxage=3*3600) diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py index 49082d8..a221075 100644 --- a/gallery_dl/extractor/imgbb.py +++ b/gallery_dl/extractor/imgbb.py @@ -9,9 +9,8 @@ """Extractors for https://imgbb.com/""" from .common import Extractor, Message -from .. import text, exception +from .. import text, util, exception from ..cache import cache -import json class ImgbbExtractor(Extractor): @@ -98,7 +97,7 @@ class ImgbbExtractor(Extractor): while True: for img in text.extract_iter(page, "data-object='", "'"): - yield json.loads(text.unquote(img)) + yield util.json_loads(text.unquote(img)) if data: if params["seek"] == data["seekEnd"]: return diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index deb31a0..4c1be0f 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -40,6 +40,7 @@ class InstagramExtractor(Extractor): self._logged_in = True self._find_tags = re.compile(r"#\w+").findall self._cursor = None + self._user = None def items(self): self.login() @@ -60,6 +61,8 @@ class InstagramExtractor(Extractor): post = self._parse_post_graphql(post) else: post = self._parse_post_rest(post) + if self._user: + post["user"] = self._user post.update(data) files = post.pop("_files") @@ -363,6 +366,22 @@ class InstagramExtractor(Extractor): self._cursor = cursor return cursor + def _assign_user(self, user): + self._user = user + + for key, old in ( + ("count_media" , "edge_owner_to_timeline_media"), + ("count_video" , "edge_felix_video_timeline"), + ("count_saved" , "edge_saved_media"), + ("count_mutual" , "edge_mutual_followed_by"), + ("count_follow" , "edge_follow"), + ("count_followed" , "edge_followed_by"), + ("count_collection", "edge_media_collections")): + try: + user[key] = user.pop(old)["count"] + except Exception: + user[key] = 0 + class InstagramUserExtractor(InstagramExtractor): """Extractor for an Instagram user profile""" @@ -796,6 +815,7 @@ class InstagramRestAPI(): name = user["username"] s = "" if name.endswith("s") else "s" raise exception.StopExtraction("%s'%s posts are private", name, s) + self.extractor._assign_user(user) return user["id"] def user_clips(self, user_id): diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 8067f63..c0a1de1 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,8 +9,7 @@ """Extractors for https://issuu.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text -import json +from .. 
import text, util class IssuuBase(): @@ -54,7 +53,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): }) def metadata(self, page): - data = json.loads(text.extr( + data = util.json_loads(text.extr( page, '").rstrip("\n\r;")) diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 10de439..46ea074 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -9,8 +9,7 @@ """Extractors for https://www.xvideos.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text -import json +from .. import text, util class XvideosBase(): @@ -113,7 +112,7 @@ class XvideosUserExtractor(XvideosBase, Extractor): def items(self): url = "{}/profiles/{}".format(self.root, self.user) page = self.request(url, notfound=self.subcategory).text - data = json.loads(text.extr( + data = util.json_loads(text.extr( page, "xv.conf=", ";"))["data"] if not isinstance(data["galleries"], dict): diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 58bf48d..2c5bd11 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021-2022 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,13 +9,11 @@ """String formatters""" import os -import json import time import string import _string import datetime import operator -import functools from . import text, util NONE = util.NONE @@ -228,7 +226,7 @@ class FStringFormatter(): """Generate text by evaluating an f-string literal""" def __init__(self, fstring, default=NONE, fmt=None): - self.format_map = util.compile_expression("f'''" + fstring + "'''") + self.format_map = util.compile_expression('f"""' + fstring + '"""') def parse_field_name(field_name): @@ -399,7 +397,7 @@ _CONVERSIONS = { "u": str.upper, "c": str.capitalize, "C": string.capwords, - "j": functools.partial(json.dumps, default=str), + "j": util.json_dumps, "t": str.strip, "T": util.datetime_to_timestamp_string, "d": text.parse_timestamp, diff --git a/gallery_dl/job.py b/gallery_dl/job.py index f7d84f0..a64c040 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -7,7 +7,6 @@ # published by the Free Software Foundation. 
import sys -import json import errno import logging import functools @@ -33,6 +32,12 @@ class Job(): self.kwdict = {} self.status = 0 + actions = extr.config("actions") + if actions: + from .actions import parse + self._logger_actions = parse(actions) + self._wrap_logger = self._wrap_logger_actions + path_proxy = output.PathfmtProxy(self) self._logger_extra = { "job" : self, @@ -68,7 +73,7 @@ class Job(): if version_info: self.kwdict[version_info] = { "version" : version.__version__, - "is_executable" : getattr(sys, "frozen", False), + "is_executable" : util.EXECUTABLE, "current_git_head": util.git_head() } @@ -94,7 +99,7 @@ class Job(): if exc.message: log.error(exc.message) self.status |= exc.code - except exception.TerminateExtraction: + except (exception.TerminateExtraction, exception.RestartExtraction): raise except exception.GalleryDLException as exc: log.error("%s: %s", exc.__class__.__name__, exc) @@ -201,7 +206,10 @@ class Job(): return self._wrap_logger(logging.getLogger(name)) def _wrap_logger(self, logger): - return output.LoggerAdapter(logger, self._logger_extra) + return output.LoggerAdapter(logger, self) + + def _wrap_logger_actions(self, logger): + return output.LoggerAdapterActions(logger, self) def _write_unsupported(self, url): if self.ulog: @@ -344,12 +352,18 @@ class DownloadJob(Job): if kwdict: job.kwdict.update(kwdict) - if pextr.config("parent-skip"): - job._skipcnt = self._skipcnt - self.status |= job.run() - self._skipcnt = job._skipcnt - else: - self.status |= job.run() + while True: + try: + if pextr.config("parent-skip"): + job._skipcnt = self._skipcnt + self.status |= job.run() + self._skipcnt = job._skipcnt + else: + self.status |= job.run() + break + except exception.RestartExtraction: + pass + else: self._write_unsupported(url) @@ -436,10 +450,12 @@ class DownloadJob(Job): archive = util.expand_path(archive) archive_format = (cfg("archive-prefix", extr.category) + cfg("archive-format", extr.archive_fmt)) + archive_pragma = (cfg("archive-pragma")) try: if "{" in archive: archive = formatter.parse(archive).format_map(kwdict) - self.archive = util.DownloadArchive(archive, archive_format) + self.archive = util.DownloadArchive( + archive, archive_format, archive_pragma) except Exception as exc: extr.log.warning( "Failed to open download archive at '%s' ('%s: %s')", @@ -709,17 +725,19 @@ class InfoJob(Job): def _print_multi(self, title, *values): stdout_write("{}\n {}\n\n".format( - title, " / ".join(json.dumps(v) for v in values))) + title, " / ".join(map(util.json_dumps, values)))) def _print_config(self, title, optname, value): optval = self.extractor.config(optname, util.SENTINEL) if optval is not util.SENTINEL: stdout_write( "{} (custom):\n {}\n{} (default):\n {}\n\n".format( - title, json.dumps(optval), title, json.dumps(value))) + title, util.json_dumps(optval), + title, util.json_dumps(value))) elif value: stdout_write( - "{} (default):\n {}\n\n".format(title, json.dumps(value))) + "{} (default):\n {}\n\n".format( + title, util.json_dumps(value))) class DataJob(Job): diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py index e9dfff0..ac38c4d 100644 --- a/gallery_dl/oauth.py +++ b/gallery_dl/oauth.py @@ -10,10 +10,10 @@ import hmac import time -import base64 import random import string import hashlib +import binascii import urllib.parse import requests @@ -100,7 +100,7 @@ class OAuth1Client(requests.auth.AuthBase): key = concat(self.consumer_secret, self.token_secret or "").encode() signature = hmac.new(key, message, hashlib.sha1).digest() - return 
quote(base64.b64encode(signature).decode()) + return quote(binascii.b2a_base64(signature)[:-1].decode()) class OAuth1API(): diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 213cd2d..aad307f 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -10,9 +10,8 @@ import argparse import logging -import json import sys -from . import job, version +from . import job, util, version class ConfigAction(argparse.Action): @@ -62,24 +61,21 @@ class OptionAction(argparse.Action): class Formatter(argparse.HelpFormatter): """Custom HelpFormatter class to customize help output""" - def __init__(self, *args, **kwargs): - super().__init__(max_help_position=30, *args, **kwargs) + def __init__(self, prog): + argparse.HelpFormatter.__init__(self, prog, max_help_position=30) - def _format_action_invocation(self, action): - opts = action.option_strings[:] - if opts: - if action.nargs != 0: - args_string = self._format_args(action, "ARG") - opts[-1] += " " + args_string - return ', '.join(opts) - else: - return self._metavar_formatter(action, action.dest)(1)[0] + def _format_action_invocation(self, action, join=", ".join): + opts = action.option_strings + if action.metavar: + opts = opts.copy() + opts[-1] += " " + action.metavar + return join(opts) def _parse_option(opt): key, _, value = opt.partition("=") try: - value = json.loads(value) + value = util.json_loads(value) except ValueError: pass return key, value @@ -110,6 +106,12 @@ def build_parser(): help=("Download URLs found in FILE ('-' for stdin). " "More than one --input-file can be specified"), ) + general.add_argument( + "-f", "--filename", + dest="filename", metavar="FORMAT", + help=("Filename format string for downloaded files " + "('/O' for \"original\" filenames)"), + ) general.add_argument( "-d", "--destination", dest="base-directory", metavar="PATH", action=ConfigAction, @@ -121,10 +123,9 @@ def build_parser(): help="Exact location for file downloads", ) general.add_argument( - "-f", "--filename", - dest="filename", metavar="FORMAT", - help=("Filename format string for downloaded files " - "('/O' for \"original\" filenames)"), + "-X", "--extractors", + dest="extractor_sources", metavar="PATH", action="append", + help="Load external extractors from PATH", ) general.add_argument( "--proxy", @@ -319,26 +320,42 @@ def build_parser(): ) configuration = parser.add_argument_group("Configuration Options") + configuration.add_argument( + "-o", "--option", + dest="options", metavar="KEY=VALUE", action=ParseAction, default=[], + help=("Additional options. 
" + "Example: -o browser=firefox") , + ) configuration.add_argument( "-c", "--config", - dest="cfgfiles", metavar="FILE", action="append", + dest="configs_json", metavar="FILE", action="append", help="Additional configuration files", ) configuration.add_argument( "--config-yaml", - dest="yamlfiles", metavar="FILE", action="append", - help=argparse.SUPPRESS, + dest="configs_yaml", metavar="FILE", action="append", + help="Additional configuration files in YAML format", ) configuration.add_argument( - "-o", "--option", - dest="options", metavar="OPT", action=ParseAction, default=[], - help="Additional '=' option values", + "--config-toml", + dest="configs_toml", metavar="FILE", action="append", + help="Additional configuration files in TOML format", ) configuration.add_argument( - "--ignore-config", - dest="load_config", action="store_false", + "--config-create", + dest="config_init", action="store_true", + help="Create a basic configuration file", + ) + configuration.add_argument( + "--config-ignore", + dest="config_load", action="store_false", help="Do not read default configuration files", ) + configuration.add_argument( + "--ignore-config", + dest="config_load", action="store_false", + help=argparse.SUPPRESS, + ) authentication = parser.add_argument_group("Authentication Options") authentication.add_argument( diff --git a/gallery_dl/output.py b/gallery_dl/output.py index 3017f85..1d53851 100644 --- a/gallery_dl/output.py +++ b/gallery_dl/output.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,6 +10,7 @@ import os import sys import shutil import logging +import functools import unicodedata from . 
import config, util, formatter @@ -23,7 +24,7 @@ LOG_LEVEL = logging.INFO class Logger(logging.Logger): - """Custom logger that includes extra info in log records""" + """Custom Logger that includes extra info in log records""" def makeRecord(self, name, level, fn, lno, msg, args, exc_info, func=None, extra=None, sinfo=None, @@ -38,9 +39,9 @@ class LoggerAdapter(): """Trimmed-down version of logging.LoggingAdapter""" __slots__ = ("logger", "extra") - def __init__(self, logger, extra): + def __init__(self, logger, job): self.logger = logger - self.extra = extra + self.extra = job._logger_extra def debug(self, msg, *args, **kwargs): if self.logger.isEnabledFor(logging.DEBUG): @@ -63,6 +64,38 @@ class LoggerAdapter(): self.logger._log(logging.ERROR, msg, args, **kwargs) +class LoggerAdapterActions(): + + def __init__(self, logger, job): + self.logger = logger + self.extra = job._logger_extra + self.actions = job._logger_actions + + self.debug = functools.partial(self.log, logging.DEBUG) + self.info = functools.partial(self.log, logging.INFO) + self.warning = functools.partial(self.log, logging.WARNING) + self.error = functools.partial(self.log, logging.ERROR) + + def log(self, level, msg, *args, **kwargs): + if args: + msg = msg % args + + actions = self.actions[level] + if actions: + args = self.extra.copy() + args["level"] = level + + for cond, action in actions: + if cond(msg): + action(args) + + level = args["level"] + + if self.logger.isEnabledFor(level): + kwargs["extra"] = self.extra + self.logger._log(level, msg, (), **kwargs) + + class PathfmtProxy(): __slots__ = ("job",) @@ -235,16 +268,32 @@ else: stderr_write = stderr_write_flush -def replace_std_streams(errors="replace"): - """Replace standard streams and set their error handlers to 'errors'""" - for name in ("stdout", "stdin", "stderr"): - stream = getattr(sys, name) - if stream: +def configure_standard_streams(): + for name in ("stdout", "stderr", "stdin"): + options = config.get(("output",), name) + if not options: + continue + + stream = getattr(sys, name, None) + if not stream: + continue + + if isinstance(options, str): + options = {"encoding": options, "errors": "replace"} + elif not options.get("errors"): + options["errors"] = "replace" + + try: + stream.reconfigure(**options) + except AttributeError: + # no 'reconfigure' support + oget = options.get setattr(sys, name, stream.__class__( stream.buffer, - errors=errors, - newline=stream.newlines, - line_buffering=stream.line_buffering, + encoding=oget("encoding", stream.encoding), + errors=oget("errors", "replace"), + newline=oget("newline", stream.newlines), + line_buffering=oget("line_buffering", stream.line_buffering), )) diff --git a/gallery_dl/path.py b/gallery_dl/path.py index 3b360e9..a14562a 100644 --- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021-2022 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -73,10 +73,15 @@ class PathFormat(): raise exception.DirectoryFormatError(exc) self.kwdict = {} - self.directory = self.realdirectory = \ - self.filename = self.extension = self.prefix = \ - self.path = self.realpath = self.temppath = "" self.delete = False + self.prefix = "" + self.filename = "" + self.extension = "" + self.directory = "" + self.realdirectory = "" + self.path = "" + self.realpath = "" + self.temppath = "" extension_map = config("extension-map") if 
extension_map is None: diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index ef211e6..c28d060 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,13 +8,42 @@ """Common classes and constants used by postprocessor modules.""" +from .. import util, formatter + class PostProcessor(): """Base class for postprocessors""" def __init__(self, job): - name = self.__class__.__name__[:-2].lower() - self.log = job.get_logger("postprocessor." + name) + self.name = self.__class__.__name__[:-2].lower() + self.log = job.get_logger("postprocessor." + self.name) def __repr__(self): return self.__class__.__name__ + + def _init_archive(self, job, options, prefix=None): + archive = options.get("archive") + if archive: + extr = job.extractor + archive = util.expand_path(archive) + if not prefix: + prefix = "_" + self.name.upper() + "_" + archive_format = ( + options.get("archive-prefix", extr.category) + + options.get("archive-format", prefix + extr.archive_fmt)) + try: + if "{" in archive: + archive = formatter.parse(archive).format_map( + job.pathfmt.kwdict) + self.archive = util.DownloadArchive( + archive, archive_format, + options.get("archive-pragma"), + "_archive_" + self.name) + except Exception as exc: + self.log.warning( + "Failed to open %s archive at '%s' ('%s: %s')", + self.name, archive, exc.__class__.__name__, exc) + else: + self.log.debug("Using %s archive '%s'", self.name, archive) + else: + self.archive = None diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index cc217c3..e81c6cf 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -43,11 +43,18 @@ class ExecPP(PostProcessor): events = events.split(",") job.register_hooks({event: execute for event in events}, options) + self._init_archive(job, options) + def exec_list(self, pathfmt, status=None): if status: return + archive = self.archive kwdict = pathfmt.kwdict + + if archive and archive.check(kwdict): + return + kwdict["_directory"] = pathfmt.realdirectory kwdict["_filename"] = pathfmt.filename kwdict["_path"] = pathfmt.realpath @@ -55,10 +62,17 @@ class ExecPP(PostProcessor): args = [arg.format_map(kwdict) for arg in self.args] self._exec(args, False) + if archive: + archive.add(kwdict) + def exec_string(self, pathfmt, status=None): if status: return + archive = self.archive + if archive and archive.check(pathfmt.kwdict): + return + if status is None and pathfmt.realpath: args = self.args.replace("{}", quote(pathfmt.realpath)) else: @@ -66,6 +80,9 @@ class ExecPP(PostProcessor): self._exec(args, True) + if archive: + archive.add(pathfmt.kwdict) + def _exec(self, args, shell): self.log.debug("Running '%s'", args) retcode = subprocess.Popen(args, shell=shell).wait() diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index 2ee1cf8..9667a41 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -1,6 +1,6 @@ # -*- coding: 
utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,6 +10,7 @@ from .common import PostProcessor from .. import util, formatter +import json import sys import os @@ -46,14 +47,12 @@ class MetadataPP(PostProcessor): ext = "txt" elif mode == "jsonl": self.write = self._write_json - self.indent = None - self.ascii = options.get("ascii", False) + self._json_encode = self._make_encoder(options).encode omode = "a" filename = "data.jsonl" else: self.write = self._write_json - self.indent = options.get("indent", 4) - self.ascii = options.get("ascii", False) + self._json_encode = self._make_encoder(options, 4).encode ext = "json" directory = options.get("directory") @@ -83,28 +82,7 @@ class MetadataPP(PostProcessor): events = events.split(",") job.register_hooks({event: self.run for event in events}, options) - archive = options.get("archive") - if archive: - extr = job.extractor - archive = util.expand_path(archive) - archive_format = ( - options.get("archive-prefix", extr.category) + - options.get("archive-format", "_MD_" + extr.archive_fmt)) - try: - if "{" in archive: - archive = formatter.parse(archive).format_map( - job.pathfmt.kwdict) - self.archive = util.DownloadArchive( - archive, archive_format, "_archive_metadata") - except Exception as exc: - self.log.warning( - "Failed to open download archive at '%s' ('%s: %s')", - archive, exc.__class__.__name__, exc) - else: - self.log.debug("Using download archive '%s'", archive) - else: - self.archive = None - + self._init_archive(job, options, "_MD_") self.mtime = options.get("mtime") self.omode = options.get("open", omode) self.encoding = options.get("encoding", "utf-8") @@ -206,13 +184,30 @@ class MetadataPP(PostProcessor): for taglist in taglists: extend(taglist) tags.sort() + elif all(isinstance(e, dict) for e in tags): + taglists = tags + tags = [] + extend = tags.extend + for tagdict in taglists: + extend([x for x in tagdict.values() if x is not None]) + tags.sort() fp.write("\n".join(tags) + "\n") def _write_json(self, fp, kwdict): if not self.private: kwdict = util.filter_dict(kwdict) - util.dump_json(kwdict, fp, self.ascii, self.indent) + fp.write(self._json_encode(kwdict) + "\n") + + @staticmethod + def _make_encoder(options, indent=None): + return json.JSONEncoder( + ensure_ascii=options.get("ascii", False), + sort_keys=options.get("sort", False), + separators=options.get("separators"), + indent=options.get("indent", indent), + check_circular=False, default=str, + ) __postprocessor__ = MetadataPP diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 6b9c457..bf67a64 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2022 Mike Fährmann +# Copyright 2017-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -14,6 +14,7 @@ import sys import json import time import random +import hashlib import sqlite3 import binascii import datetime @@ -23,7 +24,7 @@ import subprocess import urllib.parse from http.cookiejar import Cookie from email.utils import mktime_tz, parsedate_tz -from . import text, exception +from . 
import text, version, exception def bencode(num, alphabet="0123456789"): @@ -112,6 +113,24 @@ def noop(): """Does nothing""" +def md5(s): + """Generate MD5 hexdigest of 's'""" + if not s: + s = b"" + elif isinstance(s, str): + s = s.encode() + return hashlib.md5(s).hexdigest() + + +def sha1(s): + """Generate SHA1 hexdigest of 's'""" + if not s: + s = b"" + elif isinstance(s, str): + s = s.encode() + return hashlib.sha1(s).hexdigest() + + def generate_token(size=16): """Generate a random token with hexadecimal digits""" data = random.getrandbits(size * 8).to_bytes(size, "big") @@ -204,6 +223,10 @@ def datetime_to_timestamp_string(dt): return "" +json_loads = json._default_decoder.decode +json_dumps = json.JSONEncoder(default=str).encode + + def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4): """Serialize 'obj' as JSON and write it to 'fp'""" json.dump( @@ -513,7 +536,7 @@ def parse_inputfile(file, log): continue try: - value = json.loads(value.strip()) + value = json_loads(value.strip()) except ValueError as exc: log.warning("input file: unable to parse '%s': %s", value, exc) continue @@ -579,6 +602,8 @@ EPOCH = datetime.datetime(1970, 1, 1) SECOND = datetime.timedelta(0, 1) WINDOWS = (os.name == "nt") SENTINEL = object() +USERAGENT = "gallery-dl/" + version.__version__ +EXECUTABLE = getattr(sys, "frozen", False) SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"} GLOBALS = { "contains" : contains, @@ -588,13 +613,35 @@ GLOBALS = { "timedelta": datetime.timedelta, "abort" : raises(exception.StopExtraction), "terminate": raises(exception.TerminateExtraction), + "restart" : raises(exception.RestartExtraction), + "hash_sha1": sha1, + "hash_md5" : md5, "re" : re, } -def compile_expression(expr, name="", globals=GLOBALS): +def compile_expression(expr, name="", globals=None): code_object = compile(expr, name, "eval") - return functools.partial(eval, code_object, globals) + return functools.partial(eval, code_object, globals or GLOBALS) + + +def import_file(path): + """Import a Python module from a filesystem path""" + path, name = os.path.split(path) + + name, sep, ext = name.rpartition(".") + if not sep: + name = ext + + if path: + path = expand_path(path) + sys.path.insert(0, path) + try: + return __import__(name) + finally: + del sys.path[0] + else: + return __import__(name) def build_duration_func(duration, min=0.0): @@ -733,7 +780,8 @@ class RangePredicate(): self.lower = min(r.start for r in ranges) self.upper = max(r.stop for r in ranges) - 1 else: - self.lower = self.upper = 0 + self.lower = 0 + self.upper = 0 def __call__(self, _url, _kwdict): self.index = index = self.index + 1 @@ -831,7 +879,8 @@ class ExtendedUrl(): class DownloadArchive(): - def __init__(self, path, format_string, cache_key="_archive_key"): + def __init__(self, path, format_string, pragma=None, + cache_key="_archive_key"): try: con = sqlite3.connect(path, timeout=60, check_same_thread=False) except sqlite3.OperationalError: @@ -839,20 +888,23 @@ class DownloadArchive(): con = sqlite3.connect(path, timeout=60, check_same_thread=False) con.isolation_level = None - self.close = con.close - self.cursor = con.cursor() - from . 
import formatter self.keygen = formatter.parse(format_string).format_map + self.close = con.close + self.cursor = cursor = con.cursor() self._cache_key = cache_key + if pragma: + for stmt in pragma: + cursor.execute("PRAGMA " + stmt) + try: - self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " - "(entry TEXT PRIMARY KEY) WITHOUT ROWID") + cursor.execute("CREATE TABLE IF NOT EXISTS archive " + "(entry TEXT PRIMARY KEY) WITHOUT ROWID") except sqlite3.OperationalError: # fallback for missing WITHOUT ROWID support (#553) - self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " - "(entry TEXT PRIMARY KEY)") + cursor.execute("CREATE TABLE IF NOT EXISTS archive " + "(entry TEXT PRIMARY KEY)") def check(self, kwdict): """Return True if the item described by 'kwdict' exists in archive""" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 6b52610..494b7f5 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.24.5" +__version__ = "1.25.0" diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py index 7b71349..b4638b7 100644 --- a/gallery_dl/ytdl.py +++ b/gallery_dl/ytdl.py @@ -237,139 +237,13 @@ def parse_command_line(module, argv): getattr(opts, "sponsorblock_mark", None) or set() opts.sponsorblock_remove = \ getattr(opts, "sponsorblock_remove", None) or set() - sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove opts.remove_chapters = getattr(opts, "remove_chapters", None) or () - # PostProcessors - postprocessors = [] - if opts.metafromtitle: - postprocessors.append({ - "key": "MetadataFromTitle", - "titleformat": opts.metafromtitle, - }) - if getattr(opts, "add_postprocessors", None): - postprocessors += list(opts.add_postprocessors) - if sponsorblock_query: - postprocessors.append({ - "key": "SponsorBlock", - "categories": sponsorblock_query, - "api": opts.sponsorblock_api, - "when": "pre_process", - }) - if opts.parse_metadata: - postprocessors.append({ - "key": "MetadataParser", - "actions": opts.parse_metadata, - "when": "pre_process", - }) - if opts.convertsubtitles: - pp = {"key": "FFmpegSubtitlesConvertor", - "format": opts.convertsubtitles} - if ytdlp: - pp["when"] = "before_dl" - postprocessors.append(pp) - if getattr(opts, "convertthumbnails", None): - postprocessors.append({ - "key": "FFmpegThumbnailsConvertor", - "format": opts.convertthumbnails, - "when": "before_dl", - }) - if getattr(opts, "exec_before_dl_cmd", None): - postprocessors.append({ - "key": "Exec", - "exec_cmd": opts.exec_before_dl_cmd, - "when": "before_dl", - }) - if opts.extractaudio: - postprocessors.append({ - "key": "FFmpegExtractAudio", - "preferredcodec": opts.audioformat, - "preferredquality": opts.audioquality, - "nopostoverwrites": opts.nopostoverwrites, - }) - if getattr(opts, "remuxvideo", None): - postprocessors.append({ - "key": "FFmpegVideoRemuxer", - "preferedformat": opts.remuxvideo, - }) - if opts.recodevideo: - postprocessors.append({ - "key": "FFmpegVideoConvertor", - "preferedformat": opts.recodevideo, - }) - if opts.embedsubtitles: - pp = {"key": "FFmpegEmbedSubtitle"} - if ytdlp: - pp["already_have_subtitle"] = ( - opts.writesubtitles and "no-keep-subs" not in compat_opts) - postprocessors.append(pp) - if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts: - opts.writesubtitles = True - if opts.allsubtitles and not opts.writeautomaticsub: - opts.writesubtitles = True - 
remove_chapters_patterns, remove_ranges = [], [] - for regex in opts.remove_chapters: - if regex.startswith("*"): - dur = list(map(module.parse_duration, regex[1:].split("-"))) - if len(dur) == 2 and all(t is not None for t in dur): - remove_ranges.append(tuple(dur)) - continue - remove_chapters_patterns.append(re.compile(regex)) - if opts.remove_chapters or sponsorblock_query: - postprocessors.append({ - "key": "ModifyChapters", - "remove_chapters_patterns": remove_chapters_patterns, - "remove_sponsor_segments": opts.sponsorblock_remove, - "remove_ranges": remove_ranges, - "sponsorblock_chapter_title": opts.sponsorblock_chapter_title, - "force_keyframes": opts.force_keyframes_at_cuts, - }) - addchapters = getattr(opts, "addchapters", None) - embed_infojson = getattr(opts, "embed_infojson", None) - if opts.addmetadata or addchapters or embed_infojson: - pp = {"key": "FFmpegMetadata"} - if ytdlp: - if embed_infojson is None: - embed_infojson = "if_exists" - pp["add_metadata"] = opts.addmetadata - pp["add_chapters"] = addchapters - pp["add_infojson"] = embed_infojson - - postprocessors.append(pp) - if getattr(opts, "sponskrub", False) is not False: - postprocessors.append({ - "key": "SponSkrub", - "path": opts.sponskrub_path, - "args": opts.sponskrub_args, - "cut": opts.sponskrub_cut, - "force": opts.sponskrub_force, - "ignoreerror": opts.sponskrub is None, - "_from_cli": True, - }) - if opts.embedthumbnail: - already_have_thumbnail = (opts.writethumbnail or - getattr(opts, "write_all_thumbnails", False)) - postprocessors.append({ - "key": "EmbedThumbnail", - "already_have_thumbnail": already_have_thumbnail, - }) - if not already_have_thumbnail: - opts.writethumbnail = True - if isinstance(opts.outtmpl, dict): - opts.outtmpl["pl_thumbnail"] = "" - if getattr(opts, "split_chapters", None): - postprocessors.append({ - "key": "FFmpegSplitChapters", - "force_keyframes": opts.force_keyframes_at_cuts, - }) - if opts.xattrs: - postprocessors.append({"key": "XAttrMetadata"}) - if opts.exec_cmd: - postprocessors.append({ - "key": "Exec", - "exec_cmd": opts.exec_cmd, - "when": "after_move", - }) + try: + postprocessors = list(module.get_postprocessors(opts)) + except AttributeError: + postprocessors = legacy_postprocessors( + opts, module, ytdlp, compat_opts) match_filter = ( None if opts.match_filter is None @@ -546,3 +420,139 @@ def parse_retries(retries, name=""): if retries in ("inf", "infinite"): return float("inf") return int(retries) + + +def legacy_postprocessors(opts, module, ytdlp, compat_opts): + postprocessors = [] + + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + if opts.metafromtitle: + postprocessors.append({ + "key": "MetadataFromTitle", + "titleformat": opts.metafromtitle, + }) + if getattr(opts, "add_postprocessors", None): + postprocessors += list(opts.add_postprocessors) + if sponsorblock_query: + postprocessors.append({ + "key": "SponsorBlock", + "categories": sponsorblock_query, + "api": opts.sponsorblock_api, + "when": "pre_process", + }) + if opts.parse_metadata: + postprocessors.append({ + "key": "MetadataParser", + "actions": opts.parse_metadata, + "when": "pre_process", + }) + if opts.convertsubtitles: + pp = {"key": "FFmpegSubtitlesConvertor", + "format": opts.convertsubtitles} + if ytdlp: + pp["when"] = "before_dl" + postprocessors.append(pp) + if getattr(opts, "convertthumbnails", None): + postprocessors.append({ + "key": "FFmpegThumbnailsConvertor", + "format": opts.convertthumbnails, + "when": "before_dl", + }) + if getattr(opts, 
"exec_before_dl_cmd", None): + postprocessors.append({ + "key": "Exec", + "exec_cmd": opts.exec_before_dl_cmd, + "when": "before_dl", + }) + if opts.extractaudio: + postprocessors.append({ + "key": "FFmpegExtractAudio", + "preferredcodec": opts.audioformat, + "preferredquality": opts.audioquality, + "nopostoverwrites": opts.nopostoverwrites, + }) + if getattr(opts, "remuxvideo", None): + postprocessors.append({ + "key": "FFmpegVideoRemuxer", + "preferedformat": opts.remuxvideo, + }) + if opts.recodevideo: + postprocessors.append({ + "key": "FFmpegVideoConvertor", + "preferedformat": opts.recodevideo, + }) + if opts.embedsubtitles: + pp = {"key": "FFmpegEmbedSubtitle"} + if ytdlp: + pp["already_have_subtitle"] = ( + opts.writesubtitles and "no-keep-subs" not in compat_opts) + postprocessors.append(pp) + if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts: + opts.writesubtitles = True + if opts.allsubtitles and not opts.writeautomaticsub: + opts.writesubtitles = True + remove_chapters_patterns, remove_ranges = [], [] + for regex in opts.remove_chapters: + if regex.startswith("*"): + dur = list(map(module.parse_duration, regex[1:].split("-"))) + if len(dur) == 2 and all(t is not None for t in dur): + remove_ranges.append(tuple(dur)) + continue + remove_chapters_patterns.append(re.compile(regex)) + if opts.remove_chapters or sponsorblock_query: + postprocessors.append({ + "key": "ModifyChapters", + "remove_chapters_patterns": remove_chapters_patterns, + "remove_sponsor_segments": opts.sponsorblock_remove, + "remove_ranges": remove_ranges, + "sponsorblock_chapter_title": opts.sponsorblock_chapter_title, + "force_keyframes": opts.force_keyframes_at_cuts, + }) + addchapters = getattr(opts, "addchapters", None) + embed_infojson = getattr(opts, "embed_infojson", None) + if opts.addmetadata or addchapters or embed_infojson: + pp = {"key": "FFmpegMetadata"} + if ytdlp: + if embed_infojson is None: + embed_infojson = "if_exists" + pp["add_metadata"] = opts.addmetadata + pp["add_chapters"] = addchapters + pp["add_infojson"] = embed_infojson + + postprocessors.append(pp) + if getattr(opts, "sponskrub", False) is not False: + postprocessors.append({ + "key": "SponSkrub", + "path": opts.sponskrub_path, + "args": opts.sponskrub_args, + "cut": opts.sponskrub_cut, + "force": opts.sponskrub_force, + "ignoreerror": opts.sponskrub is None, + "_from_cli": True, + }) + if opts.embedthumbnail: + already_have_thumbnail = (opts.writethumbnail or + getattr(opts, "write_all_thumbnails", False)) + postprocessors.append({ + "key": "EmbedThumbnail", + "already_have_thumbnail": already_have_thumbnail, + }) + if not already_have_thumbnail: + opts.writethumbnail = True + if isinstance(opts.outtmpl, dict): + opts.outtmpl["pl_thumbnail"] = "" + if getattr(opts, "split_chapters", None): + postprocessors.append({ + "key": "FFmpegSplitChapters", + "force_keyframes": opts.force_keyframes_at_cuts, + }) + if opts.xattrs: + postprocessors.append({"key": "XAttrMetadata"}) + if opts.exec_cmd: + postprocessors.append({ + "key": "Exec", + "exec_cmd": opts.exec_cmd, + "when": "after_move", + }) + + return postprocessors diff --git a/test/test_config.py b/test/test_config.py index 7cbb12b..859faf5 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 
2 as @@ -11,12 +11,11 @@ import os import sys import unittest -import json import tempfile ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, ROOTDIR) -from gallery_dl import config # noqa E402 +from gallery_dl import config, util # noqa E402 class TestConfig(unittest.TestCase): @@ -209,8 +208,8 @@ class TestConfigFiles(unittest.TestCase): def _load(name): path = os.path.join(ROOTDIR, "docs", name) try: - with open(path) as fp: - return json.load(fp) + with open(path) as file: + return util.json_loads(file.read()) except FileNotFoundError: raise unittest.SkipTest(path + " not available") diff --git a/test/test_extractor.py b/test/test_extractor.py index 144c6f9..6516fa8 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -46,7 +46,7 @@ class TestExtractorModule(unittest.TestCase): def setUp(self): extractor._cache.clear() - extractor._module_iter = iter(extractor.modules) + extractor._module_iter = extractor._modules_internal() extractor._list_classes = _list_classes def test_find(self): diff --git a/test/test_oauth.py b/test/test_oauth.py index 7455928..0082419 100644 --- a/test/test_oauth.py +++ b/test/test_oauth.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,6 +10,7 @@ import os import sys import unittest +from unittest.mock import patch sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from gallery_dl import oauth, text # noqa E402 @@ -66,6 +67,53 @@ class TestOAuthSession(unittest.TestCase): self.assertTrue(len(quoted) >= 3) self.assertEqual(quoted_hex.upper(), quoted_hex) + def test_generate_signature(self): + client = oauth.OAuth1Client( + CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET) + + request = MockRequest() + params = [] + self.assertEqual( + client.generate_signature(request, params), + "Wt2xo49dM5pkL4gsnCakNdHaVUo%3D") + + request = MockRequest("https://example.org/") + params = [("hello", "world"), ("foo", "bar")] + self.assertEqual( + client.generate_signature(request, params), + "ay2269%2F8uKpZqKJR1doTtpv%2Bzn0%3D") + + request = MockRequest("https://example.org/index.html" + "?hello=world&foo=bar", method="POST") + params = [("oauth_signature_method", "HMAC-SHA1")] + self.assertEqual( + client.generate_signature(request, params), + "yVZWb1ts4smdMmXxMlhaXrkoOng%3D") + + def test_dunder_call(self): + client = oauth.OAuth1Client( + CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN, ACCESS_TOKEN_SECRET) + request = MockRequest("https://example.org/") + + with patch("time.time") as tmock, \ + patch("gallery_dl.oauth.nonce") as nmock: + tmock.return_value = 123456789.123 + nmock.return_value = "abcdefghijklmno" + + client(request) + + self.assertEqual( + request.headers["Authorization"], + """OAuth \ +oauth_consumer_key="key",\ +oauth_nonce="abcdefghijklmno",\ +oauth_signature_method="HMAC-SHA1",\ +oauth_timestamp="123456789",\ +oauth_version="1.0",\ +oauth_token="accesskey",\ +oauth_signature="DjtTk5j5P3BDZFnstZ%2FtEYcwD6c%3D"\ +""") + def test_request_token(self): response = self._oauth_request( "/request_token.php", {}) @@ -110,5 +158,13 @@ class TestOAuthSession(unittest.TestCase): raise unittest.SkipTest() +class MockRequest(): + + def __init__(self, url="", method="GET"): + self.url = url + self.method = method + self.headers = {} + + if 
__name__ == "__main__": unittest.main(warnings="ignore") diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 7da2089..650bf59 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -171,37 +171,69 @@ class MetadataTest(BasePostprocessorTest): # default arguments self.assertEqual(pp.write , pp._write_json) - self.assertEqual(pp.ascii , False) - self.assertEqual(pp.indent , 4) self.assertEqual(pp.extension, "json") + self.assertTrue(callable(pp._json_encode)) def test_metadata_json(self): pp = self._create({ - "mode" : "json", - "ascii" : True, - "indent" : 2, - "extension": "JSON", + "mode" : "json", + "extension" : "JSON", }, { - "public" : "hello", - "_private" : "world", + "public" : "hello ワールド", + "_private" : "foo バー", }) self.assertEqual(pp.write , pp._write_json) - self.assertEqual(pp.ascii , True) - self.assertEqual(pp.indent , 2) self.assertEqual(pp.extension, "JSON") + self.assertTrue(callable(pp._json_encode)) with patch("builtins.open", mock_open()) as m: self._trigger() path = self.pathfmt.realpath + ".JSON" m.assert_called_once_with(path, "w", encoding="utf-8") - self.assertEqual(self._output(m), """{ - "category": "test", - "extension": "ext", - "filename": "file", - "public": "hello" + + if sys.hexversion >= 0x3060000: + # python 3.4 & 3.5 have random order without 'sort: True' + self.assertEqual(self._output(m), """{ + "category": "test", + "filename": "file", + "extension": "ext", + "public": "hello ワールド" } +""") + + def test_metadata_json_options(self): + pp = self._create({ + "mode" : "json", + "ascii" : True, + "sort" : True, + "separators": [",", " : "], + "private" : True, + "indent" : None, + "open" : "a", + "encoding" : "UTF-8", + "extension" : "JSON", + }, { + "public" : "hello ワールド", + "_private" : "foo バー", + }) + + self.assertEqual(pp.write , pp._write_json) + self.assertEqual(pp.extension, "JSON") + self.assertTrue(callable(pp._json_encode)) + + with patch("builtins.open", mock_open()) as m: + self._trigger() + + path = self.pathfmt.realpath + ".JSON" + m.assert_called_once_with(path, "a", encoding="UTF-8") + self.assertEqual(self._output(m), """{\ +"_private" : "foo \\u30d0\\u30fc",\ +"category" : "test",\ +"extension" : "ext",\ +"filename" : "file",\ +"public" : "hello \\u30ef\\u30fc\\u30eb\\u30c9"} """) def test_metadata_tags(self): @@ -255,6 +287,18 @@ class MetadataTest(BasePostprocessorTest): self._trigger() self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n") + def test_metadata_tags_list_of_dict(self): + self._create( + {"mode": "tags"}, + {"tags": [ + {"g": "foobar1", "m": "foobar2"}, + {"g": None, "m": "foobarbaz"} + ]}, + ) + with patch("builtins.open", mock_open()) as m: + self._trigger() + self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n") + def test_metadata_custom(self): def test(pp_info): pp = self._create(pp_info, {"foo": "bar"}) @@ -334,7 +378,7 @@ class MetadataTest(BasePostprocessorTest): m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_stdout(self): - self._create({"filename": "-", "indent": None}) + self._create({"filename": "-", "indent": None, "sort": True}) with patch("sys.stdout", Mock()) as m: self._trigger() diff --git a/test/test_results.py b/test/test_results.py 
index a42de09..d28496b 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -322,9 +322,9 @@ def setup_test_config(): config.set(("extractor", "mangoxo") , "username", "LiQiang3") config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma") - for category in ("danbooru", "instagram", "twitter", "subscribestar", - "e621", "atfbooru", "inkbunny", "tapas", "pillowfort", - "mangadex", "aibooru"): + for category in ("danbooru", "atfbooru", "aibooru", "e621", "e926", + "instagram", "twitter", "subscribestar", "deviantart", + "inkbunny", "tapas", "pillowfort", "mangadex"): config.set(("extractor", category), "username", None) config.set(("extractor", "mastodon.social"), "access-token", diff --git a/test/test_util.py b/test/test_util.py index 67fdf60..0813a0b 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2022 Mike Fährmann +# Copyright 2015-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -15,6 +15,7 @@ import io import random import string import datetime +import tempfile import itertools import http.cookiejar @@ -394,6 +395,46 @@ class TestOther(unittest.TestCase): def test_noop(self): self.assertEqual(util.noop(), None) + def test_md5(self): + self.assertEqual(util.md5(b""), + "d41d8cd98f00b204e9800998ecf8427e") + self.assertEqual(util.md5(b"hello"), + "5d41402abc4b2a76b9719d911017c592") + + self.assertEqual(util.md5(""), + "d41d8cd98f00b204e9800998ecf8427e") + self.assertEqual(util.md5("hello"), + "5d41402abc4b2a76b9719d911017c592") + self.assertEqual(util.md5("ワルド"), + "051f29cd6c942cf110a0ccc5729871d2") + + self.assertEqual(util.md5(0), + "d41d8cd98f00b204e9800998ecf8427e") + self.assertEqual(util.md5(()), + "d41d8cd98f00b204e9800998ecf8427e") + self.assertEqual(util.md5(None), + "d41d8cd98f00b204e9800998ecf8427e") + + def test_sha1(self): + self.assertEqual(util.sha1(b""), + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + self.assertEqual(util.sha1(b"hello"), + "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d") + + self.assertEqual(util.sha1(""), + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + self.assertEqual(util.sha1("hello"), + "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d") + self.assertEqual(util.sha1("ワルド"), + "0cbe319081aa0e9298448ec2bb16df8c494aa04e") + + self.assertEqual(util.sha1(0), + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + self.assertEqual(util.sha1(()), + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + self.assertEqual(util.sha1(None), + "da39a3ee5e6b4b0d3255bfef95601890afd80709") + def test_compile_expression(self): expr = util.compile_expression("1 + 2 * 3") self.assertEqual(expr(), 7) @@ -418,6 +459,56 @@ class TestOther(unittest.TestCase): with self.assertRaises(exception.StopExtraction): expr() + def test_import_file(self): + module = util.import_file("datetime") + self.assertIs(module, datetime) + + with tempfile.TemporaryDirectory() as path: + file = path + "/module_test.py" + with open(file, "w") as fp: + fp.write(""" +import datetime +key = "foobar" +value = 123 +""") + module = util.import_file(file) + + self.assertEqual(module.__name__, "module_test") + self.assertEqual(module.key, "foobar") + self.assertEqual(module.value, 123) + self.assertIs(module.datetime, datetime) + + def test_custom_globals(self): + value = {"v": "foobar"} + result = "8843d7f92416211de9ebb963ff4ce28125932878" + + expr = util.compile_expression("hash_sha1(v)") + 
self.assertEqual(expr(value), result) + + expr = util.compile_expression("hs(v)", globals={"hs": util.sha1}) + self.assertEqual(expr(value), result) + + with tempfile.TemporaryDirectory() as path: + file = path + "/module_sha1.py" + with open(file, "w") as fp: + fp.write(""" +import hashlib +def hash(value): + return hashlib.sha1(value.encode()).hexdigest() +""") + module = util.import_file(file) + + expr = util.compile_expression("hash(v)", globals=module.__dict__) + self.assertEqual(expr(value), result) + + GLOBALS_ORIG = util.GLOBALS + try: + util.GLOBALS = module.__dict__ + expr = util.compile_expression("hash(v)") + finally: + util.GLOBALS = GLOBALS_ORIG + self.assertEqual(expr(value), result) + def test_build_duration_func(self, f=util.build_duration_func): def test_single(df, v): diff --git a/test/test_ytdl.py b/test/test_ytdl.py index a273604..7b82a0f 100644 --- a/test/test_ytdl.py +++ b/test/test_ytdl.py @@ -166,7 +166,7 @@ class Test_CommandlineArguments(unittest.TestCase): subs["already_have_subtitle"] = False opts = self._(["--embed-subs", "--embed-thumbnail"]) - self.assertEqual(opts["postprocessors"], [subs, thumb]) + self.assertEqual(opts["postprocessors"][:2], [subs, thumb]) thumb["already_have_thumbnail"] = True if self.module_name == "yt_dlp": @@ -179,7 +179,7 @@ class Test_CommandlineArguments(unittest.TestCase): "--write-sub", "--write-all-thumbnails", ]) - self.assertEqual(opts["postprocessors"], [subs, thumb]) + self.assertEqual(opts["postprocessors"][:2], [subs, thumb]) def test_metadata(self): opts = self._("--add-metadata") @@ -262,21 +262,11 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments): def test_metadata_from_title(self): opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"]) - - try: - legacy = (self.module.version.__version__ < "2023.01.01") - except AttributeError: - legacy = True - - actions = [self.module.MetadataFromFieldPP.to_action( - "title:%(artist)s - %(title)s")] - if not legacy: - actions = {"pre_process": actions} - self.assertEqual(opts["postprocessors"][0], { "key" : "MetadataParser", "when" : "pre_process", - "actions": actions, + "actions": [self.module.MetadataFromFieldPP.to_action( + "title:%(artist)s - %(title)s")], }) -- cgit v1.2.3
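
Notes on selected hunks, with short illustrative sketches.

The 'archive-pragma' option threaded through DownloadJob (gallery_dl/job.py)
and the postprocessor base class (postprocessor/common.py) ends up in
util.DownloadArchive, where each configured string is executed verbatim as a
SQLite PRAGMA statement before the archive table is created. A minimal sketch
of that behavior outside gallery-dl follows; the "test-archive.db" path and
the pragma values "journal_mode=WAL" / "synchronous=NORMAL" are illustrative
examples, not defaults.

import sqlite3

# connect the way DownloadArchive.__init__ does
con = sqlite3.connect("test-archive.db", timeout=60, check_same_thread=False)
con.isolation_level = None
cursor = con.cursor()

# each configured entry is run as "PRAGMA <statement>"
for stmt in ("journal_mode=WAL", "synchronous=NORMAL"):
    cursor.execute("PRAGMA " + stmt)

cursor.execute("CREATE TABLE IF NOT EXISTS archive "
               "(entry TEXT PRIMARY KEY) WITHOUT ROWID")

# check()/add() boil down to a SELECT and an INSERT on the 'entry' key
cursor.execute("INSERT OR IGNORE INTO archive (entry) VALUES (?)",
               ("test12345",))
print(cursor.execute("SELECT 1 FROM archive WHERE entry=?",
                     ("test12345",)).fetchone())   # (1,)
con.close()

In a configuration file, the same values would sit at the extractor level as
a list of strings (e.g. "archive-pragma": ["journal_mode=WAL"]), mirroring
the cfg("archive-pragma") lookup added to DownloadJob.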
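util.py also grows md5()/sha1() helpers, registers them in GLOBALS as
hash_md5/hash_sha1 (alongside the new "restart" hook), and lets
compile_expression() take a custom globals mapping, falling back to
util.GLOBALS at call time. The new test_custom_globals test drives exactly
this; condensed to the stdlib pieces involved:

import functools
import hashlib

def sha1(s):
    """SHA1 hexdigest of str/bytes/falsy input (mirrors util.sha1)"""
    if not s:
        s = b""
    elif isinstance(s, str):
        s = s.encode()
    return hashlib.sha1(s).hexdigest()

GLOBALS = {"hash_sha1": sha1}

def compile_expression(expr, name="<expr>", globals=None):
    # compile once; evaluate later against a per-call locals mapping
    code_object = compile(expr, name, "eval")
    return functools.partial(eval, code_object, globals or GLOBALS)

expr = compile_expression("hash_sha1(v)")
print(expr({"v": "foobar"}))
# -> 8843d7f92416211de9ebb963ff4ce28125932878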
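The metadata postprocessor now builds a single json.JSONEncoder per instance
via _make_encoder(), exposing "sort" and "separators" options next to the
existing "ascii" and "indent" ones. The expected output asserted in
test_metadata_json_options above can be reproduced directly with the encoder
settings that test configures:

import json

encoder = json.JSONEncoder(
    ensure_ascii=True,          # "ascii": true
    sort_keys=True,             # "sort": true
    separators=(",", " : "),    # "separators": [",", " : "]
    indent=None,
    check_circular=False,
    default=str,
)

print(encoder.encode({"public": "hello ワールド", "_private": "foo バー"}))
# {"_private" : "foo \u30d0\u30fc","public" : "hello \u30ef\u30fc\u30eb\u30c9"}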
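Finally, replace_std_streams() becomes configure_standard_streams(): each of
stdout/stderr/stdin can carry its own options from the "output" config
section, a plain string acting as an encoding shorthand. The options are
applied via TextIOWrapper.reconfigure() where available (Python >= 3.7), with
the stream-rebuilding fallback shown in the hunk for older interpreters. A
reduced sketch of the reconfigure path, with the option value assumed inline
rather than read from a config file:

import sys

options = "utf-8"   # e.g. {"output": {"stdout": "utf-8"}} in a config file

# a bare string is shorthand for {"encoding": ..., "errors": "replace"}
if isinstance(options, str):
    options = {"encoding": options, "errors": "replace"}
elif not options.get("errors"):
    options["errors"] = "replace"

sys.stdout.reconfigure(**options)
print(sys.stdout.encoding, sys.stdout.errors)   # utf-8 replace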