From 032e5bed275a253e122ed9ac86dac7b8c4204172 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sat, 3 Aug 2024 20:27:44 -0400 Subject: New upstream version 1.27.2. --- CHANGELOG.md | 105 ++++++++++----- PKG-INFO | 12 +- README.rst | 10 +- data/completion/_gallery-dl | 2 + data/completion/gallery-dl | 2 +- data/completion/gallery-dl.fish | 2 + data/man/gallery-dl.1 | 8 +- data/man/gallery-dl.conf.5 | 198 ++++++++++++++++++++++++++-- docs/gallery-dl.conf | 2 +- gallery_dl.egg-info/PKG-INFO | 12 +- gallery_dl.egg-info/SOURCES.txt | 4 +- gallery_dl/__init__.py | 14 ++ gallery_dl/actions.py | 158 +++++++++++++++++++---- gallery_dl/cookies.py | 6 +- gallery_dl/extractor/8chan.py | 3 +- gallery_dl/extractor/__init__.py | 4 +- gallery_dl/extractor/agnph.py | 113 ++++++++++++++++ gallery_dl/extractor/aryion.py | 23 +++- gallery_dl/extractor/behance.py | 12 +- gallery_dl/extractor/booru.py | 18 ++- gallery_dl/extractor/bunkr.py | 2 +- gallery_dl/extractor/cien.py | 199 ++++++++++++++++++++++++++++ gallery_dl/extractor/common.py | 20 ++- gallery_dl/extractor/deviantart.py | 142 +++++++------------- gallery_dl/extractor/directlink.py | 3 +- gallery_dl/extractor/dynastyscans.py | 2 + gallery_dl/extractor/erome.py | 6 + gallery_dl/extractor/exhentai.py | 3 + gallery_dl/extractor/fallenangels.py | 84 ------------ gallery_dl/extractor/furaffinity.py | 9 ++ gallery_dl/extractor/gelbooru_v02.py | 25 +++- gallery_dl/extractor/hentainexus.py | 11 +- gallery_dl/extractor/hotleak.py | 6 + gallery_dl/extractor/imagefap.py | 2 +- gallery_dl/extractor/inkbunny.py | 4 +- gallery_dl/extractor/instagram.py | 20 ++- gallery_dl/extractor/koharu.py | 221 ++++++++++++++++++++++++++++++++ gallery_dl/extractor/nijie.py | 3 +- gallery_dl/extractor/paheal.py | 8 +- gallery_dl/extractor/readcomiconline.py | 26 ++-- gallery_dl/extractor/redgifs.py | 2 +- gallery_dl/extractor/sankaku.py | 26 +++- gallery_dl/extractor/sankakucomplex.py | 14 +- gallery_dl/extractor/subscribestar.py | 2 + 
gallery_dl/extractor/toyhouse.py | 28 ++-- gallery_dl/extractor/tumblr.py | 47 +++++-- gallery_dl/extractor/twitter.py | 157 ++++++++++++++++++----- gallery_dl/extractor/vipergirls.py | 3 +- gallery_dl/extractor/vsco.py | 23 +++- gallery_dl/extractor/wallpapercave.py | 11 +- gallery_dl/extractor/warosu.py | 4 +- gallery_dl/extractor/zerochan.py | 41 ++++-- gallery_dl/job.py | 63 ++++++--- gallery_dl/option.py | 12 +- gallery_dl/output.py | 34 ----- gallery_dl/path.py | 11 +- gallery_dl/postprocessor/metadata.py | 21 ++- gallery_dl/util.py | 27 +++- gallery_dl/version.py | 2 +- gallery_dl/ytdl.py | 18 ++- test/test_extractor.py | 9 +- test/test_postprocessor.py | 12 ++ test/test_results.py | 52 ++++---- test/test_util.py | 19 +-- test/test_ytdl.py | 14 ++ 65 files changed, 1664 insertions(+), 492 deletions(-) create mode 100644 gallery_dl/extractor/agnph.py create mode 100644 gallery_dl/extractor/cien.py delete mode 100644 gallery_dl/extractor/fallenangels.py create mode 100644 gallery_dl/extractor/koharu.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ebede9f..1ca8647 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,40 +1,83 @@ -## 1.27.1 - 2024-06-22 +## 1.27.2 - 2024-08-03 ### Extractors #### Additions -- [hentainexus] restore module ([#5275](https://github.com/mikf/gallery-dl/issues/5275), [#5712](https://github.com/mikf/gallery-dl/issues/5712)) -- [shimmie2] support `vidya.pics` ([#5632](https://github.com/mikf/gallery-dl/issues/5632)) -- [tcbscans] support other domains ([#5774](https://github.com/mikf/gallery-dl/issues/5774)) +- [agnph] add `tag` and `post` extractors ([#5284](https://github.com/mikf/gallery-dl/issues/5284), [#5890](https://github.com/mikf/gallery-dl/issues/5890)) +- [aryion] add `favorite` extractor ([#4511](https://github.com/mikf/gallery-dl/issues/4511), [#5870](https://github.com/mikf/gallery-dl/issues/5870)) +- [cien] add support ([#2885](https://github.com/mikf/gallery-dl/issues/2885), 
[#4103](https://github.com/mikf/gallery-dl/issues/4103), [#5240](https://github.com/mikf/gallery-dl/issues/5240)) +- [instagram] add `info` extractor ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) +- [koharu] add `gallery`, `search`, and `favorite` extractors ([#5893](https://github.com/mikf/gallery-dl/issues/5893), [#4707](https://github.com/mikf/gallery-dl/issues/4707)) +- [twitter] add `info` extractor ([#3623](https://github.com/mikf/gallery-dl/issues/3623)) #### Fixes -- [deviantart] fix watching module ID extraction ([#5696](https://github.com/mikf/gallery-dl/issues/5696), [#5772](https://github.com/mikf/gallery-dl/issues/5772)) -- [fanbox] handle KeyError for no longer existing plans ([#5759](https://github.com/mikf/gallery-dl/issues/5759)) -- [kemonoparty:favorite] fix exception when sorting `null` objects ([#5692](https://github.com/mikf/gallery-dl/issues/5692). [#5721](https://github.com/mikf/gallery-dl/issues/5721)) -- [skeb] fix `429 Too Many Requests` errors ([#5766](https://github.com/mikf/gallery-dl/issues/5766)) -- [speakerdeck] fix extraction ([#5730](https://github.com/mikf/gallery-dl/issues/5730)) -- [twitter] fix duplicate `ArkoseLogin` check +- [8chan] update `TOS` cookie name ([#5868](https://github.com/mikf/gallery-dl/issues/5868)) +- [behance] fix image extraction ([#5873](https://github.com/mikf/gallery-dl/issues/5873), [#5926](https://github.com/mikf/gallery-dl/issues/5926)) +- [booru] prevent crash when file URL is empty ([#5859](https://github.com/mikf/gallery-dl/issues/5859)) +- [deviantart] try to work around journal/status API changes ([#5916](https://github.com/mikf/gallery-dl/issues/5916)) +- [hentainexus] fix error with spread pages ([#5827](https://github.com/mikf/gallery-dl/issues/5827)) +- [hotleak] fix faulty image URLs ([#5915](https://github.com/mikf/gallery-dl/issues/5915)) +- [inkbunny:following] fix potentially infinite loop +- [nijie] fix image URLs of single image posts 
([#5842](https://github.com/mikf/gallery-dl/issues/5842)) +- [readcomiconline] fix extraction ([#5866](https://github.com/mikf/gallery-dl/issues/5866)) +- [toyhouse] fix Content Warning bypass ([#5820](https://github.com/mikf/gallery-dl/issues/5820)) +- [tumblr] revert to `offset` pagination, implement `pagination` option ([#5880](https://github.com/mikf/gallery-dl/issues/5880)) +- [twitter] fix `username-alt` option name ([#5715](https://github.com/mikf/gallery-dl/issues/5715)) +- [warosu] fix extraction +- [zerochan] handle `KeyError - 'items'` ([#5826](https://github.com/mikf/gallery-dl/issues/5826)) +- [zerochan] fix error on tag redirections ([#5891](https://github.com/mikf/gallery-dl/issues/5891)) +- [zerochan] fix `Invalid control character` errors ([#5892](https://github.com/mikf/gallery-dl/issues/5892)) #### Improvements -- [nijie] support downloading videos ([#5707](https://github.com/mikf/gallery-dl/issues/5707), [#5617](https://github.com/mikf/gallery-dl/issues/5617)) -- [philomena] support downloading `.svg` files ([#5643](https://github.com/mikf/gallery-dl/issues/5643)) -- [szurubooru] support empty tag searches ([#5711](https://github.com/mikf/gallery-dl/issues/5711)) -- [twitter] ignore `Unavailable` media ([#5736](https://github.com/mikf/gallery-dl/issues/5736)) +- [bunkr] support `bunkr.fi` domain ([#5872](https://github.com/mikf/gallery-dl/issues/5872)) +- [deviantart:following] use OAuth API endpoint ([#2511](https://github.com/mikf/gallery-dl/issues/2511)) +- [directlink] extend recognized file extensions ([#5924](https://github.com/mikf/gallery-dl/issues/5924)) +- [exhentai] improve error message when temporarily banned ([#5845](https://github.com/mikf/gallery-dl/issues/5845)) +- [gelbooru_v02] use total number of posts as pagination end marker ([#5830](https://github.com/mikf/gallery-dl/issues/5830)) +- [imagefap] add enumeration index to default filenames ([#1746](https://github.com/mikf/gallery-dl/issues/1746), 
[#5887](https://github.com/mikf/gallery-dl/issues/5887)) +- [paheal] implement fast `--range` support ([#5905](https://github.com/mikf/gallery-dl/issues/5905)) +- [redgifs] support URLs with numeric IDs ([#5898](https://github.com/mikf/gallery-dl/issues/5898), [#5899](https://github.com/mikf/gallery-dl/issues/5899)) +- [sankaku] match URLs with `www` subdomain ([#5907](https://github.com/mikf/gallery-dl/issues/5907)) +- [sankakucomplex] update domain to `news.sankakucomplex.com` +- [twitter] implement `cursor` support ([#5753](https://github.com/mikf/gallery-dl/issues/5753)) +- [vipergirls] improve `thread` URL pattern +- [wallpapercave] support `album` listings ([#5925](https://github.com/mikf/gallery-dl/issues/5925)) #### Metadata -- [hitomi] extract `title_jpn` metadata ([#5706](https://github.com/mikf/gallery-dl/issues/5706)) -- [instagram] extract `liked` metadata ([#5609](https://github.com/mikf/gallery-dl/issues/5609)) +- [dynastyscans] extract chapter `tags` ([#5904](https://github.com/mikf/gallery-dl/issues/5904)) +- [erome] extract `date` metadata ([#5796](https://github.com/mikf/gallery-dl/issues/5796)) +- [furaffinity] extract `folders` and `thumbnail` metadata ([#1284](https://github.com/mikf/gallery-dl/issues/1284), [#5824](https://github.com/mikf/gallery-dl/issues/5824)) +- [sankaku] implement `notes` extraction ([#5865](https://github.com/mikf/gallery-dl/issues/5865)) +- [subscribestar] fix `date` parsing in updated posts ([#5783](https://github.com/mikf/gallery-dl/issues/5783)) +- [twitter] extract `bookmark_count` and `view_count` metadata ([#5802](https://github.com/mikf/gallery-dl/issues/5802)) +- [zerochan] fix `source` metadata +- [zerochan] fix tag category extraction ([#5874](https://github.com/mikf/gallery-dl/issues/5874)) +- [zerochan] delay fetching extended metadata ([#5869](https://github.com/mikf/gallery-dl/issues/5869)) #### Options -- [newgrounds] extend `format` option ([#5709](https://github.com/mikf/gallery-dl/issues/5709)) -- 
[twitter] extend `ratelimit` option ([#5532](https://github.com/mikf/gallery-dl/issues/5532)) -- [twitter] add `username-alt` option ([#5715](https://github.com/mikf/gallery-dl/issues/5715)) +- [agnph] implement `tags` option ([#5284](https://github.com/mikf/gallery-dl/issues/5284)) +- [booru] allow multiple `url` keys ([#5859](https://github.com/mikf/gallery-dl/issues/5859)) +- [cien] add `files` option ([#2885](https://github.com/mikf/gallery-dl/issues/2885)) +- [koharu] add `cbz` and `format` options ([#5893](https://github.com/mikf/gallery-dl/issues/5893)) +- [vsco] add `include` option ([#5911](https://github.com/mikf/gallery-dl/issues/5911)) +- [zerochan] implement `tags` option ([#5874](https://github.com/mikf/gallery-dl/issues/5874)) #### Removals -- [photobucket] remove module -- [nitter] remove instances -- [vichan] remove `wikieat.club` -### Downloaders -- [ytdl] fix exception due to missing `ext` in unavailable videos ([#5675](https://github.com/mikf/gallery-dl/issues/5675)) -### Formatter -- implement `C` format specifier ([#5647](https://github.com/mikf/gallery-dl/issues/5647)) -- implement `X` format specifier ([#5770](https://github.com/mikf/gallery-dl/issues/5770)) +- [fallenangels] remove module +### Post Processors +- [metadata] allow using format strings for `directory` ([#5728](https://github.com/mikf/gallery-dl/issues/5728)) ### Options -- add `--no-input` command-line and `input` config option ([#5733](https://github.com/mikf/gallery-dl/issues/5733)) -- add `--config-open` command-line option ([#5713](https://github.com/mikf/gallery-dl/issues/5713)) -- add `--config-status` command-line option ([#5713](https://github.com/mikf/gallery-dl/issues/5713)) +- add `--print-traffic` command-line option +- add `-J/--resolve-json` command-line option ([#5864](https://github.com/mikf/gallery-dl/issues/5864)) +- add `filters-environment` option +- implement `archive-event` option ([#5784](https://github.com/mikf/gallery-dl/issues/5784)) +### Actions +- 
[actions] support multiple actions per pattern +- [actions] add `exec` action ([#5619](https://github.com/mikf/gallery-dl/issues/5619)) +- [actions] add `abort` and `terminate` actions ([#5778](https://github.com/mikf/gallery-dl/issues/5778)) +- [actions] allow setting a duration for `wait` +- [actions] emit logging messages before waiting/exiting/etc +### Tests +- [tests] enable test results for external extractors ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) +- [tests] load results from `${GDL_TEST_RESULTS}` ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) ### Miscellaneous -- [actions] fix exception when `msg` is not a string ([#5683](https://github.com/mikf/gallery-dl/issues/5683)) +- [cookies] add `thorium` support ([#5781](https://github.com/mikf/gallery-dl/issues/5781)) +- [job] add `resolve` argument to DataJob ([#5864](https://github.com/mikf/gallery-dl/issues/5864)) +- [path] fix moving temporary files across drives on Windows ([#5807](https://github.com/mikf/gallery-dl/issues/5807)) +- [ytdl] fix `--cookies-from-browser` option parsing ([#5885](https://github.com/mikf/gallery-dl/issues/5885)) +- make exceptions in filters/conditionals non-fatal +- update default User-Agent header to Firefox 128 ESR +- include `zstd` in Accept-Encoding header when supported diff --git a/PKG-INFO b/PKG-INFO index a06aa55..eec2e32 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.27.1 +Version: 1.27.2 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -65,10 +65,12 @@ Dependencies Optional -------- +- yt-dlp_ or youtube-dl_: HLS/DASH video downloads, ``ytdl`` integration - FFmpeg_: Pixiv Ugoira conversion -- yt-dlp_ or youtube-dl_: Video downloads +- mkvmerge_: Accurate Ugoira frame timecodes - PySocks_: SOCKS proxy support - 
brotli_ or brotlicffi_: Brotli compression support +- zstandard_: Zstandard compression support - PyYAML_: YAML configuration file support - toml_: TOML configuration file support for Python<3.11 - SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser`` @@ -112,9 +114,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -457,11 +459,13 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _pip: https://pip.pypa.io/en/stable/ .. _Requests: https://requests.readthedocs.io/en/master/ .. _FFmpeg: https://www.ffmpeg.org/ +.. _mkvmerge: https://www.matroska.org/downloads/mkvtoolnix.html .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _PySocks: https://pypi.org/project/PySocks/ .. _brotli: https://github.com/google/brotli .. _brotlicffi: https://github.com/python-hyper/brotlicffi +.. _zstandard: https://github.com/indygreg/python-zstandard .. _PyYAML: https://pyyaml.org/ .. _toml: https://pypi.org/project/toml/ .. 
_SecretStorage: https://pypi.org/project/SecretStorage/ diff --git a/README.rst b/README.rst index 260d0f4..6be24f4 100644 --- a/README.rst +++ b/README.rst @@ -25,10 +25,12 @@ Dependencies Optional -------- +- yt-dlp_ or youtube-dl_: HLS/DASH video downloads, ``ytdl`` integration - FFmpeg_: Pixiv Ugoira conversion -- yt-dlp_ or youtube-dl_: Video downloads +- mkvmerge_: Accurate Ugoira frame timecodes - PySocks_: SOCKS proxy support - brotli_ or brotlicffi_: Brotli compression support +- zstandard_: Zstandard compression support - PyYAML_: YAML configuration file support - toml_: TOML configuration file support for Python<3.11 - SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser`` @@ -72,9 +74,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -417,11 +419,13 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _pip: https://pip.pypa.io/en/stable/ .. _Requests: https://requests.readthedocs.io/en/master/ .. _FFmpeg: https://www.ffmpeg.org/ +.. _mkvmerge: https://www.matroska.org/downloads/mkvtoolnix.html .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _PySocks: https://pypi.org/project/PySocks/ .. _brotli: https://github.com/google/brotli .. _brotlicffi: https://github.com/python-hyper/brotlicffi +.. _zstandard: https://github.com/indygreg/python-zstandard .. _PyYAML: https://pyyaml.org/ .. _toml: https://pypi.org/project/toml/ .. 
_SecretStorage: https://pypi.org/project/SecretStorage/ diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 14b7321..3308e98 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -26,6 +26,7 @@ _arguments -s -S \ {-g,--get-urls}'[Print URLs instead of downloading]' \ {-G,--resolve-urls}'[Print URLs instead of downloading; resolve intermediary URLs]' \ {-j,--dump-json}'[Print JSON information]' \ +{-J,--resolve-json}'[Print JSON information; resolve intermediary URLs]' \ {-s,--simulate}'[Simulate data extraction; do not download anything]' \ {-E,--extractor-info}'[Print extractor defaults and settings]' \ {-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \ @@ -35,6 +36,7 @@ _arguments -s -S \ --write-log'[Write logging output to FILE]':'':_files \ --write-unsupported'[Write URLs, which get emitted by other extractors but cannot be handled, to FILE]':'':_files \ --write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \ +--print-traffic'[Display sent and read HTTP traffic]' \ --no-colors'[Do not emit ANSI color codes in output]' \ {-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'' \ --http-timeout'[Timeout for HTTP connections (default: 30.0)]':'' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 625ecd6..0d933fa 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^()$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update --update-to --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --error-file 
--list-modules --list-extractors --write-log --write-unsupported --write-pages --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update --update-to --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime 
--mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") ) fi } diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index a67cd63..7243998 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -20,6 +20,7 @@ complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging informati complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading' complete -c gallery-dl -s 'G' -l 'resolve-urls' -d 'Print URLs instead of downloading; resolve intermediary URLs' complete -c gallery-dl -s 'j' -l 'dump-json' -d 'Print JSON information' +complete -c gallery-dl -s 'J' -l 'resolve-json' -d 'Print JSON information; resolve intermediary URLs' complete -c gallery-dl -s 's' -l 'simulate' -d 'Simulate data extraction; do not download anything' complete -c gallery-dl -s 'E' -l 'extractor-info' -d 'Print extractor defaults and settings' complete -c gallery-dl -s 'K' -l 'list-keywords' -d 'Print a list of available keywords and example values for the given URLs' @@ -29,6 +30,7 @@ complete -c gallery-dl -l 'list-extractors' -d 'Print a list of extractor classe complete -c gallery-dl -r -F -l 'write-log' -d 'Write logging output to FILE' complete -c gallery-dl -r -F -l 'write-unsupported' -d 'Write URLs, which get emitted by other extractors but cannot be handled, to FILE' complete -c gallery-dl -l 'write-pages' -d 'Write downloaded intermediary pages to files in the current directory to debug problems' +complete -c gallery-dl -l 'print-traffic' -d 'Display sent and read HTTP traffic' complete -c gallery-dl -l 'no-colors' -d 'Do not emit ANSI color codes in output' complete -c gallery-dl -x -s 'R' -l 'retries' -d 'Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)' complete -c gallery-dl -x -l 'http-timeout' -d 'Timeout for HTTP connections (default: 30.0)' diff --git a/data/man/gallery-dl.1 
b/data/man/gallery-dl.1 index 37529bf..d1eddd6 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2024-06-22" "1.27.1" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2024-08-03" "1.27.2" "gallery-dl Manual" .\" disable hyphenation .nh @@ -80,6 +80,9 @@ Print URLs instead of downloading; resolve intermediary URLs .B "\-j, \-\-dump\-json" Print JSON information .TP +.B "\-J, \-\-resolve\-json" +Print JSON information; resolve intermediary URLs +.TP .B "\-s, \-\-simulate" Simulate data extraction; do not download anything .TP @@ -107,6 +110,9 @@ Write URLs, which get emitted by other extractors but cannot be handled, to FILE .B "\-\-write\-pages" Write downloaded intermediary pages to files in the current directory to debug problems .TP +.B "\-\-print\-traffic" +Display sent and read HTTP traffic +.TP .B "\-\-no\-colors" Do not emit ANSI color codes in output .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index e3ed58a..8f75284 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2024-06-22" "1.27.1" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2024-08-03" "1.27.2" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -456,6 +456,7 @@ response before \f[I]retrying\f[] the request. .br * \f[I]"0.5-1.5"\f[] \f[I][Danbooru]\f[], \f[I][E621]\f[], \f[I][foolfuuka]:search\f[], \f[I]itaku\f[], +\f[I]koharu\f[], \f[I]newgrounds\f[], \f[I][philomena]\f[], \f[I]pixiv:novel\f[], \f[I]plurk\f[], \f[I]poipiku\f[] , \f[I]pornpics\f[], \f[I]soundgasm\f[], \f[I]urlgalleries\f[], \f[I]vk\f[], \f[I]zerochan\f[] @@ -536,6 +537,8 @@ and optional for .br * \f[I]kemonoparty\f[] .br +* \f[I]koharu\f[] +.br * \f[I]mangadex\f[] .br * \f[I]mangoxo\f[] @@ -728,7 +731,7 @@ or a \f[I]list\f[] with IP and explicit port number as elements. 
\f[I]string\f[] .IP "Default:" 9 -\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0"\f[] +\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0"\f[] .IP "Description:" 4 User-Agent header value to be used for HTTP requests. @@ -1017,6 +1020,29 @@ but be aware that using external inputs for building local paths may pose a security risk. +.SS extractor.*.archive-event +.IP "Type:" 6 ++ \f[I]string\f[] ++ \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]"file"\f[] + +.IP "Example:" 4 +.br +* "file,skip" +.br +* ["file", "skip"] + +.IP "Description:" 4 +\f[I]Event(s)\f[] +for which IDs get written to an +\f[I]archive\f[]. + +Available events are: +\f[I]file\f[], \f[I]skip\f[] + + .SS extractor.*.archive-format .IP "Type:" 6 \f[I]string\f[] @@ -1075,25 +1101,33 @@ for available \f[I]PRAGMA\f[] statements and further details. .SS extractor.*.actions .IP "Type:" 6 .br -* \f[I]object\f[] (pattern -> action) +* \f[I]object\f[] (pattern -> action(s)) .br -* \f[I]list\f[] of \f[I]lists\f[] with 2 \f[I]strings\f[] as elements +* \f[I]list\f[] of \f[I]lists\f[] with pattern -> action(s) pairs as elements .IP "Example:" 4 .. code:: json { -"error" : "status \f[I]= 1", +"info:Logging in as .+" : "level = debug", "warning:(?i)unable to .+": "exit 127", -"info:Logging in as .+" : "level = debug" +"error" : [ +"status |= 1", +"exec notify.sh 'gdl error'", +"abort" +] } .. code:: json [ -["error" , "status \f[]= 1" ], +["info:Logging in as .+" , "level = debug"], ["warning:(?i)unable to .+", "exit 127" ], -["info:Logging in as .+" , "level = debug"] +["error" , [ +"status |= 1", +"exec notify.sh 'gdl error'", +"abort" +]] ] @@ -1110,6 +1144,9 @@ matches logging messages of all levels \f[I]action\f[] is parsed as action type followed by (optional) arguments. 
+It is possible to specify more than one \f[I]action\f[] per \f[I]pattern\f[] +by providing them as a \f[I]list\f[]: \f[I]["", "", …]\f[] + Supported Action Types: \f[I]status\f[]: @@ -1128,12 +1165,21 @@ Modify severity level of the current logging message. .br Can be one of \f[I]debug\f[], \f[I]info\f[], \f[I]warning\f[], \f[I]error\f[] or an integer value. .br -\f[I]print\f[] +\f[I]print\f[]: Write argument to stdout. +\f[I]exec\f[]: +Run a shell command. +\f[I]abort\f[]: +Stop the current extractor run. +\f[I]terminate\f[]: +Stop the current extractor run, including parent extractors. \f[I]restart\f[]: Restart the current extractor run. \f[I]wait\f[]: -Stop execution until Enter is pressed. +Sleep for a given \f[I]Duration\f[] or +.br +wait until Enter is pressed when no argument was given. +.br \f[I]exit\f[]: Exit the program with the given argument as exit status. @@ -1642,6 +1688,23 @@ Sets the maximum depth of returned reply posts. Process reposts. +.SS extractor.cien.files +.IP "Type:" 6 +\f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]["image", "video", "download", "gallery"]\f[] + +.IP "Description:" 4 +Determines the type and order of files to be downloaded. + +Available types are +\f[I]image\f[], +\f[I]video\f[], +\f[I]download\f[], +\f[I]gallery\f[]. + + .SS extractor.cyberdrop.domain .IP "Type:" 6 \f[I]string\f[] @@ -3004,6 +3067,36 @@ If the selected format is not available, the first in the list gets chosen (usually mp3). +.SS extractor.koharu.cbz +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download each gallery as a single \f[I].cbz\f[] file. + +Disabling this option causes a gallery +to be downloaded as individual image files. + + +.SS extractor.koharu.format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"original"\f[] + +.IP "Description:" 4 +Name of the image format to download. 
+ +Available formats are +.br +\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[]/\f[I]"original"\f[] +.br + + .SS extractor.lolisafe.domain .IP "Type:" 6 \f[I]string\f[] @@ -4310,6 +4403,27 @@ or each inline image, use an extra HTTP request to find the URL to its full-resolution version. +.SS extractor.tumblr.pagination +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"offset"\f[] + +.IP "Description:" 4 +Controls how to paginate over blog posts. + +.br +* \f[I]"api"\f[]: \f[I]next\f[] parameter provided by the API +(potentially misses posts due to a +\f[I]bug\f[] +in Tumblr's API) +.br +* \f[I]"before"\f[]: timestamp of last post +.br +* \f[I]"offset"\f[]: post offset number + + .SS extractor.tumblr.ratelimit .IP "Type:" 6 \f[I]string\f[] @@ -4919,6 +5033,35 @@ Note: Requires \f[I]login\f[] or \f[I]cookies\f[] +.SS extractor.vsco.include +.IP "Type:" 6 +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]"gallery"\f[] + +.IP "Example:" 4 +.br +* "avatar,collection" +.br +* ["avatar", "collection"] + +.IP "Description:" 4 +A (comma-separated) list of subcategories to include +when processing a user profile. + +Possible values are +\f[I]"avatar"\f[], +\f[I]"gallery"\f[], +\f[I]"spaces"\f[], +\f[I]"collection"\f[]. + +It is possible to use \f[I]"all"\f[] instead of listing all values separately. + + .SS extractor.vsco.videos .IP "Type:" 6 \f[I]bool\f[] @@ -5282,17 +5425,25 @@ Note: This requires 1 additional HTTP request per post. .SS extractor.[booru].url .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"file_url"\f[] .IP "Example:" 4 -"preview_url" +.br +* "preview_url" +.br +* ["sample_url", "preview_url", "file_url"] .IP "Description:" 4 Alternate field name to retrieve download URLs from. +When multiple names are given, download the first available one. 
+ .SS extractor.[manga-extractor].chapter-reverse .IP "Type:" 6 @@ -6249,13 +6400,19 @@ If this option is set, \f[I]metadata.extension\f[] and .SS metadata.directory .IP "Type:" 6 -\f[I]string\f[] +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] .IP "Default:" 9 \f[I]"."\f[] .IP "Example:" 4 -"metadata" +.br +* "metadata" +.br +* ["..", "metadata", "\\fF {id // 500 * 500}"] .IP "Description:" 4 Directory where metadata files are stored in relative to the @@ -6965,6 +7122,19 @@ Set this option to \f[I]null\f[] or an invalid path to disable this cache. +.SS filters-environment +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Evaluate filter expressions raising an exception as \f[I]false\f[] +instead of aborting the current extractor run +by wrapping them in a try/except block. + + .SS format-separator .IP "Type:" 6 \f[I]string\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 9f12652..2a7f8f2 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -10,7 +10,7 @@ "proxy": null, "skip": true, - "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0", "retries": 4, "timeout": 30.0, "verify": true, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index a06aa55..eec2e32 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.27.1 +Version: 1.27.2 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -65,10 +65,12 @@ Dependencies Optional -------- +- yt-dlp_ or youtube-dl_: HLS/DASH video downloads, ``ytdl`` integration - FFmpeg_: Pixiv Ugoira conversion -- yt-dlp_ or 
youtube-dl_: Video downloads +- mkvmerge_: Accurate Ugoira frame timecodes - PySocks_: SOCKS proxy support - brotli_ or brotlicffi_: Brotli compression support +- zstandard_: Zstandard compression support - PyYAML_: YAML configuration file support - toml_: TOML configuration file support for Python<3.11 - SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser`` @@ -112,9 +114,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds @@ -457,11 +459,13 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _pip: https://pip.pypa.io/en/stable/ .. _Requests: https://requests.readthedocs.io/en/master/ .. _FFmpeg: https://www.ffmpeg.org/ +.. _mkvmerge: https://www.matroska.org/downloads/mkvtoolnix.html .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _PySocks: https://pypi.org/project/PySocks/ .. _brotli: https://github.com/google/brotli .. _brotlicffi: https://github.com/python-hyper/brotlicffi +.. _zstandard: https://github.com/indygreg/python-zstandard .. _PyYAML: https://pyyaml.org/ .. _toml: https://pypi.org/project/toml/ .. 
_SecretStorage: https://pypi.org/project/SecretStorage/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index a892544..de5738a 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -56,6 +56,7 @@ gallery_dl/extractor/8chan.py gallery_dl/extractor/8muses.py gallery_dl/extractor/__init__.py gallery_dl/extractor/adultempire.py +gallery_dl/extractor/agnph.py gallery_dl/extractor/architizer.py gallery_dl/extractor/artstation.py gallery_dl/extractor/aryion.py @@ -68,6 +69,7 @@ gallery_dl/extractor/booru.py gallery_dl/extractor/bunkr.py gallery_dl/extractor/catbox.py gallery_dl/extractor/chevereto.py +gallery_dl/extractor/cien.py gallery_dl/extractor/comicvine.py gallery_dl/extractor/common.py gallery_dl/extractor/cyberdrop.py @@ -79,7 +81,6 @@ gallery_dl/extractor/dynastyscans.py gallery_dl/extractor/e621.py gallery_dl/extractor/erome.py gallery_dl/extractor/exhentai.py -gallery_dl/extractor/fallenangels.py gallery_dl/extractor/fanbox.py gallery_dl/extractor/fanleaks.py gallery_dl/extractor/fantia.py @@ -125,6 +126,7 @@ gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/kemonoparty.py gallery_dl/extractor/khinsider.py +gallery_dl/extractor/koharu.py gallery_dl/extractor/komikcast.py gallery_dl/extractor/lensdump.py gallery_dl/extractor/lexica.py diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 1d4215e..4b39c15 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -105,6 +105,11 @@ def main(): output.ANSI = True + # filter environment + filterenv = config.get((), "filters-environment", True) + if not filterenv: + util.compile_expression = util.compile_expression_raw + # format string separator separator = config.get((), "format-separator") if separator: @@ -145,6 +150,10 @@ def main(): log.debug("Configuration Files %s", config._files) + if args.print_traffic: + import requests + requests.packages.urllib3.connection.HTTPConnection.debuglevel = 
1 + # extractor modules modules = config.get(("extractor",), "modules") if modules is not None: @@ -240,6 +249,9 @@ def main(): if config.get(("output",), "fallback", True): jobtype.handle_url = \ staticmethod(jobtype.handle_url_fallback) + elif args.dump_json: + jobtype = job.DataJob + jobtype.resolve = args.dump_json - 1 else: jobtype = args.jobtype or job.DownloadJob @@ -299,6 +311,8 @@ def main(): else: input_manager.success() + except exception.StopExtraction: + pass except exception.TerminateExtraction: pass except exception.RestartExtraction: diff --git a/gallery_dl/actions.py b/gallery_dl/actions.py index 883e38b..668032d 100644 --- a/gallery_dl/actions.py +++ b/gallery_dl/actions.py @@ -9,8 +9,10 @@ """ """ import re +import time import logging import operator +import functools from . import util, exception @@ -19,29 +21,100 @@ def parse(actionspec): actionspec = actionspec.items() actions = {} - actions[logging.DEBUG] = actions_d = [] - actions[logging.INFO] = actions_i = [] - actions[logging.WARNING] = actions_w = [] - actions[logging.ERROR] = actions_e = [] + actions[-logging.DEBUG] = actions_bd = [] + actions[-logging.INFO] = actions_bi = [] + actions[-logging.WARNING] = actions_bw = [] + actions[-logging.ERROR] = actions_be = [] + actions[logging.DEBUG] = actions_ad = [] + actions[logging.INFO] = actions_ai = [] + actions[logging.WARNING] = actions_aw = [] + actions[logging.ERROR] = actions_ae = [] for event, spec in actionspec: level, _, pattern = event.partition(":") - type, _, args = spec.partition(" ") - action = (re.compile(pattern).search, ACTIONS[type](args)) + search = re.compile(pattern).search if pattern else util.true + + if isinstance(spec, str): + type, _, args = spec.partition(" ") + before, after = ACTIONS[type](args) + else: + actions_before = [] + actions_after = [] + for s in spec: + type, _, args = s.partition(" ") + before, after = ACTIONS[type](args) + if before: + actions_before.append(before) + if after: + 
actions_after.append(after) + before = _chain_actions(actions_before) + after = _chain_actions(actions_after) level = level.strip() if not level or level == "*": - actions_d.append(action) - actions_i.append(action) - actions_w.append(action) - actions_e.append(action) + if before: + action = (search, before) + actions_bd.append(action) + actions_bi.append(action) + actions_bw.append(action) + actions_be.append(action) + if after: + action = (search, after) + actions_ad.append(action) + actions_ai.append(action) + actions_aw.append(action) + actions_ae.append(action) else: - - actions[_level_to_int(level)].append(action) + level = _level_to_int(level) + if before: + actions[-level].append((search, before)) + if after: + actions[level].append((search, after)) return actions +class LoggerAdapter(): + + def __init__(self, logger, job): + self.logger = logger + self.extra = job._logger_extra + self.actions = job._logger_actions + + self.debug = functools.partial(self.log, logging.DEBUG) + self.info = functools.partial(self.log, logging.INFO) + self.warning = functools.partial(self.log, logging.WARNING) + self.error = functools.partial(self.log, logging.ERROR) + + def log(self, level, msg, *args, **kwargs): + msg = str(msg) + if args: + msg = msg % args + + before = self.actions[-level] + after = self.actions[level] + + if before: + args = self.extra.copy() + args["level"] = level + + for cond, action in before: + if cond(msg): + action(args) + + level = args["level"] + + if self.logger.isEnabledFor(level): + kwargs["extra"] = self.extra + self.logger._log(level, msg, (), **kwargs) + + if after: + args = self.extra.copy() + for cond, action in after: + if cond(msg): + action(args) + + def _level_to_int(level): try: return logging._nameToLevel[level] @@ -49,10 +122,19 @@ def _level_to_int(level): return int(level) +def _chain_actions(actions): + def _chain(args): + for action in actions: + action(args) + return _chain + + +# 
-------------------------------------------------------------------- + def action_print(opts): def _print(_): print(opts) - return _print + return None, _print def action_status(opts): @@ -69,7 +151,7 @@ def action_status(opts): def _status(args): args["job"].status = op(args["job"].status, value) - return _status + return _status, None def action_level(opts): @@ -77,17 +159,38 @@ def action_level(opts): def _level(args): args["level"] = level - return _level + return _level, None + + +def action_exec(opts): + def _exec(_): + util.Popen(opts, shell=True).wait() + return None, _exec def action_wait(opts): - def _wait(args): - input("Press Enter to continue") - return _wait + if opts: + seconds = util.build_duration_func(opts) + + def _wait(args): + time.sleep(seconds()) + else: + def _wait(args): + input("Press Enter to continue") + + return None, _wait + + +def action_abort(opts): + return None, util.raises(exception.StopExtraction) + + +def action_terminate(opts): + return None, util.raises(exception.TerminateExtraction) def action_restart(opts): - return util.raises(exception.RestartExtraction) + return None, util.raises(exception.RestartExtraction) def action_exit(opts): @@ -98,14 +201,17 @@ def action_exit(opts): def _exit(args): raise SystemExit(opts) - return _exit + return None, _exit ACTIONS = { - "print" : action_print, - "status" : action_status, - "level" : action_level, - "restart": action_restart, - "wait" : action_wait, - "exit" : action_exit, + "abort" : action_abort, + "exec" : action_exec, + "exit" : action_exit, + "level" : action_level, + "print" : action_print, + "restart" : action_restart, + "status" : action_status, + "terminate": action_terminate, + "wait" : action_wait, } diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index 47f78a7..f017929 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -25,7 +25,7 @@ from . 
import aes, text, util SUPPORTED_BROWSERS_CHROMIUM = { - "brave", "chrome", "chromium", "edge", "opera", "vivaldi"} + "brave", "chrome", "chromium", "edge", "opera", "thorium", "vivaldi"} SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"} logger = logging.getLogger("cookies") @@ -354,6 +354,7 @@ def _get_chromium_based_browser_settings(browser_name): "chromium": join(appdata_local, R"Chromium\User Data"), "edge" : join(appdata_local, R"Microsoft\Edge\User Data"), "opera" : join(appdata_roaming, R"Opera Software\Opera Stable"), + "thorium" : join(appdata_local, R"Thorium\User Data"), "vivaldi" : join(appdata_local, R"Vivaldi\User Data"), }[browser_name] @@ -365,6 +366,7 @@ def _get_chromium_based_browser_settings(browser_name): "chromium": join(appdata, "Chromium"), "edge" : join(appdata, "Microsoft Edge"), "opera" : join(appdata, "com.operasoftware.Opera"), + "thorium" : join(appdata, "Thorium"), "vivaldi" : join(appdata, "Vivaldi"), }[browser_name] @@ -377,6 +379,7 @@ def _get_chromium_based_browser_settings(browser_name): "chromium": join(config, "chromium"), "edge" : join(config, "microsoft-edge"), "opera" : join(config, "opera"), + "thorium" : join(config, "Thorium"), "vivaldi" : join(config, "vivaldi"), }[browser_name] @@ -390,6 +393,7 @@ def _get_chromium_based_browser_settings(browser_name): "edge" : "Microsoft Edge" if sys.platform == "darwin" else "Chromium", "opera" : "Opera" if sys.platform == "darwin" else "Chromium", + "thorium" : "Thorium", "vivaldi" : "Vivaldi" if sys.platform == "darwin" else "Chrome", }[browser_name] diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index a4b0997..a5e8b27 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -27,7 +27,8 @@ class _8chanExtractor(Extractor): Extractor.__init__(self, match) def _init(self): - self.cookies.set("TOS", "1", domain=self.root.rpartition("/")[2]) + self.cookies.set( + "TOS20240718", "1", 
domain=self.root.rpartition("/")[2]) @memcache() def cookies_prepare(self): diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6aff1f3..e103cb1 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -22,6 +22,7 @@ modules = [ "8chan", "8muses", "adultempire", + "agnph", "architizer", "artstation", "aryion", @@ -33,6 +34,7 @@ modules = [ "bunkr", "catbox", "chevereto", + "cien", "comicvine", "cyberdrop", "danbooru", @@ -42,7 +44,6 @@ modules = [ "e621", "erome", "exhentai", - "fallenangels", "fanbox", "fanleaks", "fantia", @@ -84,6 +85,7 @@ modules = [ "keenspot", "kemonoparty", "khinsider", + "koharu", "komikcast", "lensdump", "lexica", diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py new file mode 100644 index 0000000..653b73f --- /dev/null +++ b/gallery_dl/extractor/agnph.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://agn.ph/""" + +from . import booru +from .. 
import text + +from xml.etree import ElementTree +import collections +import re + +BASE_PATTERN = r"(?:https?://)?agn\.ph" + + +class AgnphExtractor(booru.BooruExtractor): + category = "agnph" + root = "https://agn.ph" + page_start = 1 + per_page = 45 + + TAG_TYPES = { + "a": "artist", + "b": "copyright", + "c": "character", + "d": "species", + "m": "general", + } + + def _init(self): + self.cookies.set("confirmed_age", "true", domain="agn.ph") + + def _prepare(self, post): + post["date"] = text.parse_timestamp(post["created_at"]) + post["status"] = post["status"].strip() + post["has_children"] = ("true" in post["has_children"]) + + def _xml_to_dict(self, xml): + return {element.tag: element.text for element in xml} + + def _pagination(self, url, params): + params["api"] = "xml" + if "page" in params: + params["page"] = \ + self.page_start + text.parse_int(params["page"]) - 1 + else: + params["page"] = self.page_start + + while True: + data = self.request(url, params=params).text + root = ElementTree.fromstring(data) + + yield from map(self._xml_to_dict, root) + + attrib = root.attrib + if int(attrib["offset"]) + len(root) >= int(attrib["count"]): + return + + params["page"] += 1 + + def _html(self, post): + url = "{}/gallery/post/show/{}/".format(self.root, post["id"]) + return self.request(url).text + + def _tags(self, post, page): + tag_container = text.extr( + page, '