author      Unit 193 <unit193@unit193.net>  2024-08-03 20:27:50 -0400
committer   Unit 193 <unit193@unit193.net>  2024-08-03 20:27:50 -0400
commit      ad105563ff9e1e82b0714d9bf2de8e8247a96512 (patch)
tree        9ce5d2ef148ba3caad5171b79e7a7cd051977dd2
parent      dd947614238f176637b2518ee24e588ca5920dea (diff)
parent      032e5bed275a253e122ed9ac86dac7b8c4204172 (diff)
Update upstream source from tag 'upstream/1.27.2'
Update to upstream version '1.27.2' with Debian dir 30ac047f32a7127d1f2b2198647d93f92db9f287
-rw-r--r--  CHANGELOG.md                             105
-rw-r--r--  PKG-INFO                                  12
-rw-r--r--  README.rst                                10
-rw-r--r--  data/completion/_gallery-dl                2
-rw-r--r--  data/completion/gallery-dl                 2
-rw-r--r--  data/completion/gallery-dl.fish            2
-rw-r--r--  data/man/gallery-dl.1                      8
-rw-r--r--  data/man/gallery-dl.conf.5               198
-rw-r--r--  docs/gallery-dl.conf                       2
-rw-r--r--  gallery_dl.egg-info/PKG-INFO              12
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt            4
-rw-r--r--  gallery_dl/__init__.py                    14
-rw-r--r--  gallery_dl/actions.py                    158
-rw-r--r--  gallery_dl/cookies.py                      6
-rw-r--r--  gallery_dl/extractor/8chan.py              3
-rw-r--r--  gallery_dl/extractor/__init__.py           4
-rw-r--r--  gallery_dl/extractor/agnph.py            113
-rw-r--r--  gallery_dl/extractor/aryion.py            23
-rw-r--r--  gallery_dl/extractor/behance.py           12
-rw-r--r--  gallery_dl/extractor/booru.py             18
-rw-r--r--  gallery_dl/extractor/bunkr.py              2
-rw-r--r--  gallery_dl/extractor/cien.py             199
-rw-r--r--  gallery_dl/extractor/common.py            20
-rw-r--r--  gallery_dl/extractor/deviantart.py       142
-rw-r--r--  gallery_dl/extractor/directlink.py         3
-rw-r--r--  gallery_dl/extractor/dynastyscans.py       2
-rw-r--r--  gallery_dl/extractor/erome.py              6
-rw-r--r--  gallery_dl/extractor/exhentai.py           3
-rw-r--r--  gallery_dl/extractor/fallenangels.py      84
-rw-r--r--  gallery_dl/extractor/furaffinity.py        9
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py      25
-rw-r--r--  gallery_dl/extractor/hentainexus.py       11
-rw-r--r--  gallery_dl/extractor/hotleak.py            6
-rw-r--r--  gallery_dl/extractor/imagefap.py           2
-rw-r--r--  gallery_dl/extractor/inkbunny.py           4
-rw-r--r--  gallery_dl/extractor/instagram.py         20
-rw-r--r--  gallery_dl/extractor/koharu.py           221
-rw-r--r--  gallery_dl/extractor/nijie.py              3
-rw-r--r--  gallery_dl/extractor/paheal.py             8
-rw-r--r--  gallery_dl/extractor/readcomiconline.py   26
-rw-r--r--  gallery_dl/extractor/redgifs.py            2
-rw-r--r--  gallery_dl/extractor/sankaku.py           26
-rw-r--r--  gallery_dl/extractor/sankakucomplex.py    14
-rw-r--r--  gallery_dl/extractor/subscribestar.py      2
-rw-r--r--  gallery_dl/extractor/toyhouse.py          28
-rw-r--r--  gallery_dl/extractor/tumblr.py            47
-rw-r--r--  gallery_dl/extractor/twitter.py          157
-rw-r--r--  gallery_dl/extractor/vipergirls.py         3
-rw-r--r--  gallery_dl/extractor/vsco.py              23
-rw-r--r--  gallery_dl/extractor/wallpapercave.py     11
-rw-r--r--  gallery_dl/extractor/warosu.py             4
-rw-r--r--  gallery_dl/extractor/zerochan.py          41
-rw-r--r--  gallery_dl/job.py                         63
-rw-r--r--  gallery_dl/option.py                      12
-rw-r--r--  gallery_dl/output.py                      34
-rw-r--r--  gallery_dl/path.py                        11
-rw-r--r--  gallery_dl/postprocessor/metadata.py      21
-rw-r--r--  gallery_dl/util.py                        27
-rw-r--r--  gallery_dl/version.py                      2
-rw-r--r--  gallery_dl/ytdl.py                        18
-rw-r--r--  test/test_extractor.py                     9
-rw-r--r--  test/test_postprocessor.py                12
-rw-r--r--  test/test_results.py                      52
-rw-r--r--  test/test_util.py                         19
-rw-r--r--  test/test_ytdl.py                         14
65 files changed, 1664 insertions, 492 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebede9f..1ca8647 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,40 +1,83 @@
-## 1.27.1 - 2024-06-22
+## 1.27.2 - 2024-08-03
### Extractors
#### Additions
-- [hentainexus] restore module ([#5275](https://github.com/mikf/gallery-dl/issues/5275), [#5712](https://github.com/mikf/gallery-dl/issues/5712))
-- [shimmie2] support `vidya.pics` ([#5632](https://github.com/mikf/gallery-dl/issues/5632))
-- [tcbscans] support other domains ([#5774](https://github.com/mikf/gallery-dl/issues/5774))
+- [agnph] add `tag` and `post` extractors ([#5284](https://github.com/mikf/gallery-dl/issues/5284), [#5890](https://github.com/mikf/gallery-dl/issues/5890))
+- [aryion] add `favorite` extractor ([#4511](https://github.com/mikf/gallery-dl/issues/4511), [#5870](https://github.com/mikf/gallery-dl/issues/5870))
+- [cien] add support ([#2885](https://github.com/mikf/gallery-dl/issues/2885), [#4103](https://github.com/mikf/gallery-dl/issues/4103), [#5240](https://github.com/mikf/gallery-dl/issues/5240))
+- [instagram] add `info` extractor ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
+- [koharu] add `gallery`, `search`, and `favorite` extractors ([#5893](https://github.com/mikf/gallery-dl/issues/5893), [#4707](https://github.com/mikf/gallery-dl/issues/4707))
+- [twitter] add `info` extractor ([#3623](https://github.com/mikf/gallery-dl/issues/3623))
#### Fixes
-- [deviantart] fix watching module ID extraction ([#5696](https://github.com/mikf/gallery-dl/issues/5696), [#5772](https://github.com/mikf/gallery-dl/issues/5772))
-- [fanbox] handle KeyError for no longer existing plans ([#5759](https://github.com/mikf/gallery-dl/issues/5759))
-- [kemonoparty:favorite] fix exception when sorting `null` objects ([#5692](https://github.com/mikf/gallery-dl/issues/5692), [#5721](https://github.com/mikf/gallery-dl/issues/5721))
-- [skeb] fix `429 Too Many Requests` errors ([#5766](https://github.com/mikf/gallery-dl/issues/5766))
-- [speakerdeck] fix extraction ([#5730](https://github.com/mikf/gallery-dl/issues/5730))
-- [twitter] fix duplicate `ArkoseLogin` check
+- [8chan] update `TOS` cookie name ([#5868](https://github.com/mikf/gallery-dl/issues/5868))
+- [behance] fix image extraction ([#5873](https://github.com/mikf/gallery-dl/issues/5873), [#5926](https://github.com/mikf/gallery-dl/issues/5926))
+- [booru] prevent crash when file URL is empty ([#5859](https://github.com/mikf/gallery-dl/issues/5859))
+- [deviantart] try to work around journal/status API changes ([#5916](https://github.com/mikf/gallery-dl/issues/5916))
+- [hentainexus] fix error with spread pages ([#5827](https://github.com/mikf/gallery-dl/issues/5827))
+- [hotleak] fix faulty image URLs ([#5915](https://github.com/mikf/gallery-dl/issues/5915))
+- [inkbunny:following] fix potentially infinite loop
+- [nijie] fix image URLs of single image posts ([#5842](https://github.com/mikf/gallery-dl/issues/5842))
+- [readcomiconline] fix extraction ([#5866](https://github.com/mikf/gallery-dl/issues/5866))
+- [toyhouse] fix Content Warning bypass ([#5820](https://github.com/mikf/gallery-dl/issues/5820))
+- [tumblr] revert to `offset` pagination, implement `pagination` option ([#5880](https://github.com/mikf/gallery-dl/issues/5880))
+- [twitter] fix `username-alt` option name ([#5715](https://github.com/mikf/gallery-dl/issues/5715))
+- [warosu] fix extraction
+- [zerochan] handle `KeyError - 'items'` ([#5826](https://github.com/mikf/gallery-dl/issues/5826))
+- [zerochan] fix error on tag redirections ([#5891](https://github.com/mikf/gallery-dl/issues/5891))
+- [zerochan] fix `Invalid control character` errors ([#5892](https://github.com/mikf/gallery-dl/issues/5892))
#### Improvements
-- [nijie] support downloading videos ([#5707](https://github.com/mikf/gallery-dl/issues/5707), [#5617](https://github.com/mikf/gallery-dl/issues/5617))
-- [philomena] support downloading `.svg` files ([#5643](https://github.com/mikf/gallery-dl/issues/5643))
-- [szurubooru] support empty tag searches ([#5711](https://github.com/mikf/gallery-dl/issues/5711))
-- [twitter] ignore `Unavailable` media ([#5736](https://github.com/mikf/gallery-dl/issues/5736))
+- [bunkr] support `bunkr.fi` domain ([#5872](https://github.com/mikf/gallery-dl/issues/5872))
+- [deviantart:following] use OAuth API endpoint ([#2511](https://github.com/mikf/gallery-dl/issues/2511))
+- [directlink] extend recognized file extensions ([#5924](https://github.com/mikf/gallery-dl/issues/5924))
+- [exhentai] improve error message when temporarily banned ([#5845](https://github.com/mikf/gallery-dl/issues/5845))
+- [gelbooru_v02] use total number of posts as pagination end marker ([#5830](https://github.com/mikf/gallery-dl/issues/5830))
+- [imagefap] add enumeration index to default filenames ([#1746](https://github.com/mikf/gallery-dl/issues/1746), [#5887](https://github.com/mikf/gallery-dl/issues/5887))
+- [paheal] implement fast `--range` support ([#5905](https://github.com/mikf/gallery-dl/issues/5905))
+- [redgifs] support URLs with numeric IDs ([#5898](https://github.com/mikf/gallery-dl/issues/5898), [#5899](https://github.com/mikf/gallery-dl/issues/5899))
+- [sankaku] match URLs with `www` subdomain ([#5907](https://github.com/mikf/gallery-dl/issues/5907))
+- [sankakucomplex] update domain to `news.sankakucomplex.com`
+- [twitter] implement `cursor` support ([#5753](https://github.com/mikf/gallery-dl/issues/5753))
+- [vipergirls] improve `thread` URL pattern
+- [wallpapercave] support `album` listings ([#5925](https://github.com/mikf/gallery-dl/issues/5925))
#### Metadata
-- [hitomi] extract `title_jpn` metadata ([#5706](https://github.com/mikf/gallery-dl/issues/5706))
-- [instagram] extract `liked` metadata ([#5609](https://github.com/mikf/gallery-dl/issues/5609))
+- [dynastyscans] extract chapter `tags` ([#5904](https://github.com/mikf/gallery-dl/issues/5904))
+- [erome] extract `date` metadata ([#5796](https://github.com/mikf/gallery-dl/issues/5796))
+- [furaffinity] extract `folders` and `thumbnail` metadata ([#1284](https://github.com/mikf/gallery-dl/issues/1284), [#5824](https://github.com/mikf/gallery-dl/issues/5824))
+- [sankaku] implement `notes` extraction ([#5865](https://github.com/mikf/gallery-dl/issues/5865))
+- [subscribestar] fix `date` parsing in updated posts ([#5783](https://github.com/mikf/gallery-dl/issues/5783))
+- [twitter] extract `bookmark_count` and `view_count` metadata ([#5802](https://github.com/mikf/gallery-dl/issues/5802))
+- [zerochan] fix `source` metadata
+- [zerochan] fix tag category extraction ([#5874](https://github.com/mikf/gallery-dl/issues/5874))
+- [zerochan] delay fetching extended metadata ([#5869](https://github.com/mikf/gallery-dl/issues/5869))
#### Options
-- [newgrounds] extend `format` option ([#5709](https://github.com/mikf/gallery-dl/issues/5709))
-- [twitter] extend `ratelimit` option ([#5532](https://github.com/mikf/gallery-dl/issues/5532))
-- [twitter] add `username-alt` option ([#5715](https://github.com/mikf/gallery-dl/issues/5715))
+- [agnph] implement `tags` option ([#5284](https://github.com/mikf/gallery-dl/issues/5284))
+- [booru] allow multiple `url` keys ([#5859](https://github.com/mikf/gallery-dl/issues/5859))
+- [cien] add `files` option ([#2885](https://github.com/mikf/gallery-dl/issues/2885))
+- [koharu] add `cbz` and `format` options ([#5893](https://github.com/mikf/gallery-dl/issues/5893))
+- [vsco] add `include` option ([#5911](https://github.com/mikf/gallery-dl/issues/5911))
+- [zerochan] implement `tags` option ([#5874](https://github.com/mikf/gallery-dl/issues/5874))
#### Removals
-- [photobucket] remove module
-- [nitter] remove instances
-- [vichan] remove `wikieat.club`
-### Downloaders
-- [ytdl] fix exception due to missing `ext` in unavailable videos ([#5675](https://github.com/mikf/gallery-dl/issues/5675))
-### Formatter
-- implement `C` format specifier ([#5647](https://github.com/mikf/gallery-dl/issues/5647))
-- implement `X` format specifier ([#5770](https://github.com/mikf/gallery-dl/issues/5770))
+- [fallenangels] remove module
+### Post Processors
+- [metadata] allow using format strings for `directory` ([#5728](https://github.com/mikf/gallery-dl/issues/5728))
### Options
-- add `--no-input` command-line and `input` config option ([#5733](https://github.com/mikf/gallery-dl/issues/5733))
-- add `--config-open` command-line option ([#5713](https://github.com/mikf/gallery-dl/issues/5713))
-- add `--config-status` command-line option ([#5713](https://github.com/mikf/gallery-dl/issues/5713))
+- add `--print-traffic` command-line option
+- add `-J/--resolve-json` command-line option ([#5864](https://github.com/mikf/gallery-dl/issues/5864))
+- add `filters-environment` option
+- implement `archive-event` option ([#5784](https://github.com/mikf/gallery-dl/issues/5784))
+### Actions
+- [actions] support multiple actions per pattern
+- [actions] add `exec` action ([#5619](https://github.com/mikf/gallery-dl/issues/5619))
+- [actions] add `abort` and `terminate` actions ([#5778](https://github.com/mikf/gallery-dl/issues/5778))
+- [actions] allow setting a duration for `wait`
+- [actions] emit logging messages before waiting/exiting/etc
+### Tests
+- [tests] enable test results for external extractors ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
+- [tests] load results from `${GDL_TEST_RESULTS}` ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
### Miscellaneous
-- [actions] fix exception when `msg` is not a string ([#5683](https://github.com/mikf/gallery-dl/issues/5683))
+- [cookies] add `thorium` support ([#5781](https://github.com/mikf/gallery-dl/issues/5781))
+- [job] add `resolve` argument to DataJob ([#5864](https://github.com/mikf/gallery-dl/issues/5864))
+- [path] fix moving temporary files across drives on Windows ([#5807](https://github.com/mikf/gallery-dl/issues/5807))
+- [ytdl] fix `--cookies-from-browser` option parsing ([#5885](https://github.com/mikf/gallery-dl/issues/5885))
+- make exceptions in filters/conditionals non-fatal
+- update default User-Agent header to Firefox 128 ESR
+- include `zstd` in Accept-Encoding header when supported
diff --git a/PKG-INFO b/PKG-INFO
index a06aa55..eec2e32 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.1
+Version: 1.27.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -65,10 +65,12 @@ Dependencies
Optional
--------
+- yt-dlp_ or youtube-dl_: HLS/DASH video downloads, ``ytdl`` integration
- FFmpeg_: Pixiv Ugoira conversion
-- yt-dlp_ or youtube-dl_: Video downloads
+- mkvmerge_: Accurate Ugoira frame timecodes
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
+- zstandard_: Zstandard compression support
- PyYAML_: YAML configuration file support
- toml_: TOML configuration file support for Python<3.11
- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
@@ -112,9 +114,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.bin>`__
Nightly Builds
@@ -457,11 +459,13 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _pip: https://pip.pypa.io/en/stable/
.. _Requests: https://requests.readthedocs.io/en/master/
.. _FFmpeg: https://www.ffmpeg.org/
+.. _mkvmerge: https://www.matroska.org/downloads/mkvtoolnix.html
.. _yt-dlp: https://github.com/yt-dlp/yt-dlp
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
+.. _zstandard: https://github.com/indygreg/python-zstandard
.. _PyYAML: https://pyyaml.org/
.. _toml: https://pypi.org/project/toml/
.. _SecretStorage: https://pypi.org/project/SecretStorage/
diff --git a/README.rst b/README.rst
index 260d0f4..6be24f4 100644
--- a/README.rst
+++ b/README.rst
@@ -25,10 +25,12 @@ Dependencies
Optional
--------
+- yt-dlp_ or youtube-dl_: HLS/DASH video downloads, ``ytdl`` integration
- FFmpeg_: Pixiv Ugoira conversion
-- yt-dlp_ or youtube-dl_: Video downloads
+- mkvmerge_: Accurate Ugoira frame timecodes
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
+- zstandard_: Zstandard compression support
- PyYAML_: YAML configuration file support
- toml_: TOML configuration file support for Python<3.11
- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
@@ -72,9 +74,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.bin>`__
Nightly Builds
@@ -417,11 +419,13 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _pip: https://pip.pypa.io/en/stable/
.. _Requests: https://requests.readthedocs.io/en/master/
.. _FFmpeg: https://www.ffmpeg.org/
+.. _mkvmerge: https://www.matroska.org/downloads/mkvtoolnix.html
.. _yt-dlp: https://github.com/yt-dlp/yt-dlp
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
+.. _zstandard: https://github.com/indygreg/python-zstandard
.. _PyYAML: https://pyyaml.org/
.. _toml: https://pypi.org/project/toml/
.. _SecretStorage: https://pypi.org/project/SecretStorage/
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 14b7321..3308e98 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -26,6 +26,7 @@ _arguments -s -S \
{-g,--get-urls}'[Print URLs instead of downloading]' \
{-G,--resolve-urls}'[Print URLs instead of downloading; resolve intermediary URLs]' \
{-j,--dump-json}'[Print JSON information]' \
+{-J,--resolve-json}'[Print JSON information; resolve intermediary URLs]' \
{-s,--simulate}'[Simulate data extraction; do not download anything]' \
{-E,--extractor-info}'[Print extractor defaults and settings]' \
{-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \
@@ -35,6 +36,7 @@ _arguments -s -S \
--write-log'[Write logging output to FILE]':'<file>':_files \
--write-unsupported'[Write URLs, which get emitted by other extractors but cannot be handled, to FILE]':'<file>':_files \
--write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \
+--print-traffic'[Display sent and read HTTP traffic]' \
--no-colors'[Do not emit ANSI color codes in output]' \
{-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'<n>' \
--http-timeout'[Timeout for HTTP connections (default: 30.0)]':'<seconds>' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 625ecd6..0d933fa 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update --update-to --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update --update-to --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index a67cd63..7243998 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -20,6 +20,7 @@ complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging informati
complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading'
complete -c gallery-dl -s 'G' -l 'resolve-urls' -d 'Print URLs instead of downloading; resolve intermediary URLs'
complete -c gallery-dl -s 'j' -l 'dump-json' -d 'Print JSON information'
+complete -c gallery-dl -s 'J' -l 'resolve-json' -d 'Print JSON information; resolve intermediary URLs'
complete -c gallery-dl -s 's' -l 'simulate' -d 'Simulate data extraction; do not download anything'
complete -c gallery-dl -s 'E' -l 'extractor-info' -d 'Print extractor defaults and settings'
complete -c gallery-dl -s 'K' -l 'list-keywords' -d 'Print a list of available keywords and example values for the given URLs'
@@ -29,6 +30,7 @@ complete -c gallery-dl -l 'list-extractors' -d 'Print a list of extractor classe
complete -c gallery-dl -r -F -l 'write-log' -d 'Write logging output to FILE'
complete -c gallery-dl -r -F -l 'write-unsupported' -d 'Write URLs, which get emitted by other extractors but cannot be handled, to FILE'
complete -c gallery-dl -l 'write-pages' -d 'Write downloaded intermediary pages to files in the current directory to debug problems'
+complete -c gallery-dl -l 'print-traffic' -d 'Display sent and read HTTP traffic'
complete -c gallery-dl -l 'no-colors' -d 'Do not emit ANSI color codes in output'
complete -c gallery-dl -x -s 'R' -l 'retries' -d 'Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)'
complete -c gallery-dl -x -l 'http-timeout' -d 'Timeout for HTTP connections (default: 30.0)'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 37529bf..d1eddd6 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-06-22" "1.27.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-08-03" "1.27.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -80,6 +80,9 @@ Print URLs instead of downloading; resolve intermediary URLs
.B "\-j, \-\-dump\-json"
Print JSON information
.TP
+.B "\-J, \-\-resolve\-json"
+Print JSON information; resolve intermediary URLs
+.TP
.B "\-s, \-\-simulate"
Simulate data extraction; do not download anything
.TP
@@ -107,6 +110,9 @@ Write URLs, which get emitted by other extractors but cannot be handled, to FILE
.B "\-\-write\-pages"
Write downloaded intermediary pages to files in the current directory to debug problems
.TP
+.B "\-\-print\-traffic"
+Display sent and read HTTP traffic
+.TP
.B "\-\-no\-colors"
Do not emit ANSI color codes in output
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index e3ed58a..8f75284 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-06-22" "1.27.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-08-03" "1.27.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -456,6 +456,7 @@ response before \f[I]retrying\f[] the request.
.br
* \f[I]"0.5-1.5"\f[]
\f[I][Danbooru]\f[], \f[I][E621]\f[], \f[I][foolfuuka]:search\f[], \f[I]itaku\f[],
+\f[I]koharu\f[],
\f[I]newgrounds\f[], \f[I][philomena]\f[], \f[I]pixiv:novel\f[], \f[I]plurk\f[],
\f[I]poipiku\f[], \f[I]pornpics\f[], \f[I]soundgasm\f[], \f[I]urlgalleries\f[],
\f[I]vk\f[], \f[I]zerochan\f[]
@@ -536,6 +537,8 @@ and optional for
.br
* \f[I]kemonoparty\f[]
.br
+* \f[I]koharu\f[]
+.br
* \f[I]mangadex\f[]
.br
* \f[I]mangoxo\f[]
@@ -728,7 +731,7 @@ or a \f[I]list\f[] with IP and explicit port number as elements.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0"\f[]
+\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0"\f[]
.IP "Description:" 4
User-Agent header value to be used for HTTP requests.
@@ -1017,6 +1020,29 @@ but be aware that using external inputs for building local paths
may pose a security risk.
+.SS extractor.*.archive-event
+.IP "Type:" 6
++ \f[I]string\f[]
++ \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"file"\f[]
+
+.IP "Example:" 4
+.br
+* "file,skip"
+.br
+* ["file", "skip"]
+
+.IP "Description:" 4
+\f[I]Event(s)\f[]
+for which IDs get written to an
+\f[I]archive\f[].
+
+Available events are:
+\f[I]file\f[], \f[I]skip\f[]
+
+
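
For illustration, a minimal config sketch for the new archive-event option; the archive path is a hypothetical placeholder. It records archive IDs for both downloaded and skipped files.

.. code:: json

{
    "extractor": {
        "archive": "/path/to/archive.sqlite3",
        "archive-event": ["file", "skip"]
    }
}
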
.SS extractor.*.archive-format
.IP "Type:" 6
\f[I]string\f[]
@@ -1075,25 +1101,33 @@ for available \f[I]PRAGMA\f[] statements and further details.
.SS extractor.*.actions
.IP "Type:" 6
.br
-* \f[I]object\f[] (pattern -> action)
+* \f[I]object\f[] (pattern -> action(s))
.br
-* \f[I]list\f[] of \f[I]lists\f[] with 2 \f[I]strings\f[] as elements
+* \f[I]list\f[] of \f[I]lists\f[] with pattern -> action(s) pairs as elements
.IP "Example:" 4
.. code:: json
{
-"error" : "status \f[I]= 1",
+"info:Logging in as .+" : "level = debug",
"warning:(?i)unable to .+": "exit 127",
-"info:Logging in as .+" : "level = debug"
+"error" : [
+"status \f[I]= 1",
+"exec notify.sh 'gdl error'",
+"abort"
+]
}
.. code:: json
[
-["error" , "status \f[]= 1" ],
+["info:Logging in as .+" , "level = debug"],
["warning:(?i)unable to .+", "exit 127" ],
-["info:Logging in as .+" , "level = debug"]
+["error" , [
+"status \f[]= 1",
+"exec notify.sh 'gdl error'",
+"abort"
+]]
]
@@ -1110,6 +1144,9 @@ matches logging messages of all levels
\f[I]action\f[] is parsed as action type
followed by (optional) arguments.
+It is possible to specify more than one \f[I]action\f[] per \f[I]pattern\f[]
+by providing them as a \f[I]list\f[]: \f[I]["<action1>", "<action2>", …]\f[]
+
Supported Action Types:
\f[I]status\f[]:
@@ -1128,12 +1165,21 @@ Modify severity level of the current logging message.
.br
Can be one of \f[I]debug\f[], \f[I]info\f[], \f[I]warning\f[], \f[I]error\f[] or an integer value.
.br
-\f[I]print\f[]
+\f[I]print\f[]:
Write argument to stdout.
+\f[I]exec\f[]:
+Run a shell command.
+\f[I]abort\f[]:
+Stop the current extractor run.
+\f[I]terminate\f[]:
+Stop the current extractor run, including parent extractors.
\f[I]restart\f[]:
Restart the current extractor run.
\f[I]wait\f[]:
-Stop execution until Enter is pressed.
+Sleep for a given \f[I]Duration\f[] or
+.br
+wait until Enter is pressed when no argument was given.
+.br
\f[I]exit\f[]:
Exit the program with the given argument as exit status.
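
A hypothetical actions sketch combining the new types; the warning pattern is made up, and notify.sh is the script from the example above. It sleeps 30 seconds on rate-limit warnings and, on any error, runs a shell command and then stops the current extractor.

.. code:: json

{
    "extractor": {
        "actions": {
            "warning:(?i)rate limit": "wait 30",
            "error": [
                "exec notify.sh 'gdl error'",
                "abort"
            ]
        }
    }
}
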
@@ -1642,6 +1688,23 @@ Sets the maximum depth of returned reply posts.
Process reposts.
+.SS extractor.cien.files
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["image", "video", "download", "gallery"]\f[]
+
+.IP "Description:" 4
+Determines the type and order of files to be downloaded.
+
+Available types are
+\f[I]image\f[],
+\f[I]video\f[],
+\f[I]download\f[],
+\f[I]gallery\f[].
+
+
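
One way this could look in practice: limit ci-en articles to images and videos, in that order.

.. code:: json

{
    "extractor": {
        "cien": {
            "files": ["image", "video"]
        }
    }
}
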
.SS extractor.cyberdrop.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -3004,6 +3067,36 @@ If the selected format is not available,
the first in the list gets chosen (usually mp3).
+.SS extractor.koharu.cbz
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download each gallery as a single \f[I].cbz\f[] file.
+
+Disabling this option causes a gallery
+to be downloaded as individual image files.
+
+
+.SS extractor.koharu.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"original"\f[]
+
+.IP "Description:" 4
+Name of the image format to download.
+
+Available formats are
+.br
+\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[]/\f[I]"original"\f[]
+.br
+
+
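
A possible setup for these two options: skip the .cbz packaging and download 1600px images instead of the originals.

.. code:: json

{
    "extractor": {
        "koharu": {
            "cbz": false,
            "format": "1600"
        }
    }
}
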
.SS extractor.lolisafe.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -4310,6 +4403,27 @@ or each inline image,
use an extra HTTP request to find the URL to its full-resolution version.
+.SS extractor.tumblr.pagination
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"offset"\f[]
+
+.IP "Description:" 4
+Controls how to paginate over blog posts.
+
+.br
+* \f[I]"api"\f[]: \f[I]next\f[] parameter provided by the API
+(potentially misses posts due to a
+\f[I]bug\f[]
+in Tumblr's API)
+.br
+* \f[I]"before"\f[]: timestamp of last post
+.br
+* \f[I]"offset"\f[]: post offset number
+
+
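
For example, switching to the API's next-parameter pagination, accepting the potentially missed posts noted above:

.. code:: json

{
    "extractor": {
        "tumblr": {
            "pagination": "api"
        }
    }
}
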
.SS extractor.tumblr.ratelimit
.IP "Type:" 6
\f[I]string\f[]
@@ -4919,6 +5033,35 @@ Note: Requires \f[I]login\f[]
or \f[I]cookies\f[]
+.SS extractor.vsco.include
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]"gallery"\f[]
+
+.IP "Example:" 4
+.br
+* "avatar,collection"
+.br
+* ["avatar", "collection"]
+
+.IP "Description:" 4
+A (comma-separated) list of subcategories to include
+when processing a user profile.
+
+Possible values are
+\f[I]"avatar"\f[],
+\f[I]"gallery"\f[],
+\f[I]"spaces"\f[],
+\f[I]"collection"\f[],
+
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+
+
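
For instance, to also fetch a user's avatar and collections alongside the default gallery:

.. code:: json

{
    "extractor": {
        "vsco": {
            "include": ["avatar", "gallery", "collection"]
        }
    }
}
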
.SS extractor.vsco.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -5282,17 +5425,25 @@ Note: This requires 1 additional HTTP request per post.
.SS extractor.[booru].url
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"file_url"\f[]
.IP "Example:" 4
-"preview_url"
+.br
+* "preview_url"
+.br
+* ["sample_url", "preview_url", "file_url"}
.IP "Description:" 4
Alternate field name to retrieve download URLs from.
+When multiple names are given, download the first available one.
+
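
A sketch with gelbooru as a hypothetical [booru] category, falling back from sample to preview to full file URL:

.. code:: json

{
    "extractor": {
        "gelbooru": {
            "url": ["sample_url", "preview_url", "file_url"]
        }
    }
}
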
.SS extractor.[manga-extractor].chapter-reverse
.IP "Type:" 6
@@ -6249,13 +6400,19 @@ If this option is set, \f[I]metadata.extension\f[] and
.SS metadata.directory
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"."\f[]
.IP "Example:" 4
-"metadata"
+.br
+* "metadata"
+.br
+* ["..", "metadata", "\\fF {id // 500 * 500}"]
.IP "Description:" 4
Directory where metadata files are stored in relative to the
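
As a post processor sketch, reusing the format-string example above to bucket metadata files by post ID:

.. code:: json

{
    "extractor": {
        "postprocessors": [{
            "name": "metadata",
            "directory": ["..", "metadata", "\\fF {id // 500 * 500}"]
        }]
    }
}
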
@@ -6965,6 +7122,19 @@ Set this option to \f[I]null\f[] or an invalid path to disable
this cache.
+.SS filters-environment
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Wrap filter expressions in a try/except block and evaluate
+those raising an exception as \f[I]false\f[]
+instead of aborting the current extractor run.
+
+
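
Setting this top-level option to false would restore the old behavior, letting an exception in a filter expression abort the run:

.. code:: json

{
    "filters-environment": false
}
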
.SS format-separator
.IP "Type:" 6
\f[I]string\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 9f12652..2a7f8f2 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -10,7 +10,7 @@
"proxy": null,
"skip": true,
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
"retries": 4,
"timeout": 30.0,
"verify": true,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index a06aa55..eec2e32 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.1
+Version: 1.27.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -65,10 +65,12 @@ Dependencies
Optional
--------
+- yt-dlp_ or youtube-dl_: HLS/DASH video downloads, ``ytdl`` integration
- FFmpeg_: Pixiv Ugoira conversion
-- yt-dlp_ or youtube-dl_: Video downloads
+- mkvmerge_: Accurate Ugoira frame timecodes
- PySocks_: SOCKS proxy support
- brotli_ or brotlicffi_: Brotli compression support
+- zstandard_: Zstandard compression support
- PyYAML_: YAML configuration file support
- toml_: TOML configuration file support for Python<3.11
- SecretStorage_: GNOME keyring passwords for ``--cookies-from-browser``
@@ -112,9 +114,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.2/gallery-dl.bin>`__
Nightly Builds
@@ -457,11 +459,13 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _pip: https://pip.pypa.io/en/stable/
.. _Requests: https://requests.readthedocs.io/en/master/
.. _FFmpeg: https://www.ffmpeg.org/
+.. _mkvmerge: https://www.matroska.org/downloads/mkvtoolnix.html
.. _yt-dlp: https://github.com/yt-dlp/yt-dlp
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
.. _PySocks: https://pypi.org/project/PySocks/
.. _brotli: https://github.com/google/brotli
.. _brotlicffi: https://github.com/python-hyper/brotlicffi
+.. _zstandard: https://github.com/indygreg/python-zstandard
.. _PyYAML: https://pyyaml.org/
.. _toml: https://pypi.org/project/toml/
.. _SecretStorage: https://pypi.org/project/SecretStorage/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index a892544..de5738a 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -56,6 +56,7 @@ gallery_dl/extractor/8chan.py
gallery_dl/extractor/8muses.py
gallery_dl/extractor/__init__.py
gallery_dl/extractor/adultempire.py
+gallery_dl/extractor/agnph.py
gallery_dl/extractor/architizer.py
gallery_dl/extractor/artstation.py
gallery_dl/extractor/aryion.py
@@ -68,6 +69,7 @@ gallery_dl/extractor/booru.py
gallery_dl/extractor/bunkr.py
gallery_dl/extractor/catbox.py
gallery_dl/extractor/chevereto.py
+gallery_dl/extractor/cien.py
gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
gallery_dl/extractor/cyberdrop.py
@@ -79,7 +81,6 @@ gallery_dl/extractor/dynastyscans.py
gallery_dl/extractor/e621.py
gallery_dl/extractor/erome.py
gallery_dl/extractor/exhentai.py
-gallery_dl/extractor/fallenangels.py
gallery_dl/extractor/fanbox.py
gallery_dl/extractor/fanleaks.py
gallery_dl/extractor/fantia.py
@@ -125,6 +126,7 @@ gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
+gallery_dl/extractor/koharu.py
gallery_dl/extractor/komikcast.py
gallery_dl/extractor/lensdump.py
gallery_dl/extractor/lexica.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 1d4215e..4b39c15 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -105,6 +105,11 @@ def main():
output.ANSI = True
+ # filter environment
+ filterenv = config.get((), "filters-environment", True)
+ if not filterenv:
+ util.compile_expression = util.compile_expression_raw
+
# format string separator
separator = config.get((), "format-separator")
if separator:
@@ -145,6 +150,10 @@ def main():
log.debug("Configuration Files %s", config._files)
+ if args.print_traffic:
+ import requests
+ requests.packages.urllib3.connection.HTTPConnection.debuglevel = 1
+
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
@@ -240,6 +249,9 @@ def main():
if config.get(("output",), "fallback", True):
jobtype.handle_url = \
staticmethod(jobtype.handle_url_fallback)
+ elif args.dump_json:
+ jobtype = job.DataJob
+ jobtype.resolve = args.dump_json - 1
else:
jobtype = args.jobtype or job.DownloadJob
@@ -299,6 +311,8 @@ def main():
else:
input_manager.success()
+ except exception.StopExtraction:
+ pass
except exception.TerminateExtraction:
pass
except exception.RestartExtraction:
diff --git a/gallery_dl/actions.py b/gallery_dl/actions.py
index 883e38b..668032d 100644
--- a/gallery_dl/actions.py
+++ b/gallery_dl/actions.py
@@ -9,8 +9,10 @@
""" """
import re
+import time
import logging
import operator
+import functools
from . import util, exception
@@ -19,29 +21,100 @@ def parse(actionspec):
actionspec = actionspec.items()
actions = {}
- actions[logging.DEBUG] = actions_d = []
- actions[logging.INFO] = actions_i = []
- actions[logging.WARNING] = actions_w = []
- actions[logging.ERROR] = actions_e = []
+ actions[-logging.DEBUG] = actions_bd = []
+ actions[-logging.INFO] = actions_bi = []
+ actions[-logging.WARNING] = actions_bw = []
+ actions[-logging.ERROR] = actions_be = []
+ actions[logging.DEBUG] = actions_ad = []
+ actions[logging.INFO] = actions_ai = []
+ actions[logging.WARNING] = actions_aw = []
+ actions[logging.ERROR] = actions_ae = []
for event, spec in actionspec:
level, _, pattern = event.partition(":")
- type, _, args = spec.partition(" ")
- action = (re.compile(pattern).search, ACTIONS[type](args))
+ search = re.compile(pattern).search if pattern else util.true
+
+ if isinstance(spec, str):
+ type, _, args = spec.partition(" ")
+ before, after = ACTIONS[type](args)
+ else:
+ actions_before = []
+ actions_after = []
+ for s in spec:
+ type, _, args = s.partition(" ")
+ before, after = ACTIONS[type](args)
+ if before:
+ actions_before.append(before)
+ if after:
+ actions_after.append(after)
+ before = _chain_actions(actions_before)
+ after = _chain_actions(actions_after)
level = level.strip()
if not level or level == "*":
- actions_d.append(action)
- actions_i.append(action)
- actions_w.append(action)
- actions_e.append(action)
+ if before:
+ action = (search, before)
+ actions_bd.append(action)
+ actions_bi.append(action)
+ actions_bw.append(action)
+ actions_be.append(action)
+ if after:
+ action = (search, after)
+ actions_ad.append(action)
+ actions_ai.append(action)
+ actions_aw.append(action)
+ actions_ae.append(action)
else:
-
- actions[_level_to_int(level)].append(action)
+ level = _level_to_int(level)
+ if before:
+ actions[-level].append((search, before))
+ if after:
+ actions[level].append((search, after))
return actions
+class LoggerAdapter():
+
+ def __init__(self, logger, job):
+ self.logger = logger
+ self.extra = job._logger_extra
+ self.actions = job._logger_actions
+
+ self.debug = functools.partial(self.log, logging.DEBUG)
+ self.info = functools.partial(self.log, logging.INFO)
+ self.warning = functools.partial(self.log, logging.WARNING)
+ self.error = functools.partial(self.log, logging.ERROR)
+
+ def log(self, level, msg, *args, **kwargs):
+ msg = str(msg)
+ if args:
+ msg = msg % args
+
+ before = self.actions[-level]
+ after = self.actions[level]
+
+ if before:
+ args = self.extra.copy()
+ args["level"] = level
+
+ for cond, action in before:
+ if cond(msg):
+ action(args)
+
+ level = args["level"]
+
+ if self.logger.isEnabledFor(level):
+ kwargs["extra"] = self.extra
+ self.logger._log(level, msg, (), **kwargs)
+
+ if after:
+ args = self.extra.copy()
+ for cond, action in after:
+ if cond(msg):
+ action(args)
+
+
def _level_to_int(level):
try:
return logging._nameToLevel[level]
@@ -49,10 +122,19 @@ def _level_to_int(level):
return int(level)
+def _chain_actions(actions):
+ def _chain(args):
+ for action in actions:
+ action(args)
+ return _chain
+
+
+# --------------------------------------------------------------------
+
def action_print(opts):
def _print(_):
print(opts)
- return _print
+ return None, _print
def action_status(opts):
@@ -69,7 +151,7 @@ def action_status(opts):
def _status(args):
args["job"].status = op(args["job"].status, value)
- return _status
+ return _status, None
def action_level(opts):
@@ -77,17 +159,38 @@ def action_level(opts):
def _level(args):
args["level"] = level
- return _level
+ return _level, None
+
+
+def action_exec(opts):
+ def _exec(_):
+ util.Popen(opts, shell=True).wait()
+ return None, _exec
def action_wait(opts):
- def _wait(args):
- input("Press Enter to continue")
- return _wait
+ if opts:
+ seconds = util.build_duration_func(opts)
+
+ def _wait(args):
+ time.sleep(seconds())
+ else:
+ def _wait(args):
+ input("Press Enter to continue")
+
+ return None, _wait
+
+
+def action_abort(opts):
+ return None, util.raises(exception.StopExtraction)
+
+
+def action_terminate(opts):
+ return None, util.raises(exception.TerminateExtraction)
def action_restart(opts):
- return util.raises(exception.RestartExtraction)
+ return None, util.raises(exception.RestartExtraction)
def action_exit(opts):
@@ -98,14 +201,17 @@ def action_exit(opts):
def _exit(args):
raise SystemExit(opts)
- return _exit
+ return None, _exit
ACTIONS = {
- "print" : action_print,
- "status" : action_status,
- "level" : action_level,
- "restart": action_restart,
- "wait" : action_wait,
- "exit" : action_exit,
+ "abort" : action_abort,
+ "exec" : action_exec,
+ "exit" : action_exit,
+ "level" : action_level,
+ "print" : action_print,
+ "restart" : action_restart,
+ "status" : action_status,
+ "terminate": action_terminate,
+ "wait" : action_wait,
}
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 47f78a7..f017929 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -25,7 +25,7 @@ from . import aes, text, util
SUPPORTED_BROWSERS_CHROMIUM = {
- "brave", "chrome", "chromium", "edge", "opera", "vivaldi"}
+ "brave", "chrome", "chromium", "edge", "opera", "thorium", "vivaldi"}
SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"}
logger = logging.getLogger("cookies")
@@ -354,6 +354,7 @@ def _get_chromium_based_browser_settings(browser_name):
"chromium": join(appdata_local, R"Chromium\User Data"),
"edge" : join(appdata_local, R"Microsoft\Edge\User Data"),
"opera" : join(appdata_roaming, R"Opera Software\Opera Stable"),
+ "thorium" : join(appdata_local, R"Thorium\User Data"),
"vivaldi" : join(appdata_local, R"Vivaldi\User Data"),
}[browser_name]
@@ -365,6 +366,7 @@ def _get_chromium_based_browser_settings(browser_name):
"chromium": join(appdata, "Chromium"),
"edge" : join(appdata, "Microsoft Edge"),
"opera" : join(appdata, "com.operasoftware.Opera"),
+ "thorium" : join(appdata, "Thorium"),
"vivaldi" : join(appdata, "Vivaldi"),
}[browser_name]
@@ -377,6 +379,7 @@ def _get_chromium_based_browser_settings(browser_name):
"chromium": join(config, "chromium"),
"edge" : join(config, "microsoft-edge"),
"opera" : join(config, "opera"),
+ "thorium" : join(config, "Thorium"),
"vivaldi" : join(config, "vivaldi"),
}[browser_name]
@@ -390,6 +393,7 @@ def _get_chromium_based_browser_settings(browser_name):
"edge" : "Microsoft Edge" if sys.platform == "darwin" else
"Chromium",
"opera" : "Opera" if sys.platform == "darwin" else "Chromium",
+ "thorium" : "Thorium",
"vivaldi" : "Vivaldi" if sys.platform == "darwin" else "Chrome",
}[browser_name]
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index a4b0997..a5e8b27 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -27,7 +27,8 @@ class _8chanExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
- self.cookies.set("TOS", "1", domain=self.root.rpartition("/")[2])
+ self.cookies.set(
+ "TOS20240718", "1", domain=self.root.rpartition("/")[2])
@memcache()
def cookies_prepare(self):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6aff1f3..e103cb1 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -22,6 +22,7 @@ modules = [
"8chan",
"8muses",
"adultempire",
+ "agnph",
"architizer",
"artstation",
"aryion",
@@ -33,6 +34,7 @@ modules = [
"bunkr",
"catbox",
"chevereto",
+ "cien",
"comicvine",
"cyberdrop",
"danbooru",
@@ -42,7 +44,6 @@ modules = [
"e621",
"erome",
"exhentai",
- "fallenangels",
"fanbox",
"fanleaks",
"fantia",
@@ -84,6 +85,7 @@ modules = [
"keenspot",
"kemonoparty",
"khinsider",
+ "koharu",
"komikcast",
"lensdump",
"lexica",
diff --git a/gallery_dl/extractor/agnph.py b/gallery_dl/extractor/agnph.py
new file mode 100644
index 0000000..653b73f
--- /dev/null
+++ b/gallery_dl/extractor/agnph.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://agn.ph/"""
+
+from . import booru
+from .. import text
+
+from xml.etree import ElementTree
+import collections
+import re
+
+BASE_PATTERN = r"(?:https?://)?agn\.ph"
+
+
+class AgnphExtractor(booru.BooruExtractor):
+ category = "agnph"
+ root = "https://agn.ph"
+ page_start = 1
+ per_page = 45
+
+ TAG_TYPES = {
+ "a": "artist",
+ "b": "copyright",
+ "c": "character",
+ "d": "species",
+ "m": "general",
+ }
+
+ def _init(self):
+ self.cookies.set("confirmed_age", "true", domain="agn.ph")
+
+ def _prepare(self, post):
+ post["date"] = text.parse_timestamp(post["created_at"])
+ post["status"] = post["status"].strip()
+ post["has_children"] = ("true" in post["has_children"])
+
+ def _xml_to_dict(self, xml):
+ return {element.tag: element.text for element in xml}
+
+ def _pagination(self, url, params):
+ params["api"] = "xml"
+ if "page" in params:
+ params["page"] = \
+ self.page_start + text.parse_int(params["page"]) - 1
+ else:
+ params["page"] = self.page_start
+
+ while True:
+ data = self.request(url, params=params).text
+ root = ElementTree.fromstring(data)
+
+ yield from map(self._xml_to_dict, root)
+
+ attrib = root.attrib
+ if int(attrib["offset"]) + len(root) >= int(attrib["count"]):
+ return
+
+ params["page"] += 1
+
+ def _html(self, post):
+ url = "{}/gallery/post/show/{}/".format(self.root, post["id"])
+ return self.request(url).text
+
+ def _tags(self, post, page):
+ tag_container = text.extr(
+ page, '<ul class="taglist">', '<h3>Statistics</h3>')
+ if not tag_container:
+ return
+
+ tags = collections.defaultdict(list)
+ pattern = re.compile(r'class="(.)typetag">([^<]+)')
+ for tag_type, tag_name in pattern.findall(tag_container):
+ tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
+ for key, value in tags.items():
+ post["tags_" + self.TAG_TYPES[key]] = " ".join(value)
+
+
+class AgnphTagExtractor(AgnphExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/gallery/post/(?:\?([^#]+))?$"
+ example = "https://agn.ph/gallery/post/?search=TAG"
+
+ def __init__(self, match):
+ AgnphExtractor.__init__(self, match)
+ self.params = text.parse_query(self.groups[0])
+
+ def metadata(self):
+ return {"search_tags": self.params.get("search") or ""}
+
+ def posts(self):
+ url = self.root + "/gallery/post/"
+ return self._pagination(url, self.params.copy())
+
+
+class AgnphPostExtractor(AgnphExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/gallery/post/show/(\d+)"
+ example = "https://agn.ph/gallery/post/show/12345/"
+
+ def posts(self):
+ url = "{}/gallery/post/show/{}/?api=xml".format(
+ self.root, self.groups[0])
+ post = ElementTree.fromstring(self.request(url).text)
+ return (self._xml_to_dict(post),)
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index ec86263..17b780e 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -79,18 +79,20 @@ class AryionExtractor(Extractor):
def metadata(self):
"""Return general metadata"""
- def _pagination_params(self, url, params=None):
+ def _pagination_params(self, url, params=None, needle=None):
if params is None:
params = {"p": 1}
else:
params["p"] = text.parse_int(params.get("p"), 1)
+ if needle is None:
+ needle = "class='gallery-item' id='"
+
while True:
page = self.request(url, params=params).text
cnt = 0
- for post_id in text.extract_iter(
- page, "class='gallery-item' id='", "'"):
+ for post_id in text.extract_iter(page, needle, "'"):
cnt += 1
yield post_id
@@ -200,6 +202,21 @@ class AryionGalleryExtractor(AryionExtractor):
return util.advance(self._pagination_next(url), self.offset)
+class AryionFavoriteExtractor(AryionExtractor):
+ """Extractor for a user's favorites gallery"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "{user!l}", "favorites")
+ archive_fmt = "f_{user}_{id}"
+ categorytransfer = True
+ pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
+ example = "https://aryion.com/g4/favorites/USER"
+
+ def posts(self):
+ url = "{}/g4/favorites/{}".format(self.root, self.user)
+ return self._pagination_params(
+ url, None, "class='gallery-item favorite' id='")
+
+
class AryionTagExtractor(AryionExtractor):
"""Extractor for tag searches on eka's portal"""
subcategory = "tag"
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index ad0caf9..f24059f 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -152,8 +152,16 @@ class BehanceGalleryExtractor(BehanceExtractor):
continue
if mtype == "image":
- url = module["imageSizes"]["size_original"]["url"]
- append((url, module))
+ sizes = {
+ size["url"].rsplit("/", 2)[1]: size
+ for size in module["imageSizes"]["allAvailable"]
+ }
+ size = (sizes.get("source") or
+ sizes.get("max_3840") or
+ sizes.get("fs") or
+ sizes.get("hd") or
+ sizes.get("disp"))
+ append((size["url"], module))
elif mtype == "video":
try:
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index cbd0e07..7e26f38 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -29,16 +29,21 @@ class BooruExtractor(BaseExtractor):
url_key = self.config("url")
if url_key:
- self._file_url = operator.itemgetter(url_key)
+ if isinstance(url_key, (list, tuple)):
+ self._file_url = self._file_url_list
+ self._file_url_keys = url_key
+ else:
+ self._file_url = operator.itemgetter(url_key)
for post in self.posts():
try:
url = self._file_url(post)
if url[0] == "/":
url = self.root + url
- except (KeyError, TypeError):
- self.log.debug("Unable to fetch download URL for post %s "
- "(md5: %s)", post.get("id"), post.get("md5"))
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ self.log.warning("Unable to fetch download URL for post %s "
+ "(md5: %s)", post.get("id"), post.get("md5"))
continue
if fetch_html:
@@ -73,6 +78,11 @@ class BooruExtractor(BaseExtractor):
_file_url = operator.itemgetter("file_url")
+ def _file_url_list(self, post):
+ urls = (post[key] for key in self._file_url_keys if post.get(key))
+ post["_fallback"] = it = iter(urls)
+ return next(it)
+
def _prepare(self, post):
"""Prepare a 'post's metadata"""
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index a093347..77f0de6 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -13,7 +13,7 @@ from .. import text
BASE_PATTERN = (
r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|ru|la|is|to|ac|black|cat|media|red|site|ws))"
+ r"\.(?:s[kiu]|fi|ru|la|is|to|ac|black|cat|media|red|site|ws))"
)
LEGACY_DOMAINS = {
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
new file mode 100644
index 0000000..bae86d0
--- /dev/null
+++ b/gallery_dl/extractor/cien.py
@@ -0,0 +1,199 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://ci-en.net/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
+
+
+class CienExtractor(Extractor):
+ category = "cien"
+ root = "https://ci-en.net"
+ request_interval = (1.0, 2.0)
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match.group(0))
+ Extractor.__init__(self, match)
+
+ def _init(self):
+ self.cookies.set("accepted_rating", "r18g", domain="ci-en.dlsite.com")
+
+ def _pagination_articles(self, url, params):
+ data = {"_extractor": CienArticleExtractor}
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ for card in text.extract_iter(
+ page, ' class="c-cardCase-item', '</div>'):
+ article_url = text.extr(card, ' href="', '"')
+ yield Message.Queue, article_url, data
+
+ if ' rel="next"' not in page:
+ return
+ params["page"] += 1
+
+
+class CienArticleExtractor(CienExtractor):
+ subcategory = "article"
+ filename_fmt = "{num:>02} {filename}.{extension}"
+ directory_fmt = ("{category}", "{author[name]}", "{post_id} {name}")
+ archive_fmt = "{post_id}_{num}"
+ pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)"
+ example = "https://ci-en.net/creator/123/article/12345"
+
+ def items(self):
+ url = "{}/creator/{}/article/{}".format(
+ self.root, self.groups[0], self.groups[1])
+ page = self.request(url, notfound="article").text
+
+ post = util.json_loads(text.extr(
+ page, '<script type="application/ld+json">', '</script>'))[0]
+
+ files = self._extract_files(post.get("articleBody") or page)
+
+ post["post_url"] = url
+ post["post_id"] = text.parse_int(self.groups[1])
+ post["count"] = len(files)
+ post["date"] = text.parse_datetime(post["datePublished"])
+
+ try:
+ del post["publisher"]
+ del post["sameAs"]
+ except Exception:
+ pass
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ if "extension" not in file:
+ text.nameext_from_url(file["url"], post)
+ yield Message.Url, file["url"], post
+
+ def _extract_files(self, page):
+ files = []
+
+ filetypes = self.config("files")
+ if filetypes is None:
+ self._extract_files_image(page, files)
+ self._extract_files_video(page, files)
+ self._extract_files_download(page, files)
+ self._extract_files_gallery(page, files)
+ else:
+ generators = {
+ "image" : self._extract_files_image,
+ "video" : self._extract_files_video,
+ "download": self._extract_files_download,
+ "gallery" : self._extract_files_gallery,
+ "gallerie": self._extract_files_gallery,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ for ft in filetypes:
+ generators[ft.rstrip("s")](page, files)
+
+ return files
+
+ def _extract_files_image(self, page, files):
+ for image in text.extract_iter(
+ page, 'class="file-player-image"', "</figure>"):
+ size = text.extr(image, ' data-size="', '"')
+ w, _, h = size.partition("x")
+
+ files.append({
+ "url" : text.extr(image, ' data-raw="', '"'),
+ "width" : text.parse_int(w),
+ "height": text.parse_int(h),
+ "type" : "image",
+ })
+
+ def _extract_files_video(self, page, files):
+ for video in text.extract_iter(
+ page, "<vue-file-player", "</vue-file-player>"):
+ path = text.extr(video, ' base-path="', '"')
+ name = text.extr(video, ' file-name="', '"')
+ auth = text.extr(video, ' auth-key="', '"')
+
+ file = text.nameext_from_url(name)
+ file["url"] = "{}video-web.mp4?{}".format(path, auth)
+ file["type"] = "video"
+ files.append(file)
+
+ def _extract_files_download(self, page, files):
+ for download in text.extract_iter(
+ page, 'class="downloadBlock', "</div>"):
+ name = text.extr(download, "<p>", "<")
+
+ file = text.nameext_from_url(name.rpartition(" ")[0])
+ file["url"] = text.extr(download, ' href="', '"')
+ file["type"] = "download"
+ files.append(file)
+
+ def _extract_files_gallery(self, page, files):
+ for gallery in text.extract_iter(
+ page, "<vue-image-gallery", "</vue-image-gallery>"):
+
+ url = self.root + "/api/creator/gallery/images"
+ params = {
+ "hash" : text.extr(gallery, ' hash="', '"'),
+ "gallery_id": text.extr(gallery, ' gallery-id="', '"'),
+ "time" : text.extr(gallery, ' time="', '"'),
+ }
+ data = self.request(url, params=params).json()
+ url = self.root + "/api/creator/gallery/imagePath"
+
+ for params["page"], params["file_id"] in enumerate(
+ data["imgList"]):
+ path = self.request(url, params=params).json()["path"]
+
+ file = params.copy()
+ file["url"] = path
+ files.append(file)
+
+
+class CienCreatorExtractor(CienExtractor):
+ subcategory = "creator"
+ pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
+ example = "https://ci-en.net/creator/123"
+
+ def items(self):
+ url = "{}/creator/{}/article".format(self.root, self.groups[0])
+ params = text.parse_query(self.groups[1])
+ params["mode"] = "list"
+ return self._pagination_articles(url, params)
+
+
+class CienRecentExtractor(CienExtractor):
+ subcategory = "recent"
+ pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?"
+ example = "https://ci-en.net/mypage/recent"
+
+ def items(self):
+ url = self.root + "/mypage/recent"
+ params = text.parse_query(self.groups[0])
+ return self._pagination_articles(url, params)
+
+
+class CienFollowingExtractor(CienExtractor):
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/mypage/subscription(/following)?"
+ example = "https://ci-en.net/mypage/subscription"
+
+ def items(self):
+ url = self.root + "/mypage/subscription" + (self.groups[0] or "")
+ page = self.request(url).text
+ data = {"_extractor": CienCreatorExtractor}
+
+ for subscription in text.extract_iter(
+ page, 'class="c-grid-subscriptionInfo', '</figure>'):
+ url = text.extr(subscription, ' href="', '"')
+ yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d7a41bc..df70571 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -378,7 +378,7 @@ class Extractor():
useragent = self.config("user-agent")
if useragent is None:
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:109.0) Gecko/20100101 Firefox/115.0")
+ "rv:128.0) Gecko/20100101 Firefox/128.0")
elif useragent == "browser":
useragent = _browser_useragent()
headers["User-Agent"] = useragent
@@ -390,6 +390,8 @@ class Extractor():
headers["Accept-Encoding"] = "gzip, deflate, br"
else:
headers["Accept-Encoding"] = "gzip, deflate"
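+        # advertise zstd only when the installed urllib3 can actually decode it
+        # (see the ZSTD check below)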
+ if ZSTD:
+ headers["Accept-Encoding"] += ", zstd"
referer = self.config("referer", self.referer)
if referer:
@@ -789,10 +791,11 @@ class BaseExtractor(Extractor):
instances = ()
def __init__(self, match):
- Extractor.__init__(self, match)
if not self.category:
+ self.groups = match.groups()
+ self.match = match
self._init_category()
- self._cfgpath = ("extractor", self.category, self.subcategory)
+ Extractor.__init__(self, match)
def _init_category(self):
for index, group in enumerate(self.groups):
@@ -911,13 +914,12 @@ _browser_cookies = {}
HTTP_HEADERS = {
"firefox": (
("User-Agent", "Mozilla/5.0 ({}; "
- "rv:109.0) Gecko/20100101 Firefox/115.0"),
+ "rv:128.0) Gecko/20100101 Firefox/128.0"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
- "image/avif,image/webp,*/*;q=0.8"),
+ "image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"),
("Accept-Language", "en-US,en;q=0.5"),
("Accept-Encoding", None),
("Referer", None),
- ("DNT", "1"),
("Connection", "keep-alive"),
("Upgrade-Insecure-Requests", "1"),
("Cookie", None),
@@ -991,6 +993,12 @@ try:
except AttributeError:
BROTLI = False
+# detect zstandard support
+try:
+ ZSTD = urllib3.response.HAS_ZSTD
+except AttributeError:
+ ZSTD = False
+
# set (urllib3) warnings filter
action = config.get((), "warnings", "default")
if action:
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 2199cc8..a70710c 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -846,55 +846,6 @@ class DeviantartStatusExtractor(DeviantartExtractor):
)
-class DeviantartPopularExtractor(DeviantartExtractor):
- """Extractor for popular deviations"""
- subcategory = "popular"
- directory_fmt = ("{category}", "Popular",
- "{popular[range]}", "{popular[search]}")
- archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
- pattern = (r"(?:https?://)?www\.deviantart\.com/(?:"
- r"(?:deviations/?)?\?order=(popular-[^/?#]+)"
- r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
- r")/?(?:\?([^#]*))?")
- example = "https://www.deviantart.com/popular-24-hours/"
-
- def __init__(self, match):
- DeviantartExtractor.__init__(self, match)
- self.user = ""
-
- trange1, path, trange2, query = match.groups()
- query = text.parse_query(query)
- self.search_term = query.get("q")
-
- trange = trange1 or trange2 or query.get("order", "")
- if trange.startswith("popular-"):
- trange = trange[8:]
- self.time_range = {
- "newest" : "now",
- "most-recent" : "now",
- "this-week" : "1week",
- "this-month" : "1month",
- "this-century": "alltime",
- "all-time" : "alltime",
- }.get(trange, "alltime")
-
- self.popular = {
- "search": self.search_term or "",
- "range" : trange or "all-time",
- "path" : path.strip("/") if path else "",
- }
-
- def deviations(self):
- if self.time_range == "now":
- return self.api.browse_newest(self.search_term, self.offset)
- return self.api.browse_popular(
- self.search_term, self.time_range, self.offset)
-
- def prepare(self, deviation):
- DeviantartExtractor.prepare(self, deviation)
- deviation["popular"] = self.popular
-
-
class DeviantartTagExtractor(DeviantartExtractor):
"""Extractor for deviations from tag searches"""
subcategory = "tag"
@@ -1077,14 +1028,14 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
subcategory = "following"
- pattern = BASE_PATTERN + "/about#watching$"
+ pattern = BASE_PATTERN + "/(?:about#)?watching"
example = "https://www.deviantart.com/USER/about#watching"
def items(self):
- eclipse_api = DeviantartEclipseAPI(self)
+ api = DeviantartOAuthAPI(self)
- for user in eclipse_api.user_watching(self.user, self.offset):
- url = "{}/{}".format(self.root, user["username"])
+ for user in api.user_friends(self.user):
+ url = "{}/{}".format(self.root, user["user"]["username"])
user["_extractor"] = DeviantartUserExtractor
yield Message.Queue, url, user
@@ -1095,7 +1046,7 @@ class DeviantartFollowingExtractor(DeviantartExtractor):
class DeviantartOAuthAPI():
"""Interface for the DeviantArt OAuth API
- Ref: https://www.deviantart.com/developers/http/v1/20160316
+ https://www.deviantart.com/developers/http/v1/20160316
"""
CLIENT_ID = "5388"
CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
@@ -1188,29 +1139,6 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination(endpoint, params, public=False, unpack=True)
- def browse_newest(self, query=None, offset=0):
- """Browse newest deviations"""
- endpoint = "/browse/newest"
- params = {
- "q" : query,
- "limit" : 120,
- "offset" : offset,
- "mature_content": self.mature,
- }
- return self._pagination(endpoint, params)
-
- def browse_popular(self, query=None, timerange=None, offset=0):
- """Yield popular deviations"""
- endpoint = "/browse/popular"
- params = {
- "q" : query,
- "limit" : 120,
- "timerange" : timerange,
- "offset" : offset,
- "mature_content": self.mature,
- }
- return self._pagination(endpoint, params)
-
def browse_tags(self, tag, offset=0):
""" Browse a tag """
endpoint = "/browse/tags"
@@ -1223,11 +1151,12 @@ class DeviantartOAuthAPI():
return self._pagination(endpoint, params)
def browse_user_journals(self, username, offset=0):
- """Yield all journal entries of a specific user"""
- endpoint = "/browse/user/journals"
- params = {"username": username, "offset": offset, "limit": 50,
- "mature_content": self.mature, "featured": "false"}
- return self._pagination(endpoint, params)
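+        # fetch journals through the combined profile-posts feed,
+        # filtered by post URL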
+ journals = filter(
+ lambda post: "/journal/" in post["url"],
+ self.user_profile_posts(username))
+ if offset:
+ journals = util.advance(journals, offset)
+ return journals
def collections(self, username, folder_id, offset=0):
"""Yield all Deviation-objects contained in a collection folder"""
@@ -1339,16 +1268,10 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination_list(endpoint, params)
- @memcache(keyarg=1)
- def user_profile(self, username):
- """Get user profile information"""
- endpoint = "/user/profile/" + username
- return self._call(endpoint, fatal=False)
-
- def user_statuses(self, username, offset=0):
- """Yield status updates of a specific user"""
- endpoint = "/user/statuses/"
- params = {"username": username, "offset": offset, "limit": 50}
+ def user_friends(self, username, offset=0):
+ """Get the users list of friends"""
+ endpoint = "/user/friends/" + username
+ params = {"limit": 50, "offset": offset, "mature_content": self.mature}
return self._pagination(endpoint, params)
def user_friends_watch(self, username):
@@ -1376,6 +1299,27 @@ class DeviantartOAuthAPI():
endpoint, method="POST", public=False, fatal=False,
).get("success")
+ @memcache(keyarg=1)
+ def user_profile(self, username):
+ """Get user profile information"""
+ endpoint = "/user/profile/" + username
+ return self._call(endpoint, fatal=False)
+
+ def user_profile_posts(self, username):
+ endpoint = "/user/profile/posts"
+ params = {"username": username, "limit": 50,
+ "mature_content": self.mature}
+ return self._pagination(endpoint, params)
+
+ def user_statuses(self, username, offset=0):
+ """Yield status updates of a specific user"""
+ statuses = filter(
+ lambda post: "/status-update/" in post["url"],
+ self.user_profile_posts(username))
+ if offset:
+ statuses = util.advance(statuses, offset)
+ return statuses
+
def authenticate(self, refresh_token_key):
"""Authenticate the application by requesting an access token"""
self.headers["Authorization"] = \
@@ -1464,7 +1408,7 @@ class DeviantartOAuthAPI():
self.log.error(msg)
return data
- def _switch_tokens(self, results, params):
+ def _should_switch_tokens(self, results, params):
if len(results) < params["limit"]:
return True
@@ -1496,7 +1440,7 @@ class DeviantartOAuthAPI():
results = [item["journal"] for item in results
if "journal" in item]
if extend:
- if public and self._switch_tokens(results, params):
+ if public and self._should_switch_tokens(results, params):
if self.refresh_token_key:
self.log.debug("Switching to private access token")
public = False
@@ -1540,6 +1484,11 @@ class DeviantartOAuthAPI():
return
params["offset"] = int(params["offset"]) + len(results)
+ def _pagination_list(self, endpoint, params, key="results"):
+ result = []
+ result.extend(self._pagination(endpoint, params, False, key=key))
+ return result
+
@staticmethod
def _shared_content(results):
"""Return an iterable of shared deviations in 'results'"""
@@ -1548,11 +1497,6 @@ class DeviantartOAuthAPI():
if "deviation" in item:
yield item["deviation"]
- def _pagination_list(self, endpoint, params, key="results"):
- result = []
- result.extend(self._pagination(endpoint, params, False, key=key))
- return result
-
def _metadata(self, deviations):
"""Add extended metadata to each deviation object"""
if len(deviations) <= self.limit:
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 26f2184..2f0230a 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -18,7 +18,8 @@ class DirectlinkExtractor(Extractor):
filename_fmt = "{domain}/{path}/{filename}.{extension}"
archive_fmt = filename_fmt
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
- r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
+ r"(?:jpe?g|jpe|png|gif|bmp|svg|web[mp]|avif|heic|psd"
+ r"|mp4|m4v|mov|mkv|og[gmv]|wav|mp3|opus|zip|rar|7z|pdf|swf))"
r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
example = "https://en.wikipedia.org/static/images/project-logos/enwiki.png"
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 733d0d8..583869f 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -66,6 +66,8 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
text.extr(group, ' alt="', '"')),
"date" : text.parse_datetime(extr(
'"icon-calendar"></i> ', '<'), "%b %d, %Y"),
+ "tags" : text.split_html(extr(
+ "class='tags'>", "<div id='chapter-actions'")),
"lang" : "en",
"language": "English",
}
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 8c9da2f..e6d136f 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -46,18 +46,24 @@ class EromeExtractor(Extractor):
page, 'href="https://www.erome.com/', '"', pos)
urls = []
+ date = None
groups = page.split('<div class="media-group"')
for group in util.advance(groups, 1):
url = (text.extr(group, '<source src="', '"') or
text.extr(group, 'data-src="', '"'))
if url:
urls.append(url)
+ if not date:
+ ts = text.extr(group, '?v=', '"')
+ if len(ts) > 1:
+ date = text.parse_timestamp(ts)
data = {
"album_id" : album_id,
"title" : text.unescape(title),
"user" : text.unquote(user),
"count" : len(urls),
+ "date" : date,
"_http_headers": {"Referer": url},
}
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 1805403..1b4f995 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -394,6 +394,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.original = False
return self.data["_url_1280"]
+ if " temporarily banned " in page:
+ raise exception.AuthorizationError("Temporarily Banned")
+
self._report_limits()
return True
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
deleted file mode 100644
index 650a707..0000000
--- a/gallery_dl/extractor/fallenangels.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2017-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://www.fascans.com/"""
-
-from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
-
-
-class FallenangelsChapterExtractor(ChapterExtractor):
- """Extractor for manga chapters from fascans.com"""
- category = "fallenangels"
- pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
- r"/manga/([^/?#]+)/([^/?#]+)")
- example = "https://manga.fascans.com/manga/NAME/CHAPTER/"
-
- def __init__(self, match):
- self.version, self.manga, self.chapter = match.groups()
- url = "https://{}.fascans.com/manga/{}/{}/1".format(
- self.version, self.manga, self.chapter)
- ChapterExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- extr = text.extract_from(page)
- lang = "vi" if self.version == "truyen" else "en"
- chapter, sep, minor = self.chapter.partition(".")
- return {
- "manga" : extr('name="description" content="', ' Chapter '),
- "title" : extr(': ', ' - Page 1'),
- "chapter" : chapter,
- "chapter_minor": sep + minor,
- "lang" : lang,
- "language": util.code_to_language(lang),
- }
-
- @staticmethod
- def images(page):
- return [
- (img["page_image"], None)
- for img in util.json_loads(
- text.extr(page, "var pages = ", ";")
- )
- ]
-
-
-class FallenangelsMangaExtractor(MangaExtractor):
- """Extractor for manga from fascans.com"""
- chapterclass = FallenangelsChapterExtractor
- category = "fallenangels"
- pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
- example = "https://manga.fascans.com/manga/NAME"
-
- def __init__(self, match):
- url = "https://" + match.group(1)
- self.lang = "vi" if match.group(2) == "truyen" else "en"
- MangaExtractor.__init__(self, match, url)
-
- def chapters(self, page):
- extr = text.extract_from(page)
- results = []
- language = util.code_to_language(self.lang)
- while extr('<li style="', '"'):
- vol = extr('class="volume-', '"')
- url = extr('href="', '"')
- cha = extr('>', '<')
- title = extr('<em>', '</em>')
-
- manga, _, chapter = cha.rpartition(" ")
- chapter, dot, minor = chapter.partition(".")
- results.append((url, {
- "manga" : manga,
- "title" : text.unescape(title),
- "volume" : text.parse_int(vol),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": dot + minor,
- "lang" : self.lang,
- "language": language,
- }))
- return results
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 6040187..f48a984 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -113,6 +113,12 @@ class FuraffinityExtractor(Extractor):
data["gender"] = rh(extr('>Gender</strong>', '</div>'))
data["width"] = pi(extr("<span>", "x"))
data["height"] = pi(extr("", "p"))
+ data["folders"] = folders = []
+ for folder in extr(
+ "<h3>Listed in Folders</h3>", "</section>").split("</a>"):
+ folder = rh(folder)
+ if folder:
+ folders.append(folder)
else:
# old site layout
data["title"] = text.unescape(extr("<h2>", "</h2>"))
@@ -132,11 +138,14 @@ class FuraffinityExtractor(Extractor):
data["_description"] = extr(
'<td valign="top" align="left" width="70%" class="alt1" '
'style="padding:8px">', ' </td>')
+ data["folders"] = () # folders not present in old layout
data["artist_url"] = data["artist"].replace("_", "").lower()
data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"])
+ data["thumbnail"] = "https://t.furaffinity.net/{}@600-{}.jpg".format(
+ post_id, path.rsplit("/", 2)[1])
return data
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 8d8b8ad..fbbd26c 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -36,7 +36,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start
params["limit"] = self.per_page
- post = None
+ post = total = None
+ count = 0
+
while True:
try:
root = self._api_request(params)
@@ -50,12 +52,29 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = 0
continue
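+            # remember the reported post total once, so the final page
+            # can be told apart from a spurious empty response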
+ if total is None:
+ try:
+ total = int(root.attrib["count"])
+ self.log.debug("%s posts in total", total)
+ except Exception as exc:
+ total = 0
+ self.log.debug(
+ "Failed to get total number of posts (%s: %s)",
+ exc.__class__.__name__, exc)
+
post = None
for post in root:
yield post.attrib
- if len(root) < self.per_page:
- return
+ num = len(root)
+ count += num
+ if num < self.per_page:
+ if not total or count >= total:
+ return
+ if not num:
+ self.log.debug("Empty response - Retrying")
+ continue
+
params["pid"] += 1
def _pagination_html(self, params):
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index 97b7844..286ee38 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -70,10 +70,13 @@ class HentainexusGalleryExtractor(GalleryExtractor):
for img in imgs:
img["_http_headers"] = headers
- return [
- (img["image"], img)
- for img in imgs
- ]
+ results = []
+ for img in imgs:
+ try:
+ results.append((img["image"], img))
+ except KeyError:
+ pass
+ return results
@staticmethod
def _decode(data):
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index a2b51be..34fbabd 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -23,6 +23,12 @@ class HotleakExtractor(Extractor):
def items(self):
for post in self.posts():
+ if self.type == "photo":
+ post["url"] = (
+ post["url"]
+ .replace("/storage/storage/", "/storage/")
+ .replace("_thumb.", ".")
+ )
post["_http_expected_status"] = (404,)
yield Message.Directory, post
yield Message.Url, post["url"], post
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 85446c0..345f51d 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -19,7 +19,7 @@ class ImagefapExtractor(Extractor):
category = "imagefap"
root = "https://www.imagefap.com"
directory_fmt = ("{category}", "{gallery_id} {title}")
- filename_fmt = "{category}_{gallery_id}_{filename}.{extension}"
+ filename_fmt = "{category}_{gallery_id}_{num:04}_{filename}.{extension}"
archive_fmt = "{gallery_id}_{image_id}"
request_interval = (2.0, 4.0)
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 2ae8cbe..f3098f1 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -246,14 +246,12 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
data = {"_extractor": InkbunnyUserExtractor}
while True:
- cnt = 0
for user in text.extract_iter(
page, '<a class="widget_userNameSmall" href="', '"',
page.index('id="changethumboriginal_form"')):
- cnt += 1
yield Message.Queue, self.root + user, data
- if cnt < 20:
+ if "<a title='next page' " not in page:
return
params["page"] += 1
page = self.request(url, params=params).text
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index f7a5cc7..dbe2df3 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -596,6 +596,22 @@ class InstagramTagExtractor(InstagramExtractor):
return self.api.tags_media(self.item)
+class InstagramInfoExtractor(InstagramExtractor):
+ """Extractor for an Instagram user's profile data"""
+ subcategory = "info"
+ pattern = USER_PATTERN + r"/info"
+ example = "https://www.instagram.com/USER/info/"
+
+ def items(self):
+ screen_name = self.item
+ if screen_name.startswith("id:"):
+ user = self.api.user_by_id(screen_name[3:])
+ else:
+ user = self.api.user_by_name(screen_name)
+
+ return iter(((Message.Directory, user),))
+
+
class InstagramAvatarExtractor(InstagramExtractor):
"""Extractor for an Instagram user's avatar"""
subcategory = "avatar"
@@ -975,9 +991,9 @@ class InstagramGraphqlAPI():
if not info["has_next_page"]:
return extr._update_cursor(None)
elif not data["edges"]:
- s = "" if self.item.endswith("s") else "s"
+ s = "" if self.extractor.item.endswith("s") else "s"
raise exception.StopExtraction(
- "%s'%s posts are private", self.item, s)
+ "%s'%s posts are private", self.extractor.item, s)
variables["after"] = extr._update_cursor(info["end_cursor"])
diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py
new file mode 100644
index 0000000..979b1a2
--- /dev/null
+++ b/gallery_dl/extractor/koharu.py
@@ -0,0 +1,221 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://koharu.to/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, exception
+from ..cache import cache
+
+BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to"
+
+
+class KoharuExtractor(Extractor):
+ """Base class for koharu extractors"""
+ category = "koharu"
+ root = "https://koharu.to"
+ root_api = "https://api.koharu.to"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ self.headers = {
+ "Accept" : "*/*",
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ def _pagination(self, endpoint, params):
+ url_api = self.root_api + endpoint
+
+ while True:
+ data = self.request(
+ url_api, params=params, headers=self.headers).json()
+
+ try:
+ entries = data["entries"]
+ except KeyError:
+ return
+
+ for entry in entries:
+ url = "{}/g/{}/{}".format(
+ self.root, entry["id"], entry["public_key"])
+ entry["_extractor"] = KoharuGalleryExtractor
+ yield Message.Queue, url, entry
+
+ try:
+ if data["limit"] * data["page"] >= data["total"]:
+ return
+ except Exception:
+ pass
+ params["page"] += 1
+
+
+class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
+ """Extractor for koharu galleries"""
+ filename_fmt = "{num:>03}.{extension}"
+ directory_fmt = ("{category}", "{id} {title}")
+ archive_fmt = "{id}_{num}"
+ request_interval = 0.0
+ pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
+ example = "https://koharu.to/g/12345/67890abcde/"
+
+ TAG_TYPES = {
+ 0 : "general",
+ 1 : "artist",
+ 2 : "circle",
+ 3 : "parody",
+ 4 : "magazine",
+ 5 : "character",
+ 6 : "",
+ 7 : "uploader",
+ 8 : "male",
+ 9 : "female",
+ 10: "mixed",
+ 11: "language",
+ 12: "other",
+ }
+
+ def __init__(self, match):
+ GalleryExtractor.__init__(self, match)
+ self.gallery_url = None
+
+ def _init(self):
+ self.headers = {
+ "Accept" : "*/*",
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ self.fmt = self.config("format")
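+        # "cbz" (default: true) downloads each gallery as a single
+        # .cbz archive instead of individual image files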
+ self.cbz = self.config("cbz", True)
+
+ if self.cbz:
+ self.filename_fmt = "{id} {title}.{extension}"
+ self.directory_fmt = ("{category}",)
+
+ def metadata(self, _):
+ url = "{}/books/detail/{}/{}".format(
+ self.root_api, self.groups[0], self.groups[1])
+ self.data = data = self.request(url, headers=self.headers).json()
+
+ tags = []
+ for tag in data["tags"]:
+ name = tag["name"]
+ namespace = tag.get("namespace", 0)
+ tags.append(self.TAG_TYPES[namespace] + ":" + name)
+ data["tags"] = tags
+ data["date"] = text.parse_timestamp(data["created_at"] // 1000)
+
+ try:
+ if self.cbz:
+ data["count"] = len(data["thumbnails"]["entries"])
+ del data["thumbnails"]
+ del data["rels"]
+ except Exception:
+ pass
+
+ return data
+
+ def images(self, _):
+ data = self.data
+ fmt = self._select_format(data["data"])
+
+ url = "{}/books/data/{}/{}/{}/{}".format(
+ self.root_api,
+ data["id"], data["public_key"],
+ fmt["id"], fmt["public_key"],
+ )
+ params = {
+ "v": data["updated_at"],
+ "w": fmt["w"],
+ }
+
+ if self.cbz:
+ params["action"] = "dl"
+ base = self.request(
+ url, method="POST", params=params, headers=self.headers,
+ ).json()["base"]
+ url = "{}?v={}&w={}".format(base, data["updated_at"], fmt["w"])
+ info = text.nameext_from_url(base)
+ if not info["extension"]:
+ info["extension"] = "cbz"
+ return ((url, info),)
+
+ data = self.request(url, params=params, headers=self.headers).json()
+ base = data["base"]
+
+ results = []
+ for entry in data["entries"]:
+ dimensions = entry["dimensions"]
+ info = {
+ "w": dimensions[0],
+ "h": dimensions[1],
+ "_http_headers": self.headers,
+ }
+ results.append((base + entry["path"], info))
+ return results
+
+ def _select_format(self, formats):
+ if not self.fmt or self.fmt == "original":
+ fmtid = "0"
+ else:
+ fmtid = str(self.fmt)
+
+ try:
+ fmt = formats[fmtid]
+ except KeyError:
+ raise exception.NotFoundError("format")
+
+ fmt["w"] = fmtid
+ return fmt
+
+
+class KoharuSearchExtractor(KoharuExtractor):
+ """Extractor for koharu search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/\?([^#]*)"
+ example = "https://koharu.to/?s=QUERY"
+
+ def items(self):
+ params = text.parse_query(self.groups[0])
+ params["page"] = text.parse_int(params.get("page"), 1)
+ return self._pagination("/books", params)
+
+
+class KoharuFavoriteExtractor(KoharuExtractor):
+ """Extractor for koharu favorites"""
+ subcategory = "favorite"
+ pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ example = "https://koharu.to/favorites"
+
+ def items(self):
+ self.login()
+
+ params = text.parse_query(self.groups[0])
+ params["page"] = text.parse_int(params.get("page"), 1)
+ return self._pagination("/favorites", params)
+
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self.headers["Authorization"] = \
+ "Bearer " + self._login_impl(username, password)
+ return
+
+ raise exception.AuthenticationError("Username and password required")
+
+ @cache(maxage=86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = "https://auth.koharu.to/login"
+ data = {"uname": username, "passwd": password}
+ response = self.request(
+ url, method="POST", headers=self.headers, data=data)
+
+ return response.json()["session"]
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 60cca22..b01c591 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -120,7 +120,8 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
]
else:
pos = page.find('id="view-center"') + 1
- return (text.extr(page, 'itemprop="image" src="', '"', pos),)
+ # do NOT use text.extr() here, as it doesn't support a pos argument
+ return (text.extract(page, 'itemprop="image" src="', '"', pos)[0],)
@staticmethod
def _extract_user_name(page):
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index b21e1eb..2330b08 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -77,6 +77,7 @@ class PahealTagExtractor(PahealExtractor):
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
r"/post/list/([^/?#]+)")
example = "https://rule34.paheal.net/post/list/TAG/1"
+ page_start = 1
per_page = 70
def __init__(self, match):
@@ -87,11 +88,16 @@ class PahealTagExtractor(PahealExtractor):
if self.config("metadata"):
self._extract_data = self._extract_data_ex
+ def skip(self, num):
+ pages = num // self.per_page
+ self.page_start += pages
+ return pages * self.per_page
+
def get_metadata(self):
return {"search_tags": self.tags}
def get_posts(self):
- pnum = 1
+ pnum = self.page_start
while True:
url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
page = self.request(url).text
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 115de9a..271fa50 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -78,12 +78,16 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
}
def images(self, page):
- return [
- (beau(url), None)
- for url in text.extract_iter(
- page, "lstImages.push('", "'",
- )
- ]
+ results = []
+
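+        # replicate the page's inline JavaScript: read each "pth" value and
+        # apply its chain of .replace(/regex/g, "string") calls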
+ for block in page.split(" pth = '")[1:]:
+ pth = text.extr(block, "", "'")
+ for needle, repl in re.findall(
+ r"pth = pth\.replace\(/([^/]+)/g, [\"']([^\"']*)", block):
+ pth = pth.replace(needle, repl)
+ results.append((beau(pth), None))
+
+ return results
class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
@@ -116,9 +120,9 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
def beau(url):
- """https://readcomiconline.li/Scripts/rguard.min.js"""
- url = url.replace("_x236", "d")
- url = url.replace("_x945", "g")
+ """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.1"""
+ url = url.replace("pw_.g28x", "b")
+ url = url.replace("d2pr.x_27", "h")
if url.startswith("https"):
return url
@@ -126,8 +130,8 @@ def beau(url):
url, sep, rest = url.partition("?")
containsS0 = "=s0" in url
url = url[:-3 if containsS0 else -6]
- url = url[4:22] + url[25:]
- url = url[0:-6] + url[-2:]
+ url = url[15:33] + url[50:]
+ url = url[0:-11] + url[-2:]
url = binascii.a2b_base64(url).decode()
url = url[0:13] + url[17:]
url = url[0:-2] + ("=s0" if containsS0 else "=s1600")
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 327bcd1..506f6ac 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -190,7 +190,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
r"(?:\w+\.)?redgifs\.com/(?:watch|ifr)|"
r"(?:\w+\.)?gfycat\.com(?:/gifs/detail|/\w+)?|"
r"(?:www\.)?gifdeliverynetwork\.com|"
- r"i\.redgifs\.com/i)/([A-Za-z]+)")
+ r"i\.redgifs\.com/i)/([A-Za-z0-9]+)")
example = "https://redgifs.com/watch/ID"
def gifs(self):
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index caf3e16..ad3efa7 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -16,7 +16,7 @@ import collections
import re
BASE_PATTERN = r"(?:https?://)?" \
- r"(?:(?:chan|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
+ r"(?:(?:chan|www|beta|black|white)\.sankakucomplex\.com|sankaku\.app)" \
r"(?:/[a-z]{2})?"
@@ -45,6 +45,9 @@ class SankakuExtractor(BooruExtractor):
def skip(self, num):
return 0
+ def _init(self):
+ self.api = SankakuAPI(self)
+
def _file_url(self, post):
url = post["file_url"]
if not url:
@@ -81,6 +84,15 @@ class SankakuExtractor(BooruExtractor):
post["tags_" + key] = value
post["tag_string_" + key] = " ".join(value)
+ def _notes(self, post, page):
+ if post.get("has_notes"):
+ post["notes"] = self.api.notes(post["id"])
+ for note in post["notes"]:
+ note["created_at"] = note["created_at"]["s"]
+ note["updated_at"] = note["updated_at"]["s"]
+ else:
+ post["notes"] = ()
+
class SankakuTagExtractor(SankakuExtractor):
"""Extractor for images from sankaku.app by search-tags"""
@@ -109,7 +121,7 @@ class SankakuTagExtractor(SankakuExtractor):
def posts(self):
params = {"tags": self.tags}
- return SankakuAPI(self).posts_keyset(params)
+ return self.api.posts_keyset(params)
class SankakuPoolExtractor(SankakuExtractor):
@@ -125,7 +137,7 @@ class SankakuPoolExtractor(SankakuExtractor):
self.pool_id = match.group(1)
def metadata(self):
- pool = SankakuAPI(self).pools(self.pool_id)
+ pool = self.api.pools(self.pool_id)
pool["tags"] = [tag["name"] for tag in pool["tags"]]
pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]]
@@ -151,7 +163,7 @@ class SankakuPostExtractor(SankakuExtractor):
self.post_id = match.group(1)
def posts(self):
- return SankakuAPI(self).posts(self.post_id)
+ return self.api.posts(self.post_id)
class SankakuBooksExtractor(SankakuExtractor):
@@ -167,7 +179,7 @@ class SankakuBooksExtractor(SankakuExtractor):
def items(self):
params = {"tags": self.tags, "pool_type": "0"}
- for pool in SankakuAPI(self).pools_keyset(params):
+ for pool in self.api.pools_keyset(params):
pool["_extractor"] = SankakuPoolExtractor
url = "https://sankaku.app/books/{}".format(pool["id"])
yield Message.Queue, url, pool
@@ -192,6 +204,10 @@ class SankakuAPI():
if not self.username:
self.authenticate = util.noop
+ def notes(self, post_id):
+ params = {"lang": "en"}
+ return self._call("/posts/{}/notes".format(post_id), params)
+
def pools(self, pool_id):
params = {"lang": "en"}
return self._call("/pools/" + pool_id, params)
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index e1d4153..50c21e3 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://www.sankakucomplex.com/"""
+"""Extractors for https://news.sankakucomplex.com/"""
from .common import Extractor, Message
from .. import text, util
@@ -16,7 +16,7 @@ import re
class SankakucomplexExtractor(Extractor):
"""Base class for sankakucomplex extractors"""
category = "sankakucomplex"
- root = "https://www.sankakucomplex.com"
+ root = "https://news.sankakucomplex.com"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -24,14 +24,14 @@ class SankakucomplexExtractor(Extractor):
class SankakucomplexArticleExtractor(SankakucomplexExtractor):
- """Extractor for articles on www.sankakucomplex.com"""
+ """Extractor for articles on news.sankakucomplex.com"""
subcategory = "article"
directory_fmt = ("{category}", "{date:%Y-%m-%d} {title}")
filename_fmt = "{filename}.{extension}"
archive_fmt = "{date:%Y%m%d}_{filename}"
- pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
+ pattern = (r"(?:https?://)?(?:news|www)\.sankakucomplex\.com"
r"/(\d\d\d\d/\d\d/\d\d/[^/?#]+)")
- example = "https://www.sankakucomplex.com/1970/01/01/TITLE"
+ example = "https://news.sankakucomplex.com/1970/01/01/TITLE"
def items(self):
url = "{}/{}/?pg=X".format(self.root, self.path)
@@ -87,9 +87,9 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
class SankakucomplexTagExtractor(SankakucomplexExtractor):
"""Extractor for sankakucomplex blog articles by tag or author"""
subcategory = "tag"
- pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
+ pattern = (r"(?:https?://)?(?:news|www)\.sankakucomplex\.com"
r"/((?:tag|category|author)/[^/?#]+)")
- example = "https://www.sankakucomplex.com/tag/TAG/"
+ example = "https://news.sankakucomplex.com/tag/TAG/"
def items(self):
pnum = 1
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 0abb3ab..7c760ac 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -127,6 +127,8 @@ class SubscribestarExtractor(Extractor):
}
def _parse_datetime(self, dt):
+ if dt.startswith("Updated on "):
+ dt = dt[11:]
date = text.parse_datetime(dt, "%b %d, %Y %I:%M %p")
if date is dt:
date = text.parse_datetime(dt, "%B %d, %Y %I:%M %p")
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index 78ff265..64fa951 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -77,23 +77,27 @@ class ToyhouseExtractor(Extractor):
cnt += 1
yield self._parse_post(post)
- if cnt == 0 and params["page"] == 1:
- token, pos = text.extract(
- page, '<input name="_token" type="hidden" value="', '"')
- if not token:
- return
- data = {
- "_token": token,
- "user" : text.extract(page, 'value="', '"', pos)[0],
- }
- self.request(self.root + "/~account/warnings/accept",
- method="POST", data=data, allow_redirects=False)
- continue
+ if not cnt and params["page"] == 1:
+ if self._accept_content_warning(page):
+ continue
+ return
if cnt < 18:
return
params["page"] += 1
+ def _accept_content_warning(self, page):
+ pos = page.find(' name="_token"') + 1
+ token, pos = text.extract(page, ' value="', '"', pos)
+ user , pos = text.extract(page, ' value="', '"', pos)
+ if not token or not user:
+ return False
+
+ data = {"_token": token, "user": user}
+ self.request(self.root + "/~account/warnings/accept",
+ method="POST", data=data, allow_redirects=False)
+ return True
+
class ToyhouseArtExtractor(ToyhouseExtractor):
"""Extractor for artworks of a toyhouse user"""
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index c34910f..ff29c04 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -386,7 +386,7 @@ class TumblrAPI(oauth.OAuth1API):
def posts(self, blog, params):
"""Retrieve published posts"""
params["offset"] = self.extractor.config("offset")
- params["limit"] = "50"
+ params["limit"] = 50
params["reblog_info"] = "true"
params["type"] = self.posts_type
params["before"] = self.before
@@ -398,8 +398,14 @@ class TumblrAPI(oauth.OAuth1API):
def likes(self, blog):
"""Retrieve liked posts"""
+ endpoint = "/v2/blog/{}/likes".format(blog)
params = {"limit": "50", "before": self.before}
- return self._pagination(blog, "/likes", params, key="liked_posts")
+ while True:
+ posts = self._call(endpoint, params)["liked_posts"]
+ if not posts:
+ return
+ yield from posts
+ params["before"] = posts[-1]["liked_timestamp"]
def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
@@ -474,6 +480,7 @@ class TumblrAPI(oauth.OAuth1API):
if self.api_key:
params["api_key"] = self.api_key
+ strategy = self.extractor.config("pagination")
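+        # "api" follows the _links.next URL, "before" pages by post timestamp,
+        # any other value falls back to numeric offsets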
while True:
data = self._call(endpoint, params)
@@ -481,13 +488,31 @@ class TumblrAPI(oauth.OAuth1API):
self.BLOG_CACHE[blog] = data["blog"]
cache = False
- yield from data[key]
-
- try:
- endpoint = data["_links"]["next"]["href"]
- except KeyError:
- return
+ posts = data[key]
+ yield from posts
- params = None
- if self.api_key:
- endpoint += "&api_key=" + self.api_key
+ if strategy == "api":
+ try:
+ endpoint = data["_links"]["next"]["href"]
+ except KeyError:
+ return
+
+ params = None
+ if self.api_key:
+ endpoint += "&api_key=" + self.api_key
+
+ elif strategy == "before":
+ if not posts:
+ return
+ timestamp = posts[-1]["timestamp"] + 1
+ if params["before"] and timestamp >= params["before"]:
+ return
+ params["before"] = timestamp
+ params["offset"] = None
+
+ else: # offset
+ params["offset"] = \
+ text.parse_int(params["offset"]) + params["limit"]
+ params["before"] = None
+ if params["offset"] >= data["total_posts"]:
+ return
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ec098aa..9fa5b3f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -51,6 +51,8 @@ class TwitterExtractor(Extractor):
if not self.config("transform", True):
self._transform_user = util.identity
self._transform_tweet = util.identity
+
+ self._cursor = None
self._user = None
self._user_obj = None
self._user_cache = {}
@@ -321,8 +323,17 @@ class TwitterExtractor(Extractor):
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
"retweet_count" : tget("retweet_count"),
+ "bookmark_count": tget("bookmark_count"),
}
+ if "views" in tweet:
+ try:
+ tdata["view_count"] = int(tweet["views"]["count"])
+ except Exception:
+ tdata["view_count"] = 0
+ else:
+ tdata["view_count"] = 0
+
if "note_tweet" in tweet:
note = tweet["note_tweet"]["note_tweet_results"]["result"]
content = note["text"]
@@ -492,6 +503,14 @@ class TwitterExtractor(Extractor):
},
}
+ def _init_cursor(self):
+ return self.config("cursor") or None
+
+ def _update_cursor(self, cursor):
+ self.log.debug("Cursor: %s", cursor)
+ self._cursor = cursor
+ return cursor
+
def metadata(self):
"""Return general metadata"""
return {}
@@ -499,6 +518,11 @@ class TwitterExtractor(Extractor):
def tweets(self):
"""Yield all relevant tweet objects"""
+ def finalize(self):
+ if self._cursor:
+ self.log.info("Use '-o cursor=%s' to continue downloading "
+ "from the current position", self._cursor)
+
def login(self):
if self.cookies_check(self.cookies_names):
return
@@ -530,6 +554,9 @@ class TwitterUserExtractor(TwitterExtractor):
def initialize(self):
pass
+ def finalize(self):
+ pass
+
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
@@ -549,30 +576,73 @@ class TwitterTimelineExtractor(TwitterExtractor):
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
example = "https://x.com/USER/timeline"
+ def _init_cursor(self):
+ if self._cursor:
+ return self._cursor.partition("/")[2] or None
+ return None
+
+ def _update_cursor(self, cursor):
+ if cursor:
+ self._cursor = self._cursor_prefix + cursor
+ self.log.debug("Cursor: %s", self._cursor)
+ else:
+ self._cursor = None
+ return cursor
+
def tweets(self):
- # yield initial batch of (media) tweets
- tweet = None
- for tweet in self._select_tweet_source()(self.user):
- yield tweet
- if tweet is None:
- return
+ self._cursor = cursor = self.config("cursor") or None
+ reset = False
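+        # synthetic cursor format: "<state>[_<tweet_id>]/<api cursor>";
+        # state 1 = initial timeline, 2 = media-only search, 3 = unfiltered search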
- # build search query
- query = "from:{} max_id:{}".format(
- self._user["name"], tweet["rest_id"])
- if self.retweets:
- query += " include:retweets include:nativeretweets"
+ if cursor:
+ state = cursor.partition("/")[0]
+ state, _, tweet_id = state.partition("_")
+ state = text.parse_int(state, 1)
+ else:
+ state = 1
+
+ if state <= 1:
+ self._cursor_prefix = "1/"
- if not self.textonly:
- # try to search for media-only tweets
+ # yield initial batch of (media) tweets
tweet = None
- for tweet in self.api.search_timeline(query + " filter:links"):
+ for tweet in self._select_tweet_source()(self.user):
yield tweet
- if tweet is not None:
+ if tweet is None and not cursor:
return
+ tweet_id = tweet["rest_id"]
+
+ state = reset = 2
+ else:
+ self.api._user_id_by_screen_name(self.user)
+
+ # build search query
+ query = "from:{} max_id:{}".format(self._user["name"], tweet_id)
+ if self.retweets:
+ query += " include:retweets include:nativeretweets"
- # yield unfiltered search results
- yield from self.api.search_timeline(query)
+ if state <= 2:
+ self._cursor_prefix = "2_{}/".format(tweet_id)
+ if reset:
+ self._cursor = self._cursor_prefix
+
+ if not self.textonly:
+ # try to search for media-only tweets
+ tweet = None
+ for tweet in self.api.search_timeline(query + " filter:links"):
+ yield tweet
+ if tweet is not None:
+ return self._update_cursor(None)
+
+ state = reset = 3
+
+ if state <= 3:
+ # yield unfiltered search results
+ self._cursor_prefix = "3_{}/".format(tweet_id)
+ if reset:
+ self._cursor = self._cursor_prefix
+
+ yield from self.api.search_timeline(query)
+ return self._update_cursor(None)
def _select_tweet_source(self):
strategy = self.config("strategy")
@@ -854,6 +924,24 @@ class TwitterQuotesExtractor(TwitterExtractor):
yield Message.Queue, url, data
+class TwitterInfoExtractor(TwitterExtractor):
+ """Extractor for a user's profile data"""
+ subcategory = "info"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/info"
+ example = "https://x.com/USER/info"
+
+ def items(self):
+ api = TwitterAPI(self)
+
+ screen_name = self.user
+ if screen_name.startswith("id:"):
+ user = api.user_by_rest_id(screen_name[3:])
+ else:
+ user = api.user_by_screen_name(screen_name)
+
+ return iter(((Message.Directory, self._transform_user(user)),))
+
+
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
@@ -1388,7 +1476,11 @@ class TwitterAPI():
"%s %s (%s)", response.status_code, response.reason, errors)
def _pagination_legacy(self, endpoint, params):
- original_retweets = (self.extractor.retweets == "original")
+ extr = self.extractor
+ cursor = extr._init_cursor()
+ if cursor:
+ params["cursor"] = cursor
+ original_retweets = (extr.retweets == "original")
bottom = ("cursor-bottom-", "sq-cursor-bottom")
while True:
@@ -1396,7 +1488,7 @@ class TwitterAPI():
instructions = data["timeline"]["instructions"]
if not instructions:
- return
+ return extr._update_cursor(None)
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
@@ -1477,8 +1569,8 @@ class TwitterAPI():
# stop on empty response
if not cursor or (not tweets and not tweet_id):
- return
- params["cursor"] = cursor
+ return extr._update_cursor(None)
+ params["cursor"] = extr._update_cursor(cursor)
def _pagination_tweets(self, endpoint, variables,
path=None, stop_tweets=True, features=None):
@@ -1487,6 +1579,9 @@ class TwitterAPI():
pinned_tweet = extr.pinned
params = {"variables": None}
+ cursor = extr._init_cursor()
+ if cursor:
+ variables["cursor"] = cursor
if features is None:
features = self.features_pagination
if features:
@@ -1523,7 +1618,7 @@ class TwitterAPI():
cursor = entry["content"]["value"]
if entries is None:
if not cursor:
- return
+ return extr._update_cursor(None)
entries = ()
except LookupError:
@@ -1672,12 +1767,16 @@ class TwitterAPI():
continue
if stop_tweets and not tweet:
- return
+ return extr._update_cursor(None)
if not cursor or cursor == variables.get("cursor"):
- return
- variables["cursor"] = cursor
+ return extr._update_cursor(None)
+ variables["cursor"] = extr._update_cursor(cursor)
def _pagination_users(self, endpoint, variables, path=None):
+ extr = self.extractor
+ cursor = extr._init_cursor()
+ if cursor:
+ variables["cursor"] = cursor
params = {
"variables": None,
"features" : self._json_dumps(self.features_pagination),
@@ -1697,7 +1796,7 @@ class TwitterAPI():
data = data[key]
instructions = data["instructions"]
except KeyError:
- return
+ return extr._update_cursor(None)
for instr in instructions:
if instr["type"] == "TimelineAddEntries":
@@ -1715,8 +1814,8 @@ class TwitterAPI():
cursor = entry["content"]["value"]
if not cursor or cursor.startswith(("-1|", "0|")) or not entry:
- return
- variables["cursor"] = cursor
+ return extr._update_cursor(None)
+ variables["cursor"] = extr._update_cursor(cursor)
def _handle_ratelimit(self, response):
rl = self.extractor.config("ratelimit")
@@ -1864,7 +1963,7 @@ def _login_impl(extr, username, password):
},
}
elif subtask == "LoginEnterAlternateIdentifierSubtask":
- alt = extr.config("username_alt") or extr.input(
+ alt = extr.config("username-alt") or extr.input(
"Alternate Identifier (username, email, phone number): ")
data = {
"enter_text": {
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 6dfb23c..5cde0d6 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -101,7 +101,8 @@ class VipergirlsExtractor(Extractor):
class VipergirlsThreadExtractor(VipergirlsExtractor):
"""Extractor for vipergirls threads"""
subcategory = "thread"
- pattern = BASE_PATTERN + r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?$"
+ pattern = (BASE_PATTERN +
+ r"/threads/(\d+)(?:-[^/?#]+)?(/page\d+)?(?:$|#|\?(?!p=))")
example = "https://vipergirls.to/threads/12345-TITLE"
def __init__(self, match):
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index c112f4a..922a591 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -115,9 +115,28 @@ class VscoExtractor(Extractor):
class VscoUserExtractor(VscoExtractor):
- """Extractor for images from a user on vsco.co"""
+ """Extractor for a vsco user profile"""
subcategory = "user"
- pattern = USER_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?$"
+ example = "https://vsco.co/USER"
+
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/{}/".format(self.root, self.user)
+ return self._dispatch_extractors((
+ (VscoAvatarExtractor , base + "avatar"),
+ (VscoGalleryExtractor , base + "gallery"),
+ (VscoSpacesExtractor , base + "spaces"),
+ (VscoCollectionExtractor, base + "collection"),
+ ), ("gallery",))
+
+
+class VscoGalleryExtractor(VscoExtractor):
+ """Extractor for a vsco user's gallery"""
+ subcategory = "gallery"
+ pattern = USER_PATTERN + r"/(?:gallery|images)"
example = "https://vsco.co/USER/gallery"
def images(self):
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
index faf3b0d..796f3f8 100644
--- a/gallery_dl/extractor/wallpapercave.py
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -18,7 +18,7 @@ class WallpapercaveImageExtractor(Extractor):
category = "wallpapercave"
subcategory = "image"
root = "https://wallpapercave.com"
- pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com"
+ pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com/"
example = "https://wallpapercave.com/w/wp12345"
def items(self):
@@ -40,3 +40,12 @@ class WallpapercaveImageExtractor(Extractor):
image = text.nameext_from_url(path)
yield Message.Directory, image
yield Message.Url, self.root + path, image
+
+ if path is None:
+ for wp in text.extract_iter(
+ page, 'class="wallpaper" id="wp', '</picture>'):
+ path = text.rextract(wp, ' src="', '"')[0]
+ if path:
+ image = text.nameext_from_url(path)
+ yield Message.Directory, image
+ yield Message.Url, self.root + path, image
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index e91f45f..61a36d5 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -64,7 +64,7 @@ class WarosuThreadExtractor(Extractor):
def parse(self, post):
"""Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
- if "<span> File:" in post and self._extract_image(post, data):
+ if "<span class=fileinfo>" in post and self._extract_image(post, data):
part = data["image"].rpartition("/")[2]
data["tim"], _, data["extension"] = part.partition(".")
data["ext"] = "." + data["extension"]
@@ -83,7 +83,7 @@ class WarosuThreadExtractor(Extractor):
def _extract_image(self, post, data):
extr = text.extract_from(post)
- data["fsize"] = extr("<span> File: ", ", ")
+ data["fsize"] = extr("<span class=fileinfo> File: ", ", ")
data["w"] = extr("", "x")
data["h"] = extr("", ", ")
data["filename"] = text.unquote(extr(
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index fc61dff..126ef49 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -11,6 +11,8 @@
from .booru import BooruExtractor
from ..cache import cache
from .. import text, util, exception
+import collections
+import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -76,22 +78,29 @@ class ZerochanExtractor(BooruExtractor):
'class="breadcrumbs', '</nav>'))[2:],
"uploader": extr('href="/user/', '"'),
"tags" : extr('<ul id="tags"', '</ul>'),
- "source" : extr('<h2>Source</h2>', '</p><h2>').rpartition(
- ">")[2] or None,
+ "source" : text.unescape(text.extr(
+ extr('id="source-url"', '</a>'), 'href="', '"')),
}
html = data["tags"]
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
- category = text.extr(tag, 'data-type="', '"')
+ category = text.extr(tag, '"', '"')
name = text.extr(tag, 'data-tag="', '"')
- tags.append(category.capitalize() + ":" + name)
+ tags.append(category.partition(" ")[0].capitalize() + ":" + name)
return data
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
- item = self.request(url).json()
+ text = self.request(url).text
+ try:
+ item = util.json_loads(text)
+ except ValueError as exc:
+ if " control character " not in str(exc):
+ raise
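+            # some responses embed raw ASCII control characters;
+            # strip them and parse again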
+ text = re.sub(r"[\x00-\x1f\x7f]", "", text)
+ item = util.json_loads(text)
data = {
"id" : item["id"],
@@ -109,6 +118,14 @@ class ZerochanExtractor(BooruExtractor):
return data
+ def _tags(self, post, page):
+ tags = collections.defaultdict(list)
+ for tag in post["tags"]:
+ category, _, name = tag.partition(":")
+ tags[category].append(name)
+ for key, value in tags.items():
+ post["tags_" + key.lower()] = value
+
class ZerochanTagExtractor(ZerochanExtractor):
subcategory = "tag"
@@ -180,10 +197,16 @@ class ZerochanTagExtractor(ZerochanExtractor):
static = "https://static.zerochan.net/.full."
while True:
- data = self.request(url, params=params).json()
+ response = self.request(url, params=params, allow_redirects=False)
+ if response.status_code >= 300:
+ url = text.urljoin(self.root, response.headers["location"])
+ response = self.request(url, params=params)
+ data = response.json()
+
try:
posts = data["items"]
- except ValueError:
+ except Exception:
+ self.log.debug("Server response: %s", data)
return
if metadata:
@@ -191,13 +214,13 @@ class ZerochanTagExtractor(ZerochanExtractor):
post_id = post["id"]
post.update(self._parse_entry_html(post_id))
post.update(self._parse_entry_api(post_id))
+ yield post
else:
for post in posts:
base = static + str(post["id"])
post["file_url"] = base + ".jpg"
post["_fallback"] = (base + ".png",)
-
- yield from posts
+ yield post
if not data.get("next"):
return
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 4562b05..0e0916d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -33,6 +33,7 @@ stdout_write = output.stdout_write
class Job():
"""Base class for Job types"""
ulog = None
+ _logger_adapter = output.LoggerAdapter
def __init__(self, extr, parent=None):
if isinstance(extr, str):
@@ -77,9 +78,9 @@ class Job():
actions = extr.config("actions")
if actions:
- from .actions import parse
+ from .actions import LoggerAdapter, parse
+ self._logger_adapter = LoggerAdapter
self._logger_actions = parse(actions)
- self._wrap_logger = self._wrap_logger_actions
path_proxy = output.PathfmtProxy(self)
self._logger_extra = {
@@ -267,10 +268,7 @@ class Job():
return self._wrap_logger(logging.getLogger(name))
def _wrap_logger(self, logger):
- return output.LoggerAdapter(logger, self)
-
- def _wrap_logger_actions(self, logger):
- return output.LoggerAdapterActions(logger, self)
+ return self._logger_adapter(logger, self)
def _write_unsupported(self, url):
if self.ulog:
@@ -315,7 +313,7 @@ class DownloadJob(Job):
pathfmt.build_path()
if pathfmt.exists():
- if archive:
+ if archive and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@@ -345,7 +343,7 @@ class DownloadJob(Job):
return
if not pathfmt.temppath:
- if archive:
+ if archive and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@@ -359,7 +357,7 @@ class DownloadJob(Job):
pathfmt.finalize()
self.out.success(pathfmt.path)
self._skipcnt = 0
- if archive:
+ if archive and self._archive_write_file:
archive.add(kwdict)
if "after" in hooks:
for callback in hooks["after"]:
@@ -561,6 +559,16 @@ class DownloadJob(Job):
else:
extr.log.debug("Using download archive '%s'", archive_path)
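+            # "archive-event" controls when entries are recorded: after completed
+            # downloads ("file"), for skipped files ("skip"), or both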
+ events = cfg("archive-event")
+ if events is None:
+ self._archive_write_file = True
+ self._archive_write_skip = False
+ else:
+ if isinstance(events, str):
+ events = events.split(",")
+ self._archive_write_file = ("file" in events)
+ self._archive_write_skip = ("skip" in events)
+
skip = cfg("skip", True)
if skip:
self._skipexc = None
@@ -676,7 +684,7 @@ class SimulationJob(DownloadJob):
kwdict["extension"] = "jpg"
if self.sleep:
self.extractor.sleep(self.sleep(), "download")
- if self.archive:
+ if self.archive and self._archive_write_skip:
self.archive.add(kwdict)
self.out.skip(self.pathfmt.build_filename(kwdict))
@@ -848,16 +856,22 @@ class InfoJob(Job):
class DataJob(Job):
"""Collect extractor results and dump them"""
+ resolve = False
- def __init__(self, url, parent=None, file=sys.stdout, ensure_ascii=True):
+ def __init__(self, url, parent=None, file=sys.stdout, ensure_ascii=True,
+ resolve=False):
Job.__init__(self, url, parent)
self.file = file
self.data = []
self.ascii = config.get(("output",), "ascii", ensure_ascii)
+ self.resolve = 128 if resolve is True else (resolve or self.resolve)
private = config.get(("output",), "private")
self.filter = dict.copy if private else util.filter_dict
+ if self.resolve > 0:
+ self.handle_queue = self.handle_queue_resolve
+
def run(self):
self._init()
@@ -883,12 +897,13 @@ class DataJob(Job):
for msg in self.data:
util.transform_dict(msg[-1], util.number_to_string)
- # dump to 'file'
- try:
- util.dump_json(self.data, self.file, self.ascii, 2)
- self.file.flush()
- except Exception:
- pass
+ if self.file:
+ # dump to 'file'
+ try:
+ util.dump_json(self.data, self.file, self.ascii, 2)
+ self.file.flush()
+ except Exception:
+ pass
return 0
@@ -900,3 +915,17 @@ class DataJob(Job):
def handle_queue(self, url, kwdict):
self.data.append((Message.Queue, url, self.filter(kwdict)))
+
+ def handle_queue_resolve(self, url, kwdict):
+ cls = kwdict.get("_extractor")
+ if cls:
+ extr = cls.from_url(url)
+ else:
+ extr = extractor.find(url)
+
+ if not extr:
+ return self.data.append((Message.Queue, url, self.filter(kwdict)))
+
+ job = self.__class__(extr, self, None, self.ascii, self.resolve-1)
+ job.data = self.data
+ job.run()
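[note] DataJob now takes a resolve depth: instead of emitting Message.Queue entries verbatim, handle_queue_resolve spawns a nested DataJob for each child URL, shares the parent's data list, and decrements the depth per level (-J maps to a depth of 128). A simplified model of that recursion, with plain tuples standing in for extractor messages:

    def collect(items, data, depth):
        # Each "queue" item expands into a nested collection pass until
        # the resolve depth is exhausted; all levels share one list.
        for kind, payload in items:
            if kind == "queue" and depth > 0:
                collect(payload, data, depth - 1)  # recurse into child results
            else:
                data.append((kind, payload))
        return data

    nested = [("url", "a.jpg"), ("queue", [("url", "b.jpg")])]
    print(collect(nested, [], depth=128))
    # [('url', 'a.jpg'), ('url', 'b.jpg')]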
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index f31d5ac..155cbd9 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -293,10 +293,15 @@ def build_parser():
)
output.add_argument(
"-j", "--dump-json",
- dest="jobtype", action="store_const", const=job.DataJob,
+ dest="dump_json", action="count",
help="Print JSON information",
)
output.add_argument(
+ "-J", "--resolve-json",
+ dest="dump_json", action="store_const", const=128,
+ help="Print JSON information; resolve intermediary URLs",
+ )
+ output.add_argument(
"-s", "--simulate",
dest="jobtype", action="store_const", const=job.SimulationJob,
help="Simulate data extraction; do not download anything",
@@ -346,6 +351,11 @@ def build_parser():
"in the current directory to debug problems"),
)
output.add_argument(
+ "--print-traffic",
+ dest="print_traffic", action="store_true",
+ help=("Display sent and read HTTP traffic"),
+ )
+ output.add_argument(
"--no-colors",
dest="colors", action="store_false",
help=("Do not emit ANSI color codes in output"),
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index bd5d959..13b6a8a 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -10,7 +10,6 @@ import os
import sys
import shutil
import logging
-import functools
import unicodedata
from . import config, util, formatter
@@ -92,39 +91,6 @@ class LoggerAdapter():
self.logger._log(logging.ERROR, msg, args, **kwargs)
-class LoggerAdapterActions():
-
- def __init__(self, logger, job):
- self.logger = logger
- self.extra = job._logger_extra
- self.actions = job._logger_actions
-
- self.debug = functools.partial(self.log, logging.DEBUG)
- self.info = functools.partial(self.log, logging.INFO)
- self.warning = functools.partial(self.log, logging.WARNING)
- self.error = functools.partial(self.log, logging.ERROR)
-
- def log(self, level, msg, *args, **kwargs):
- msg = str(msg)
- if args:
- msg = msg % args
-
- actions = self.actions[level]
- if actions:
- args = self.extra.copy()
- args["level"] = level
-
- for cond, action in actions:
- if cond(msg):
- action(args)
-
- level = args["level"]
-
- if self.logger.isEnabledFor(level):
- kwargs["extra"] = self.extra
- self.logger._log(level, msg, (), **kwargs)
-
-
class PathfmtProxy():
__slots__ = ("job",)
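[note] With LoggerAdapterActions deleted here, the job.py hunks above select the adapter class once through the _logger_adapter class attribute, shadowed per-instance when actions are configured. The pattern reduced to its core; both adapter classes below are stand-ins, not the real implementations:

    import logging

    class PlainAdapter:
        def __init__(self, logger, job):
            self.logger = logger

    class ActionsAdapter(PlainAdapter):
        pass  # stand-in for the actions-aware adapter

    class Job:
        _logger_adapter = PlainAdapter  # class-level default

        def __init__(self, actions=None):
            if actions:
                # per-instance override shadows the class attribute
                self._logger_adapter = ActionsAdapter

        def _wrap_logger(self, logger):
            return self._logger_adapter(logger, self)

    log = logging.getLogger("demo")
    print(type(Job()._wrap_logger(log)).__name__)     # PlainAdapter
    print(type(Job("x")._wrap_logger(log)).__name__)  # ActionsAdapter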
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 1616bbd..7892776 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -51,6 +51,7 @@ class PathFormat():
raise exception.FilenameFormatError(exc)
directory_fmt = config("directory")
+ self.directory_conditions = ()
try:
if directory_fmt is None:
directory_fmt = extractor.directory_fmt
@@ -266,7 +267,7 @@ class PathFormat():
try:
for fmt in self.directory_formatters:
segment = fmt(kwdict).strip()
- if strip:
+ if strip and segment != "..":
# remove trailing dots and spaces (#647)
segment = segment.rstrip(strip)
if segment:
@@ -288,7 +289,7 @@ class PathFormat():
formatters = self.directory_formatters
for fmt in formatters:
segment = fmt(kwdict).strip()
- if strip:
+ if strip and segment != "..":
segment = segment.rstrip(strip)
if segment:
append(self.clean_segment(segment))
@@ -344,7 +345,11 @@ class PathFormat():
continue
except OSError:
# move across different filesystems
- shutil.copyfile(self.temppath, self.realpath)
+ try:
+ shutil.copyfile(self.temppath, self.realpath)
+ except FileNotFoundError:
+ os.makedirs(self.realdirectory)
+ shutil.copyfile(self.temppath, self.realpath)
os.unlink(self.temppath)
break
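[note] The path.py hunk retries a cross-filesystem copy after creating the destination directory, covering the case where the target tree does not exist yet when the rename fails. A hedged sketch of that fallback, with os.replace standing in for the rename fast path and hypothetical paths:

    import os
    import shutil

    def move_across_filesystems(temppath, realpath):
        try:
            os.replace(temppath, realpath)  # fast path: same filesystem
        except OSError:
            # move across different filesystems
            try:
                shutil.copyfile(temppath, realpath)
            except FileNotFoundError:
                # destination directory missing; create it and retry
                os.makedirs(os.path.dirname(realpath))
                shutil.copyfile(temppath, realpath)
            os.unlink(temppath)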
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 18d00e1..a520a34 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -56,7 +56,13 @@ class MetadataPP(PostProcessor):
ext = "json"
directory = options.get("directory")
- if directory:
+ if isinstance(directory, list):
+ self._directory = self._directory_format
+ self._directory_formatters = [
+ formatter.parse(dirfmt, util.NONE).format_map
+ for dirfmt in directory
+ ]
+ elif directory:
self._directory = self._directory_custom
sep = os.sep + (os.altsep or "")
self._metadir = util.expand_path(directory).rstrip(sep) + os.sep
@@ -147,6 +153,19 @@ class MetadataPP(PostProcessor):
def _directory_custom(self, pathfmt):
return os.path.join(pathfmt.realdirectory, self._metadir)
+ def _directory_format(self, pathfmt):
+ formatters = pathfmt.directory_formatters
+ conditions = pathfmt.directory_conditions
+ try:
+ pathfmt.directory_formatters = self._directory_formatters
+ pathfmt.directory_conditions = ()
+ segments = pathfmt.build_directory(pathfmt.kwdict)
+ directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
+ return os.path.join(pathfmt.realdirectory, directory)
+ finally:
+ pathfmt.directory_conditions = conditions
+ pathfmt.directory_formatters = formatters
+
def _filename(self, pathfmt):
return (pathfmt.filename or "metadata") + "." + self.extension
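[note] metadata.directory may now be a list of format strings; _directory_format temporarily swaps them into the PathFormat's directory formatters (clearing the conditions), rebuilds the segments, and restores the originals in the finally block. The test further below exercises this with ["..", "json", ...]. A self-contained sketch of the segment-building idea, using str.format_map in place of gallery-dl's formatter objects:

    import os

    def build_metadata_directory(kwdict, fmts, realdirectory):
        # Apply each directory format string to the metadata dict
        # and join the non-empty segments under the real directory.
        segments = [fmt.format_map(kwdict) for fmt in fmts]
        subdir = os.sep.join(s for s in segments if s) + os.sep
        return os.path.join(realdirectory, subdir)

    print(build_metadata_directory(
        {"id": 12345}, ["..", "json", "{id}"], "/tmp/gallery-dl"))
    # /tmp/gallery-dl/../json/12345/  (POSIX output)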
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index e76ddf3..5744ef3 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -540,10 +540,14 @@ class CustomNone():
def __bool__():
return False
+ def __eq__(self, other):
+ return self is other
+
+ def __ne__(self, other):
+ return self is not other
+
__lt__ = true
__le__ = true
- __eq__ = false
- __ne__ = true
__gt__ = false
__ge__ = false
@@ -616,11 +620,28 @@ else:
Popen = subprocess.Popen
-def compile_expression(expr, name="<expr>", globals=None):
+def compile_expression_raw(expr, name="<expr>", globals=None):
code_object = compile(expr, name, "eval")
return functools.partial(eval, code_object, globals or GLOBALS)
+def compile_expression_tryexcept(expr, name="<expr>", globals=None):
+ code_object = compile(expr, name, "eval")
+
+ def _eval(locals=None, globals=(globals or GLOBALS), co=code_object):
+ try:
+ return eval(co, globals, locals)
+ except exception.GalleryDLException:
+ raise
+ except Exception:
+ return False
+
+ return _eval
+
+
+compile_expression = compile_expression_tryexcept
+
+
def import_file(path):
"""Import a Python module from a filesystem path"""
path, name = os.path.split(path)
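[note] compile_expression now defaults to a try/except wrapper that returns False on ordinary evaluation errors (undefined names included), which is why the FilterPredicate tests below no longer expect FilterError; the same file makes CustomNone equality identity-based, so obj == obj is True while obj == 0 is False. A sketch of the wrapper, assuming only library-specific exceptions should still propagate (the empty-builtins globals dict here is illustrative):

    def compile_expression_tryexcept(expr, name="<expr>", globals=None):
        code_object = compile(expr, name, "eval")

        def _eval(locals=None, globals=(globals or {"__builtins__": {}}),
                  co=code_object):
            try:
                return eval(co, globals, locals)
            except Exception:  # upstream re-raises its own exception types first
                return False

        return _eval

    expr = compile_expression_tryexcept("a + b * c")
    print(expr({"a": 1, "b": 2, "c": 3}))  # 7
    print(expr({"a": 1}))                  # False (NameError swallowed)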
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index a8ff38e..f234af1 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.27.1"
+__version__ = "1.27.2"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index 0a0bf86..d4fdedc 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -249,6 +249,22 @@ def parse_command_line(module, argv):
None if opts.match_filter is None
else module.match_filter_func(opts.match_filter))
+ cookiesfrombrowser = getattr(opts, "cookiesfrombrowser", None)
+ if cookiesfrombrowser:
+ match = re.fullmatch(r"""(?x)
+ (?P<name>[^+:]+)
+ (?:\s*\+\s*(?P<keyring>[^:]+))?
+ (?:\s*:\s*(?!:)(?P<profile>.+?))?
+ (?:\s*::\s*(?P<container>.+))?
+ """, cookiesfrombrowser)
+ if match:
+ browser, keyring, profile, container = match.groups()
+ if keyring is not None:
+ keyring = keyring.upper()
+ cookiesfrombrowser = (browser.lower(), profile, keyring, container)
+ else:
+ cookiesfrombrowser = None
+
return {
"usenetrc": opts.usenetrc,
"netrc_location": getattr(opts, "netrc_location", None),
@@ -364,7 +380,7 @@ def parse_command_line(module, argv):
"skip_playlist_after_errors": getattr(
opts, "skip_playlist_after_errors", None),
"cookiefile": opts.cookiefile,
- "cookiesfrombrowser": getattr(opts, "cookiesfrombrowser", None),
+ "cookiesfrombrowser": cookiesfrombrowser,
"nocheckcertificate": opts.no_check_certificate,
"prefer_insecure": opts.prefer_insecure,
"proxy": opts.proxy,
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 6af1226..abf122b 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -17,7 +17,7 @@ import string
from datetime import datetime, timedelta
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import extractor # noqa E402
+from gallery_dl import extractor, util # noqa E402
from gallery_dl.extractor import mastodon # noqa E402
from gallery_dl.extractor.common import Extractor, Message # noqa E402
from gallery_dl.extractor.directlink import DirectlinkExtractor # noqa E402
@@ -25,7 +25,11 @@ from gallery_dl.extractor.directlink import DirectlinkExtractor # noqa E402
_list_classes = extractor._list_classes
try:
- from test import results
+ RESULTS = os.environ.get("GDL_TEST_RESULTS")
+ if RESULTS:
+ results = util.import_file(RESULTS)
+ else:
+ from test import results
except ImportError:
results = None
@@ -109,6 +113,7 @@ class TestExtractorModule(unittest.TestCase):
print("Skipping '{}' category checks".format(cat))
continue
raise
+ self.assertTrue(extr, url)
self.assertEqual(extr.category, cat, url)
self.assertEqual(extr.subcategory, sub, url)
self.assertEqual(extr.basecategory, base, url)
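[note] Both test modules can now load an alternative results file through the GDL_TEST_RESULTS environment variable via util.import_file instead of the bundled test.results package. A rough equivalent built on the standard importlib machinery; the helper name is gallery-dl's, but this body is an assumption, not the upstream implementation:

    import importlib.util
    import os

    def import_file(path):
        # Load a Python module from an arbitrary filesystem path.
        name = os.path.splitext(os.path.basename(path))[0]
        spec = importlib.util.spec_from_file_location(name, path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    # usage: GDL_TEST_RESULTS=/path/to/results.py python -m unittest test.test_extractor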
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index d509052..3e6d1df 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -440,6 +440,18 @@ class MetadataTest(BasePostprocessorTest):
path = self.pathfmt.realdirectory + "metadata/file.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
+ def test_metadata_directory_format(self):
+ self._create(
+ {"directory": ["..", "json", "\fE str(id // 500 * 500 + 500)"]},
+ {"id": 12345},
+ )
+
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+
+ path = self.pathfmt.realdirectory + "../json/12500/file.ext.json"
+ m.assert_called_once_with(path, "w", encoding="utf-8")
+
def test_metadata_filename(self):
self._create({
"filename" : "{category}_{filename}_/meta/\n\r.data",
diff --git a/test/test_results.py b/test/test_results.py
index ab3668e..e2c7ca2 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -20,7 +20,13 @@ import collections
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import \
extractor, util, job, config, exception, formatter # noqa E402
-from test import results # noqa E402
+
+
+RESULTS = os.environ.get("GDL_TEST_RESULTS")
+if RESULTS:
+ results = util.import_file(RESULTS)
+else:
+ from test import results
# temporary issues, etc.
@@ -86,38 +92,34 @@ class TestExtractorResults(unittest.TestCase):
def _run_test(self, result):
result.pop("#comment", None)
- only_matching = (len(result) <= 3)
+ auth = result.pop("#auth", None)
+
+ extractor.find(result["#url"])
+ extr = result["#class"].from_url(result["#url"])
+ if not extr:
+ raise exception.NoExtractorError()
+ if len(result) <= 3:
+ return # only matching
- auth = result.get("#auth")
if auth is None:
auth = (result["#category"][1] in AUTH)
elif not auth:
+ # auth explicitly disabled
for key in AUTH_CONFIG:
config.set((), key, None)
- if auth:
- extr = result["#class"].from_url(result["#url"])
- if not any(extr.config(key) for key in AUTH_CONFIG):
- self._skipped.append((result["#url"], "no auth"))
- only_matching = True
+ if auth and not any(extr.config(key) for key in AUTH_CONFIG):
+ return self._skipped.append((result["#url"], "no auth"))
- if only_matching:
- content = False
- else:
- if "#options" in result:
- for key, value in result["#options"].items():
- key = key.split(".")
- config.set(key[:-1], key[-1], value)
- if "#range" in result:
- config.set((), "image-range" , result["#range"])
- config.set((), "chapter-range", result["#range"])
- content = ("#sha1_content" in result)
-
- tjob = ResultJob(result["#url"], content=content)
- self.assertEqual(result["#class"], tjob.extractor.__class__, "#class")
-
- if only_matching:
- return
+ if "#options" in result:
+ for key, value in result["#options"].items():
+ key = key.split(".")
+ config.set(key[:-1], key[-1], value)
+ if "#range" in result:
+ config.set((), "image-range" , result["#range"])
+ config.set((), "chapter-range", result["#range"])
+
+ tjob = ResultJob(extr, content=("#sha1_content" in result))
if "#exception" in result:
with self.assertRaises(result["#exception"], msg="#exception"):
diff --git a/test/test_util.py b/test/test_util.py
index 35e7247..4622c28 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -134,19 +134,18 @@ class TestPredicate(unittest.TestCase):
with self.assertRaises(SyntaxError):
util.FilterPredicate("(")
- with self.assertRaises(exception.FilterError):
- util.FilterPredicate("a > 1")(url, {"a": None})
-
- with self.assertRaises(exception.FilterError):
- util.FilterPredicate("b > 1")(url, {"a": 2})
+ self.assertFalse(
+ util.FilterPredicate("a > 1")(url, {"a": None}))
+ self.assertFalse(
+ util.FilterPredicate("b > 1")(url, {"a": 2}))
pred = util.FilterPredicate(["a < 3", "b < 4", "c < 5"])
self.assertTrue(pred(url, {"a": 2, "b": 3, "c": 4}))
self.assertFalse(pred(url, {"a": 3, "b": 3, "c": 4}))
self.assertFalse(pred(url, {"a": 2, "b": 4, "c": 4}))
self.assertFalse(pred(url, {"a": 2, "b": 3, "c": 5}))
- with self.assertRaises(exception.FilterError):
- pred(url, {"a": 2})
+
+ self.assertFalse(pred(url, {"a": 2}))
def test_build_predicate(self):
pred = util.build_predicate([])
@@ -445,6 +444,7 @@ class TestOther(unittest.TestCase):
self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 90)
+ expr = util.compile_expression_raw("a + b * c")
with self.assertRaises(NameError):
expr()
with self.assertRaises(NameError):
@@ -755,8 +755,9 @@ def hash(value):
self.assertLess(obj, "foo")
self.assertLessEqual(obj, None)
- self.assertFalse(obj == obj)
- self.assertTrue(obj != obj)
+ self.assertTrue(obj == obj)
+ self.assertFalse(obj == 0)
+ self.assertFalse(obj != obj)
self.assertGreater(123, obj)
self.assertGreaterEqual(1.23, obj)
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index 878ac85..fd2e40a 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -294,6 +294,20 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
self._(["--geo-bypass-ip-block", "198.51.100.14/24"],
"geo_bypass", "198.51.100.14/24")
+ def test_cookiesfrombrowser(self):
+ self._(["--cookies-from-browser", "firefox"],
+ "cookiesfrombrowser", ("firefox", None, None, None))
+ self._(["--cookies-from-browser", "firefox:profile"],
+ "cookiesfrombrowser", ("firefox", "profile", None, None))
+ self._(["--cookies-from-browser", "firefox+keyring"],
+ "cookiesfrombrowser", ("firefox", None, "KEYRING", None))
+ self._(["--cookies-from-browser", "firefox::container"],
+ "cookiesfrombrowser", ("firefox", None, None, "container"))
+ self._(["--cookies-from-browser",
+ "firefox+keyring:profile::container"],
+ "cookiesfrombrowser",
+ ("firefox", "profile", "KEYRING", "container"))
+
if __name__ == "__main__":
unittest.main(warnings="ignore")