-rw-r--r--  CHANGELOG.md | 110
-rw-r--r--  PKG-INFO | 10
-rw-r--r--  README.rst | 8
-rw-r--r--  data/completion/_gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl.fish | 2
-rw-r--r--  data/man/gallery-dl.1 | 8
-rw-r--r--  data/man/gallery-dl.conf.5 | 180
-rw-r--r--  docs/gallery-dl.conf | 942
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 10
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 8
-rw-r--r--  gallery_dl/__init__.py | 19
-rw-r--r--  gallery_dl/aes.py | 5
-rw-r--r--  gallery_dl/config.py | 10
-rw-r--r--  gallery_dl/cookies.py | 78
-rw-r--r--  gallery_dl/downloader/ytdl.py | 4
-rw-r--r--  gallery_dl/extractor/8chan.py | 7
-rw-r--r--  gallery_dl/extractor/__init__.py | 8
-rw-r--r--  gallery_dl/extractor/bilibili.py | 116
-rw-r--r--  gallery_dl/extractor/blogger.py | 2
-rw-r--r--  gallery_dl/extractor/bluesky.py | 49
-rw-r--r--  gallery_dl/extractor/boosty.py | 13
-rw-r--r--  gallery_dl/extractor/bunkr.py | 11
-rw-r--r--  gallery_dl/extractor/civitai.py | 22
-rw-r--r--  gallery_dl/extractor/common.py | 31
-rw-r--r--  gallery_dl/extractor/danbooru.py | 65
-rw-r--r--  gallery_dl/extractor/deviantart.py | 7
-rw-r--r--  gallery_dl/extractor/e621.py | 3
-rw-r--r--  gallery_dl/extractor/everia.py | 99
-rw-r--r--  gallery_dl/extractor/exhentai.py | 8
-rw-r--r--  gallery_dl/extractor/facebook.py | 447
-rw-r--r--  gallery_dl/extractor/flickr.py | 14
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py | 6
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py | 16
-rw-r--r--  gallery_dl/extractor/hiperdex.py | 14
-rw-r--r--  gallery_dl/extractor/hitomi.py | 109
-rw-r--r--  gallery_dl/extractor/imagechest.py | 66
-rw-r--r--  gallery_dl/extractor/instagram.py | 12
-rw-r--r--  gallery_dl/extractor/kemonoparty.py | 352
-rw-r--r--  gallery_dl/extractor/koharu.py | 49
-rw-r--r--  gallery_dl/extractor/lolisafe.py | 7
-rw-r--r--  gallery_dl/extractor/mangadex.py | 35
-rw-r--r--  gallery_dl/extractor/mastodon.py | 1
-rw-r--r--  gallery_dl/extractor/motherless.py | 167
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 19
-rw-r--r--  gallery_dl/extractor/nhentai.py | 2
-rw-r--r--  gallery_dl/extractor/noop.py | 27
-rw-r--r--  gallery_dl/extractor/patreon.py | 13
-rw-r--r--  gallery_dl/extractor/philomena.py | 2
-rw-r--r--  gallery_dl/extractor/piczel.py | 8
-rw-r--r--  gallery_dl/extractor/pillowfort.py | 1
-rw-r--r--  gallery_dl/extractor/pinterest.py | 40
-rw-r--r--  gallery_dl/extractor/pixiv.py | 123
-rw-r--r--  gallery_dl/extractor/poipiku.py | 2
-rw-r--r--  gallery_dl/extractor/reddit.py | 14
-rw-r--r--  gallery_dl/extractor/rule34vault.py | 119
-rw-r--r--  gallery_dl/extractor/rule34xyz.py | 143
-rw-r--r--  gallery_dl/extractor/saint.py | 101
-rw-r--r--  gallery_dl/extractor/sankaku.py | 11
-rw-r--r--  gallery_dl/extractor/scrolller.py | 7
-rw-r--r--  gallery_dl/extractor/smugmug.py | 1
-rw-r--r--  gallery_dl/extractor/steamgriddb.py | 17
-rw-r--r--  gallery_dl/extractor/tumblr.py | 77
-rw-r--r--  gallery_dl/extractor/tumblrgallery.py | 1
-rw-r--r--  gallery_dl/extractor/twitter.py | 9
-rw-r--r--  gallery_dl/extractor/urlgalleries.py | 4
-rw-r--r--  gallery_dl/extractor/webtoons.py | 7
-rw-r--r--  gallery_dl/extractor/weibo.py | 11
-rw-r--r--  gallery_dl/job.py | 29
-rw-r--r--  gallery_dl/option.py | 17
-rw-r--r--  gallery_dl/output.py | 39
-rw-r--r--  gallery_dl/path.py | 4
-rw-r--r--  gallery_dl/postprocessor/classify.py | 47
-rw-r--r--  gallery_dl/postprocessor/common.py | 22
-rw-r--r--  gallery_dl/util.py | 35
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  gallery_dl/ytdl.py | 5
-rw-r--r--  test/test_config.py | 18
-rw-r--r--  test/test_extractor.py | 5
-rw-r--r--  test/test_postprocessor.py | 39
-rw-r--r--  test/test_util.py | 146
81 files changed, 3431 insertions, 870 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f4bb546..e3dec8c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,39 +1,87 @@
-## 1.27.7 - 2024-10-25
+## 1.28.0 - 2024-11-30
+### Changes
+- [common] disable using environment network settings by default (`HTTP_PROXY`, `.netrc`, …)
+ - disable `trust_env` session attribute
+ - disable `Authorization` header injection from `.netrc` auth ([#5780](https://github.com/mikf/gallery-dl/issues/5780), [#6134](https://github.com/mikf/gallery-dl/issues/6134), [#6455](https://github.com/mikf/gallery-dl/issues/6455))
+ - add `proxy-env` option
+- [ytdl] change `forward-cookies` default value to `true` ([#6401](https://github.com/mikf/gallery-dl/issues/6401), [#6348](https://github.com/mikf/gallery-dl/issues/6348))
### Extractors
#### Additions
-- [civitai] add extractors for global `models` and `images` ([#6310](https://github.com/mikf/gallery-dl/issues/6310))
-- [mangadex] add `author` extractor ([#6372](https://github.com/mikf/gallery-dl/issues/6372))
-- [scrolller] add support ([#295](https://github.com/mikf/gallery-dl/issues/295), [#3418](https://github.com/mikf/gallery-dl/issues/3418), [#5051](https://github.com/mikf/gallery-dl/issues/5051))
+- [bilibili] add support for `opus` articles ([#2824](https://github.com/mikf/gallery-dl/issues/2824), [#6443](https://github.com/mikf/gallery-dl/issues/6443))
+- [bluesky] add `hashtag` extractor ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
+- [danbooru] add `artist` and `artist-search` extractors ([#5348](https://github.com/mikf/gallery-dl/issues/5348))
+- [everia] add support ([#1067](https://github.com/mikf/gallery-dl/issues/1067), [#2472](https://github.com/mikf/gallery-dl/issues/2472), [#4091](https://github.com/mikf/gallery-dl/issues/4091), [#6227](https://github.com/mikf/gallery-dl/issues/6227))
+- [facebook] add support ([#470](https://github.com/mikf/gallery-dl/issues/470), [#2612](https://github.com/mikf/gallery-dl/issues/2612), [#5626](https://github.com/mikf/gallery-dl/issues/5626), [#6548](https://github.com/mikf/gallery-dl/issues/6548))
+- [hentaifoundry] add `tag` extractor ([#6465](https://github.com/mikf/gallery-dl/issues/6465))
+- [hitomi] add `index` and `search` extractors ([#2502](https://github.com/mikf/gallery-dl/issues/2502), [#6392](https://github.com/mikf/gallery-dl/issues/6392), [#3720](https://github.com/mikf/gallery-dl/issues/3720))
+- [motherless] add support ([#2074](https://github.com/mikf/gallery-dl/issues/2074), [#4413](https://github.com/mikf/gallery-dl/issues/4413), [#6221](https://github.com/mikf/gallery-dl/issues/6221))
+- [noop] add `noop` extractor
+- [rule34vault] add support ([#5708](https://github.com/mikf/gallery-dl/issues/5708), [#6240](https://github.com/mikf/gallery-dl/issues/6240))
+- [rule34xyz] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078), [#4960](https://github.com/mikf/gallery-dl/issues/4960))
+- [saint] add support ([#4405](https://github.com/mikf/gallery-dl/issues/4405), [#6324](https://github.com/mikf/gallery-dl/issues/6324))
+- [tumblr] add `search` extractor ([#6394](https://github.com/mikf/gallery-dl/issues/6394))
#### Fixes
-- [8chan] automatically detect `TOS` cookie name ([#6318](https://github.com/mikf/gallery-dl/issues/6318))
-- [bunkr] update to new site layout ([#6344](https://github.com/mikf/gallery-dl/issues/6344), [#6352](https://github.com/mikf/gallery-dl/issues/6352), [#6368](https://github.com/mikf/gallery-dl/issues/6368))
-- [bunkr] send proper `Referer` headers for file downloads ([#6319](https://github.com/mikf/gallery-dl/issues/6319))
-- [civitai] add `uuid` metadata field & use it as default archive format ([#6326](https://github.com/mikf/gallery-dl/issues/6326))
-- [civitai] fix "My Reactions" results ([#6263](https://github.com/mikf/gallery-dl/issues/6263))
-- [civitai] fix `model` file download URLs for tRPC API
-- [lensdump] fix extraction ([#6313](https://github.com/mikf/gallery-dl/issues/6313))
-- [pixiv] make retrieving ugoira metadata non-fatal ([#6297](https://github.com/mikf/gallery-dl/issues/6297))
-- [pixiv] fix exception when processing deleted `sanity_level` works ([#6339](https://github.com/mikf/gallery-dl/issues/6339))
-- [urlgalleries] fix extraction
-- [wikimedia] fix non-English Fandom/wiki.gg articles ([#6370](https://github.com/mikf/gallery-dl/issues/6370))
+- [8chan] avoid performing network requests within `_init()` ([#6387](https://github.com/mikf/gallery-dl/issues/6387))
+- [bluesky] fix downloads from non-bsky PDSs ([#6406](https://github.com/mikf/gallery-dl/issues/6406))
+- [bunkr] fix album names containing `<>&` characters
+- [flickr] use `download` URLs ([#6360](https://github.com/mikf/gallery-dl/issues/6360), [#6464](https://github.com/mikf/gallery-dl/issues/6464))
+- [hiperdex] update domain to `hipertoon.com` ([#6420](https://github.com/mikf/gallery-dl/issues/6420))
+- [imagechest] fix extractors ([#6475](https://github.com/mikf/gallery-dl/issues/6475), [#6491](https://github.com/mikf/gallery-dl/issues/6491))
+- [instagram] fix using numeric cursor values ([#6414](https://github.com/mikf/gallery-dl/issues/6414))
+- [kemonoparty] update to new site layout ([#6415](https://github.com/mikf/gallery-dl/issues/6415), [#6503](https://github.com/mikf/gallery-dl/issues/6503), [#6528](https://github.com/mikf/gallery-dl/issues/6528), [#6530](https://github.com/mikf/gallery-dl/issues/6530), [#6536](https://github.com/mikf/gallery-dl/issues/6536), [#6542](https://github.com/mikf/gallery-dl/issues/6542), [#6554](https://github.com/mikf/gallery-dl/issues/6554))
+- [koharu] update domain to `niyaniya.moe` ([#6430](https://github.com/mikf/gallery-dl/issues/6430), [#6432](https://github.com/mikf/gallery-dl/issues/6432))
+- [mangadex] apply `lang` option only to chapter results ([#6372](https://github.com/mikf/gallery-dl/issues/6372))
+- [newgrounds] fix metadata extraction ([#6463](https://github.com/mikf/gallery-dl/issues/6463), [#6533](https://github.com/mikf/gallery-dl/issues/6533))
+- [nhentai] support `.webp` files ([#6442](https://github.com/mikf/gallery-dl/issues/6442), [#6479](https://github.com/mikf/gallery-dl/issues/6479))
+- [patreon] use legacy mobile UA when no `session_id` is set
+- [pinterest] update API headers ([#6513](https://github.com/mikf/gallery-dl/issues/6513))
+- [pinterest] detect video/audio by block content ([#6421](https://github.com/mikf/gallery-dl/issues/6421))
+- [scrolller] prevent exception for posts without `mediaSources` ([#5051](https://github.com/mikf/gallery-dl/issues/5051))
+- [tumblrgallery] fix file downloads ([#6391](https://github.com/mikf/gallery-dl/issues/6391))
+- [twitter] make `source` metadata extraction non-fatal ([#6472](https://github.com/mikf/gallery-dl/issues/6472))
+- [weibo] fix livephoto `filename` & `extension` ([#6471](https://github.com/mikf/gallery-dl/issues/6471))
#### Improvements
-- [8chan] support `/last/` thread URLs ([#6318](https://github.com/mikf/gallery-dl/issues/6318))
-- [bunkr] support `bunkr.ph` and `bunkr.ps` URLs
-- [newgrounds] support page numbers in URLs ([#6320](https://github.com/mikf/gallery-dl/issues/6320))
-- [patreon] support `/c/` prefix in creator URLs ([#6348](https://github.com/mikf/gallery-dl/issues/6348))
-- [pinterest] support `story` pins ([#6188](https://github.com/mikf/gallery-dl/issues/6188), [#6078](https://github.com/mikf/gallery-dl/issues/6078), [#4229](https://github.com/mikf/gallery-dl/issues/4229))
-- [pixiv] implement `sanity_level` workaround for user artworks results ([#4327](https://github.com/mikf/gallery-dl/issues/4327), [#5435](https://github.com/mikf/gallery-dl/issues/5435), [#6339](https://github.com/mikf/gallery-dl/issues/6339))
+- [bluesky] support `main.bsky.dev` URLs ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
+- [bluesky] match common embed fixes ([#6410](https://github.com/mikf/gallery-dl/issues/6410), [#6411](https://github.com/mikf/gallery-dl/issues/6411))
+- [boosty] update default video format list ([#2387](https://github.com/mikf/gallery-dl/issues/2387))
+- [bunkr] support `bunkr.cr` URLs
+- [common] allow passing cookies to OAuth extractors
+- [common] allow overriding more default `User-Agent` headers ([#6496](https://github.com/mikf/gallery-dl/issues/6496))
+- [philomena] switch default `ponybooru` filter ([#6437](https://github.com/mikf/gallery-dl/issues/6437))
+- [pinterest] support `story_pin_music` blocks ([#6421](https://github.com/mikf/gallery-dl/issues/6421))
+- [pixiv] get ugoira frame extension from `meta_single_page` values ([#6056](https://github.com/mikf/gallery-dl/issues/6056))
+- [reddit] support user profile share links ([#6389](https://github.com/mikf/gallery-dl/issues/6389))
+- [steamgriddb] disable `adjust-extensions` for `fake-png` files ([#5274](https://github.com/mikf/gallery-dl/issues/5274))
+- [twitter] remove cookies migration workaround
+#### Metadata
+- [bluesky] provide `author[instance]` metadata ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
+- [instagram] fix `extension` of apparent `.webp` files ([#6541](https://github.com/mikf/gallery-dl/issues/6541))
+- [pillowfort] provide `count` metadata ([#6478](https://github.com/mikf/gallery-dl/issues/6478))
+- [pixiv:ranking] add `rank` metadata field ([#6531](https://github.com/mikf/gallery-dl/issues/6531))
+- [poipiku] return `count` as proper number ([#6445](https://github.com/mikf/gallery-dl/issues/6445))
+- [webtoons] extract `episode_no` for comic results ([#6439](https://github.com/mikf/gallery-dl/issues/6439))
#### Options
-- [bluesky] add `quoted` option ([#6323](https://github.com/mikf/gallery-dl/issues/6323))
-- [pixiv] add `captions` option ([#4327](https://github.com/mikf/gallery-dl/issues/4327))
-- [reddit] add `embeds` option ([#6357](https://github.com/mikf/gallery-dl/issues/6357))
-- [vk] add `offset` option ([#6328](https://github.com/mikf/gallery-dl/issues/6328))
+- [civitai] add `metadata` option - support fetching `generation` data ([#6383](https://github.com/mikf/gallery-dl/issues/6383))
+- [exhentai] implement `tags` option ([#2117](https://github.com/mikf/gallery-dl/issues/2117))
+- [koharu] implement `tags` option
+- [rule34xyz] add `format` option ([#1078](https://github.com/mikf/gallery-dl/issues/1078))
### Downloaders
-- [ytdl] implement explicit HLS/DASH handling
+- [ytdl] fix `AttributeError` caused by `decodeOption()` removal ([#6552](https://github.com/mikf/gallery-dl/issues/6552))
### Post Processors
-- add `error` event
+- [classify] rewrite - fix skipping existing files ([#5213](https://github.com/mikf/gallery-dl/issues/5213))
+- enable inheriting options from global `postprocessor` objects
+- allow `postprocessors` values to be a single post processor object
+### Cookies
+- support Chromium table version 24 ([#6162](https://github.com/mikf/gallery-dl/issues/6162))
+- fix GCM pad length calculation ([#6162](https://github.com/mikf/gallery-dl/issues/6162))
+- try decryption with empty password as fallback
+### Documentation
+- update recommended `pip` command for installing `dev` version ([#6493](https://github.com/mikf/gallery-dl/issues/6493))
+- update `gallery-dl.conf` ([#6501](https://github.com/mikf/gallery-dl/issues/6501))
+### Options
+- add `-4/--force-ipv4` and `-6/--force-ipv6` command-line options
+- fix passing negative numbers as arguments ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
### Miscellaneous
-- [cookies] convert Chromium `expires_utc` values to Unix timestamps
-- [util] add `std` object to global eval namespace ([#6330](https://github.com/mikf/gallery-dl/issues/6330))
-- add `--print` and `--print-to-file` command-line options ([#6343](https://github.com/mikf/gallery-dl/issues/6343))
-- use child extractor fallbacks only when a non-user error occurs ([#6329](https://github.com/mikf/gallery-dl/issues/6329))
+- [output] use default ANSI colors only when stream is a TTY
+- [util] implement `defaultdict` filters-environment
+- [util] enable using multiple statements for all `filter` options ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
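The trust_env and proxy-env changes listed at the top of the changelog mean the requests session no longer reads HTTP_PROXY/HTTPS_PROXY or .netrc credentials unless explicitly asked to. A rough illustration at the requests level, assuming nothing beyond what the changelog states (this is not gallery-dl's own code):

    import requests

    url = "https://example.org/"
    session = requests.Session()

    # 1.28.0 default: environment network settings are ignored
    session.trust_env = False

    # roughly what enabling the new "proxy-env" option restores
    session.trust_env = True
    print(session.merge_environment_settings(url, {}, None, None, None))
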
diff --git a/PKG-INFO b/PKG-INFO
index abc0001..842dead 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.7
+Version: 1.28.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -96,7 +96,9 @@ pip_ as well:
.. code:: bash
- python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
+ python3 -m pip install -U --force-reinstall --no-deps https://github.com/mikf/gallery-dl/archive/master.tar.gz
+
+Omit :code:`--no-deps` if Requests_ hasn't been installed yet.
Note: Windows users should use :code:`py -3` instead of :code:`python3`.
@@ -115,9 +117,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 335101c..8b8b74f 100644
--- a/README.rst
+++ b/README.rst
@@ -55,7 +55,9 @@ pip_ as well:
.. code:: bash
- python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
+ python3 -m pip install -U --force-reinstall --no-deps https://github.com/mikf/gallery-dl/archive/master.tar.gz
+
+Omit :code:`--no-deps` if Requests_ hasn't been installed yet.
Note: Windows users should use :code:`py -3` instead of :code:`python3`.
@@ -74,9 +76,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 743808c..073ac05 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -42,6 +42,8 @@ _arguments -s -S \
--http-timeout'[Timeout for HTTP connections (default: 30.0)]':'<seconds>' \
--proxy'[Use the specified proxy]':'<url>' \
--source-address'[Client-side IP address to bind to]':'<ip>' \
+{-4,--force-ipv4}'[Make all connections via IPv4]' \
+{-6,--force-ipv6}'[Make all connections via IPv6]' \
--no-check-certificate'[Disable HTTPS certificate validation]' \
{-r,--limit-rate}'[Maximum download rate (e.g. 500k or 2.5M)]':'<rate>' \
--chunk-size'[Size of in-memory data chunks (default: 32k)]':'<size>' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index fd5268f..161113c 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --print --print-to-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --print --print-to-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --force-ipv4 --force-ipv6 --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index a239c50..f8bb723 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -36,6 +36,8 @@ complete -c gallery-dl -x -s 'R' -l 'retries' -d 'Maximum number of retries for
complete -c gallery-dl -x -l 'http-timeout' -d 'Timeout for HTTP connections (default: 30.0)'
complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy'
complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to'
+complete -c gallery-dl -s '4' -l 'force-ipv4' -d 'Make all connections via IPv4'
+complete -c gallery-dl -s '6' -l 'force-ipv6' -d 'Make all connections via IPv6'
complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate validation'
complete -c gallery-dl -x -s 'r' -l 'limit-rate' -d 'Maximum download rate (e.g. 500k or 2.5M)'
complete -c gallery-dl -x -l 'chunk-size' -d 'Size of in-memory data chunks (default: 32k)'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index a56dbcd..29568cf 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-10-25" "1.27.7" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-11-30" "1.28.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -128,6 +128,12 @@ Use the specified proxy
.B "\-\-source\-address" \f[I]IP\f[]
Client-side IP address to bind to
.TP
+.B "\-4, \-\-force\-ipv4"
+Make all connections via IPv4
+.TP
+.B "\-6, \-\-force\-ipv6"
+Make all connections via IPv6
+.TP
.B "\-\-no\-check\-certificate"
Disable HTTPS certificate validation
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 0ae8c38..c441095 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-10-25" "1.27.7" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-11-30" "1.28.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -458,8 +458,8 @@ response before \f[I]retrying\f[] the request.
\f[I][Danbooru]\f[], \f[I][E621]\f[], \f[I][foolfuuka]:search\f[], \f[I]itaku\f[],
\f[I]koharu\f[],
\f[I]newgrounds\f[], \f[I][philomena]\f[], \f[I]pixiv:novel\f[], \f[I]plurk\f[],
-\f[I]poipiku\f[] , \f[I]pornpics\f[], \f[I]soundgasm\f[], \f[I]urlgalleries\f[],
-\f[I]vk\f[], \f[I]zerochan\f[]
+\f[I]poipiku\f[] , \f[I]pornpics\f[], \f[I]scrolller\f[], \f[I]soundgasm\f[],
+\f[I]urlgalleries\f[], \f[I]vk\f[], \f[I]zerochan\f[]
.br
* \f[I]"1.0-2.0"\f[]
\f[I]flickr\f[], \f[I]weibo\f[], \f[I][wikimedia]\f[]
@@ -468,7 +468,11 @@ response before \f[I]retrying\f[] the request.
\f[I]behance\f[], \f[I]imagefap\f[], \f[I][Nijie]\f[]
.br
* \f[I]"3.0-6.0"\f[]
-\f[I]exhentai\f[], \f[I]idolcomplex\f[], \f[I][reactor]\f[], \f[I]readcomiconline\f[]
+\f[I]bilibili\f[],
+\f[I]exhentai\f[],
+\f[I]idolcomplex\f[],
+\f[I][reactor]\f[],
+\f[I]readcomiconline\f[]
.br
* \f[I]"6.0-6.1"\f[]
\f[I]twibooru\f[]
@@ -734,10 +738,23 @@ It is also possible to set a proxy for a specific host by using
\f[I]scheme://host\f[] as key.
See \f[I]Requests' proxy documentation\f[] for more details.
-Note: If a proxy URLs does not include a scheme,
+Note: If a proxy URL does not include a scheme,
\f[I]http://\f[] is assumed.
+.SS extractor.*.proxy-env
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Collect proxy configuration information from environment variables
+(\f[I]HTTP_PROXY\f[], \f[I]HTTPS_PROXY\f[], \f[I]NO_PROXY\f[])
+and Windows Registry settings.
+
+
.SS extractor.*.source-address
.IP "Type:" 6
.br
@@ -1220,7 +1237,10 @@ Exit the program with the given argument as exit status.
.SS extractor.*.postprocessors
.IP "Type:" 6
-\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
+.br
+* \f[I]Postprocessor Configuration\f[] object
+.br
+* \f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
.IP "Example:" 4
.. code:: json
@@ -1827,8 +1847,23 @@ If this is a \f[I]list\f[], it selects which format to try to download.
.br
Possibly available formats are
.br
-\f[I]"quad_hd"\f[], \f[I]"ultra_hd"\f[], \f[I]"full_hd"\f[],
-\f[I]"high"\f[], \f[I]"medium"\f[], \f[I]"low"\f[]
+
+.br
+* \f[I]ultra_hd\f[] (2160p)
+.br
+* \f[I]quad_hd\f[] (1440p)
+.br
+* \f[I]full_hd\f[] (1080p)
+.br
+* \f[I]high\f[] (720p)
+.br
+* \f[I]medium\f[] (480p)
+.br
+* \f[I]low\f[] (360p)
+.br
+* \f[I]lowest\f[] (240p)
+.br
+* \f[I]tiny\f[] (144p)
.SS extractor.bunkr.tlds
@@ -1931,6 +1966,30 @@ Possible values are
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.civitai.metadata
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Example:" 4
+.br
+* "generation"
+.br
+* ["generation"]
+
+.IP "Description:" 4
+Extract additional \f[I]generation\f[] metadata.
+
+Note: This requires 1 additional HTTP request per image.
+
+
.SS extractor.civitai.nsfw
.IP "Type:" 6
.br
@@ -2090,7 +2149,7 @@ Controls the download target for Ugoira posts.
.IP "Example:" 4
.br
-* replacements,comments,ai_tags
+* "replacements,comments,ai_tags"
.br
* ["replacements", "comments", "ai_tags"]
@@ -2712,6 +2771,51 @@ Selects an alternative source to download files from.
* \f[I]"hitomi"\f[]: Download the corresponding gallery from \f[I]hitomi.la\f[]
+.SS extractor.exhentai.tags
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Group \f[I]tags\f[] by type and
+provide them as \f[I]tags_<type>\f[] metadata fields,
+for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
+
+
+.SS extractor.facebook.author-followups
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "description:" 4
+Extract comments that include photo attachments made by the author of the post.
+
+
+.SS extractor.facebook.videos
+.IP "Type:" 6
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Control video download behavior.
+
+.br
+* \f[I]true\f[]: Extract and download video & audio separately.
+.br
+* \f[I]"ytdl"\f[]: Let \f[I]ytdl\f[] handle video extraction and download, and merge video & audio streams.
+.br
+* \f[I]false\f[]: Ignore videos.
+
+
.SS extractor.fanbox.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -3505,6 +3609,19 @@ Possible formats are
.br
+.SS extractor.koharu.tags
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Group \f[I]tags\f[] by type and
+provide them as \f[I]tags_<type>\f[] metadata fields,
+for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
+
+
.SS extractor.lolisafe.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -4461,6 +4578,25 @@ If the format is given as \f[I]string\f[], it will be extended with
restrict it to only one possible format.
+.SS extractor.rule34xyz.format
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["10", "40", "41", "2"]\f[]
+
+.IP "Example:" 4
+"33,34,4"
+
+.IP "Description:" 4
+Selects the file format to extract.
+
+When more than one format is given, the first available one is selected.
+
+
.SS extractor.sankaku.id-format
.IP "Type:" 6
\f[I]string\f[]
@@ -5954,8 +6090,9 @@ Automatically follow tag redirects.
\f[I]false\f[]
.IP "Description:" 4
-Categorize tags by their respective types
-and provide them as \f[I]tags_<type>\f[] metadata fields.
+Group \f[I]tags\f[] by type and
+provide them as \f[I]tags_<type>\f[] metadata fields,
+for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
Note: This requires 1 additional HTTP request per post.
@@ -6344,7 +6481,7 @@ See
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]false\f[]
+\f[I]true\f[]
.IP "Description:" 4
Forward gallery-dl's cookies to \f[I]ytdl\f[].
@@ -7889,15 +8026,26 @@ this cache.
.SS filters-environment
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
.IP "Description:" 4
-Evaluate filter expressions raising an exception as \f[I]false\f[]
-instead of aborting the current extractor run
-by wrapping them in a try/except block.
+Evaluate filter expressions in a special environment
+preventing them from raising fatal exceptions.
+
+\f[I]true\f[] or \f[I]"tryexcept"\f[]:
+Wrap expressions in a try/except block;
+Evaluate expressions raising an exception as \f[I]false\f[]
+\f[I]false\f[] or \f[I]"raw"\f[]:
+Do not wrap expressions in a special environment
+\f[I]"defaultdict"\f[]:
+Prevent exceptions when accessing undefined variables
+by using a \f[I]defaultdict\f[]
.SS format-separator
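The "defaultdict" mode described above evaluates filter expressions with undefined names resolving to an empty value instead of raising. A minimal sketch of that idea (the actual implementation lives in gallery_dl.util and may differ):

    import collections

    def evaluate_filter(expression, metadata):
        # undefined names resolve to "" instead of raising a NameError
        env = collections.defaultdict(str, metadata)
        return eval(expression, {"__builtins__": {}}, env)

    # "rating" is missing from the metadata, yet the expression simply evaluates to False
    print(evaluate_filter("rating == 'safe'", {"width": 800}))

With true / "tryexcept", the same expression would instead be wrapped in a try/except block and any exception counted as False; with false / "raw" it would propagate.
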
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 2a7f8f2..c7382f3 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -1,31 +1,83 @@
{
+ "#": "gallery-dl default configuration file",
+
+ "#": "full documentation at",
+ "#": "https://gdl-org.github.io/docs/configuration.html",
+
"extractor":
{
+ "#": "===============================================================",
+ "#": "==== General Extractor Options ==========================",
+ "#": "(these can be set as site-specific extractor options as well) ",
+
"base-directory": "./gallery-dl/",
- "parent-directory": false,
"postprocessors": null,
- "archive": null,
+ "skip" : true,
+ "skip-filter" : null,
+
+ "user-agent" : "auto",
+ "referer" : true,
+ "headers" : {},
+ "ciphers" : null,
+ "tls12" : true,
+ "browser" : null,
+ "proxy" : null,
+ "proxy-env" : false,
+ "source-address": null,
+ "retries" : 4,
+ "retry-codes" : [],
+ "timeout" : 30.0,
+ "verify" : true,
+ "download" : true,
+ "fallback" : true,
+
+ "archive" : null,
+ "archive-format": null,
+ "archive-prefix": null,
+ "archive-pragma": [],
+ "archive-event" : ["file"],
+ "archive-mode" : "file",
+
"cookies": null,
+ "cookies-select": null,
"cookies-update": true,
- "proxy": null,
- "skip": true,
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
- "retries": 4,
- "timeout": 30.0,
- "verify": true,
- "fallback": true,
+ "image-filter" : null,
+ "image-range" : null,
+ "image-unique" : false,
+ "chapter-filter": null,
+ "chapter-range" : null,
+ "chapter-unique": false,
- "sleep": 0,
- "sleep-request": 0,
- "sleep-extractor": 0,
+ "keywords" : {},
+ "keywords-eval" : false,
+ "keywords-default" : null,
+
+ "parent-directory": false,
+ "parent-metadata" : false,
+ "parent-skip" : false,
"path-restrict": "auto",
- "path-replace": "_",
- "path-remove": "\\u0000-\\u001f\\u007f",
- "path-strip": "auto",
+ "path-replace" : "_",
+ "path-remove" : "\\u0000-\\u001f\\u007f",
+ "path-strip" : "auto",
"path-extended": true,
+ "metadata-extractor": null,
+ "metadata-http" : null,
+ "metadata-parent" : null,
+ "metadata-path" : null,
+ "metadata-url" : null,
+ "metadata-version" : null,
+
+ "sleep" : 0,
+ "sleep-request" : 0,
+ "sleep-extractor": 0,
+ "sleep-429" : 60.0,
+
+ "actions": [],
+ "input" : null,
+ "netrc" : false,
"extension-map": {
"jpeg": "jpg",
"jpe" : "jpg",
@@ -35,157 +87,313 @@
},
+ "#": "===============================================================",
+ "#": "==== Site-specific Extractor Options ====================",
+
+ "ao3":
+ {
+ "username": "",
+ "password": "",
+ "sleep-request": "0.5-1.5",
+
+ "formats": ["pdf"]
+ },
"artstation":
{
- "external": false,
- "pro-first": true
+ "external" : false,
+ "max-posts": null,
+ "previews" : false,
+ "videos" : true,
+
+ "search": {
+ "pro-first": true
+ }
},
"aryion":
{
- "username": null,
- "password": null,
+ "username": "",
+ "password": "",
+
"recursive": true
},
- "bbc": {
+ "bbc":
+ {
"width": 1920
},
- "blogger":
+ "behance":
{
- "videos": true
+ "sleep-request": "2.0-4.0",
+
+ "modules": ["image", "video", "mediacollection", "embed"]
},
- "cyberdrop":
+ "bilibili":
{
- "domain": null
+ "sleep-request": "2.0-4.0"
},
- "danbooru":
+ "bluesky":
{
- "username": null,
- "password": null,
- "external": false,
+ "username": "",
+ "password": "",
+
+ "include" : ["media"],
+ "metadata": false,
+ "quoted" : false,
+ "reposts" : false,
+ "videos" : true,
+
+ "post": {
+ "depth": 0
+ }
+ },
+ "boosty":
+ {
+ "allowed" : true,
+ "bought" : false,
"metadata": false,
- "ugoira": false
+ "videos" : true
+ },
+ "bunkr":
+ {
+ "tlds": false
},
- "derpibooru":
+ "cien":
+ {
+ "sleep-request": "1.0-2.0",
+ "files": ["image", "video", "download", "gallery"]
+ },
+ "civitai":
{
"api-key": null,
- "filter": 56027
+ "sleep-request": "0.5-1.5",
+
+ "api" : "trpc",
+ "files" : ["image"],
+ "include" : ["user-models", "user-posts"],
+ "metadata": false,
+ "nsfw" : true,
+ "quality" : "original=true"
+ },
+ "cohost":
+ {
+ "asks" : true,
+ "pinned" : false,
+ "replies": true,
+ "shares" : true
+ },
+ "coomerparty":
+ {
+ "username": "",
+ "password": "",
+
+ "announcements": false,
+ "comments" : false,
+ "dms" : false,
+ "duplicates" : false,
+ "favorites" : "artist",
+ "files" : ["attachments", "file", "inline"],
+ "max-posts" : null,
+ "metadata" : false,
+ "revisions" : false,
+ "order-revisions": "desc"
+ },
+ "cyberdrop":
+ {
+ "domain": null
},
"deviantart":
{
- "client-id": null,
+ "client-id" : null,
"client-secret": null,
"refresh-token": null,
- "auto-watch": false,
- "auto-unwatch": false,
- "comments": false,
- "extra": false,
- "flat": true,
- "folders": false,
- "group": true,
- "include": "gallery",
- "journals": "html",
- "jwt": false,
- "mature": true,
- "metadata": false,
- "original": true,
- "pagination": "api",
- "public": true,
- "quality": 100,
- "wait-min": 0
- },
- "e621":
- {
- "username": null,
- "password": null
+ "auto-watch" : false,
+ "auto-unwatch" : false,
+ "comments" : false,
+ "comments-avatars": false,
+ "extra" : false,
+ "flat" : true,
+ "folders" : false,
+ "group" : true,
+ "include" : "gallery",
+ "intermediary" : true,
+ "journals" : "html",
+ "jwt" : false,
+ "mature" : true,
+ "metadata" : false,
+ "original" : true,
+ "pagination" : "api",
+ "previews" : false,
+ "public" : true,
+ "quality" : 100,
+ "wait-min" : 0,
+
+ "avatar": {
+ "formats": null
+ }
},
"exhentai":
{
- "username": null,
- "password": null,
- "domain": "auto",
- "limits": true,
+ "username": "",
+ "password": "",
+ "cookies" : null,
+ "sleep-request": "3.0-6.0",
+
+ "domain" : "auto",
+ "fav" : null,
+ "gp" : "resized",
+ "limits" : null,
"metadata": false,
"original": true,
- "sleep-request": 5.0
+ "source" : null,
+ "tags" : false,
+ "fallback-retries": 2
+ },
+ "fanbox":
+ {
+ "cookies" : null,
+
+ "comments": false,
+ "embeds" : true,
+ "metadata": false
},
"flickr":
{
- "exif": false,
+ "access-token" : null,
+ "access-token-secret": null,
+ "sleep-request" : "1.0-2.0",
+
+ "contexts": false,
+ "exif" : false,
"metadata": false,
"size-max": null,
- "videos": true
+ "videos" : true
},
"furaffinity":
{
+ "cookies" : null,
+
"descriptions": "text",
- "external": false,
- "include": "gallery",
- "layout": "auto"
+ "external" : false,
+ "include" : ["gallery"],
+ "layout" : "auto"
},
"gelbooru":
{
"api-key": null,
- "user-id": null
+ "user-id": null,
+
+ "favorite": {
+ "order-posts": "desc"
+ }
},
- "gofile": {
+ "generic":
+ {
+ "enabled": false
+ },
+ "gofile":
+ {
"api-token": null,
- "website-token": null
+ "website-token": null,
+ "recursive": false
},
"hentaifoundry":
{
- "include": "pictures"
+ "include": ["pictures"]
},
"hitomi":
{
- "format": "webp",
- "metadata": false
+ "format": "webp"
},
"idolcomplex":
{
- "username": null,
- "password": null,
- "sleep-request": 5.0
+ "username": "",
+ "password": "",
+ "referer" : false,
+ "sleep-request": "3.0-6.0"
},
- "imagechest": {
+ "imagechest":
+ {
"access-token": null
},
+ "imagefap":
+ {
+ "sleep-request": "2.0-4.0"
+ },
"imgbb":
{
- "username": null,
- "password": null
+ "username": "",
+ "password": ""
},
"imgur":
{
+ "client-id": null,
"mp4": true
},
"inkbunny":
{
- "username": null,
- "password": null,
+ "username": "",
+ "password": "",
"orderby": "create_datetime"
},
"instagram":
{
- "api": "rest",
"cookies": null,
- "include": "posts",
+ "sleep-request": "6.0-12.0",
+
+ "api" : "rest",
+ "cursor" : true,
+ "include" : "posts",
+ "max-posts" : null,
+ "metadata" : false,
"order-files": "asc",
"order-posts": "asc",
- "previews": false,
- "sleep-request": [6.0, 12.0],
+ "previews" : false,
+ "videos" : true
+ },
+ "itaku":
+ {
+ "sleep-request": "0.5-1.5",
"videos": true
},
+ "kemonoparty":
+ {
+ "username": "",
+ "password": "",
+
+ "announcements": false,
+ "comments" : false,
+ "dms" : false,
+ "duplicates" : false,
+ "favorites" : "artist",
+ "files" : ["attachments", "file", "inline"],
+ "max-posts" : null,
+ "metadata" : false,
+ "revisions" : false,
+ "order-revisions": "desc"
+ },
"khinsider":
{
"format": "mp3"
},
+ "koharu":
+ {
+ "username": "",
+ "password": "",
+ "sleep-request": "0.5-1.5",
+
+ "cbz" : true,
+ "format": ["0", "1600", "1280", "980", "780"],
+ "tags" : false
+ },
"luscious":
{
"gif": false
},
"mangadex":
{
+ "username": "",
+ "password": "",
+
"api-server": "https://api.mangadex.org",
"api-parameters": null,
"lang": null,
@@ -193,248 +401,562 @@
},
"mangoxo":
{
- "username": null,
- "password": null
- },
- "misskey": {
- "access-token": null,
- "renotes": false,
- "replies": true
+ "username": "",
+ "password": ""
},
"newgrounds":
{
- "username": null,
- "password": null,
- "flash": true,
- "format": "original",
- "include": "art"
+ "username": "",
+ "password": "",
+ "sleep-request": "0.5-1.5",
+
+ "flash" : true,
+ "format" : "original",
+ "include": ["art"]
},
- "nijie":
+ "nsfwalbum":
{
- "username": null,
- "password": null,
- "include": "illustration,doujin"
- },
- "nitter": {
- "quoted": false,
- "retweets": false,
- "videos": true
+ "referer": false
},
"oauth":
{
"browser": true,
- "cache": true,
- "host": "localhost",
- "port": 6414
+ "cache" : true,
+ "host" : "localhost",
+ "port" : 6414
},
"paheal":
{
"metadata": false
},
+ "patreon":
+ {
+ "cookies": null,
+
+ "files" : ["images", "image_large", "attachments", "postfile", "content"]
+ },
"pillowfort":
{
+ "username": "",
+ "password": "",
+
"external": false,
- "inline": true,
- "reblogs": false
+ "inline" : true,
+ "reblogs" : false
},
"pinterest":
{
- "domain": "auto",
+ "domain" : "auto",
"sections": true,
- "videos": true
+ "stories" : true,
+ "videos" : true
+ },
+ "pixeldrain":
+ {
+ "api-key": null
},
"pixiv":
{
"refresh-token": null,
- "include": "artworks",
- "embeds": false,
- "metadata": false,
+ "cookies" : null,
+
+ "captions" : false,
+ "comments" : false,
+ "include" : ["artworks"],
+ "max-posts": null,
+ "metadata" : false,
"metadata-bookmark": false,
- "tags": "japanese",
- "ugoira": true
+ "sanity" : true,
+ "tags" : "japanese",
+ "ugoira" : true,
+
+ "covers" : false,
+ "embeds" : false,
+ "full-series": false
},
- "reactor":
+ "plurk":
{
- "gif": false,
- "sleep-request": 5.0
+ "sleep-request": "0.5-1.5",
+ "comments": false
+ },
+ "poipiku":
+ {
+ "sleep-request": "0.5-1.5"
+ },
+ "pornpics":
+ {
+ "sleep-request": "0.5-1.5"
+ },
+ "readcomiconline":
+ {
+ "sleep-request": "3.0-6.0",
+
+ "captcha": "stop",
+ "quality": "auto"
},
"reddit":
{
- "client-id": null,
- "user-agent": null,
+ "client-id" : null,
+ "user-agent" : null,
"refresh-token": null,
- "comments": 0,
+
+ "comments" : 0,
"morecomments": false,
- "date-min": 0,
- "date-max": 253402210800,
- "date-format": "%Y-%m-%dT%H:%M:%S",
- "id-min": null,
- "id-max": null,
- "recursion": 0,
- "videos": true
+ "embeds" : true,
+ "date-min" : 0,
+ "date-max" : 253402210800,
+ "date-format" : "%Y-%m-%dT%H:%M:%S",
+ "id-min" : null,
+ "id-max" : null,
+ "previews" : true,
+ "recursion" : 0,
+ "videos" : true
},
"redgifs":
{
"format": ["hd", "sd", "gif"]
},
+ "rule34xyz":
+ {
+ "format": ["10", "40", "41", "2"]
+ },
"sankaku":
{
- "username": null,
- "password": null,
- "refresh": false
+ "username": "",
+ "password": "",
+
+ "id-format": "numeric",
+ "refresh" : false,
+ "tags" : false
},
"sankakucomplex":
{
"embeds": false,
"videos": true
},
+ "scrolller":
+ {
+ "username": "",
+ "password": "",
+ "sleep-request": "0.5-1.5"
+ },
"skeb":
{
- "article": false,
- "filters": null,
+ "article" : false,
"sent-requests": false,
- "thumbnails": false
+ "thumbnails" : false,
+
+ "search": {
+ "filters": null
+ }
},
"smugmug":
{
+ "access-token" : null,
+ "access-token-secret": null,
+
"videos": true
},
+ "soundgasm":
+ {
+ "sleep-request": "0.5-1.5"
+ },
+ "steamgriddb":
+ {
+ "animated" : true,
+ "epilepsy" : true,
+ "humor" : true,
+ "dimensions": "all",
+ "file-types": "all",
+ "languages" : "all,",
+ "nsfw" : true,
+ "sort" : "score_desc",
+ "static" : true,
+ "styles" : "all",
+ "untagged" : true,
+ "download-fake-png": true
+ },
"seiga":
{
- "username": null,
- "password": null
+ "username": "",
+ "password": "",
+ "cookies" : null
},
"subscribestar":
{
- "username": null,
- "password": null
+ "username": "",
+ "password": ""
+ },
+ "tapas":
+ {
+ "username": "",
+ "password": ""
},
"tsumino":
{
- "username": null,
- "password": null
+ "username": "",
+ "password": ""
},
"tumblr":
{
- "avatar": false,
- "external": false,
- "inline": true,
- "posts": "all",
- "offset": 0,
- "original": true,
- "reblogs": true
+ "access-token" : null,
+ "access-token-secret": null,
+
+ "avatar" : false,
+ "date-min" : 0,
+ "date-max" : null,
+ "external" : false,
+ "inline" : true,
+ "offset" : 0,
+ "original" : true,
+ "pagination": "offset",
+ "posts" : "all",
+ "ratelimit" : "abort",
+ "reblogs" : true,
+ "fallback-delay" : 120.0,
+ "fallback-retries": 2
+ },
+ "tumblrgallery":
+ {
+ "referer": false
},
"twitter":
{
- "username": null,
- "password": null,
- "cards": false,
- "conversations": false,
- "pinned": false,
- "quoted": false,
- "replies": true,
- "retweets": false,
- "strategy": null,
- "text-tweets": false,
- "twitpic": false,
- "unique": true,
- "users": "user",
- "videos": true
+ "username" : "",
+ "username-alt": "",
+ "password" : "",
+ "cookies" : null,
+
+ "ads" : false,
+ "cards" : false,
+ "cards-blacklist": [],
+ "csrf" : "cookies",
+ "cursor" : true,
+ "expand" : false,
+ "include" : ["timeline"],
+ "locked" : "abort",
+ "logout" : true,
+ "pinned" : false,
+ "quoted" : false,
+ "ratelimit" : "wait",
+ "relogin" : true,
+ "replies" : true,
+ "retweets" : false,
+ "size" : ["orig", "4096x4096", "large", "medium", "small"],
+ "text-tweets" : false,
+ "tweet-endpoint": "auto",
+ "transform" : true,
+ "twitpic" : false,
+ "unavailable" : false,
+ "unique" : true,
+ "users" : "user",
+ "videos" : true,
+
+ "timeline": {
+ "strategy": "auto"
+ },
+ "tweet": {
+ "conversations": false
+ }
},
"unsplash":
{
"format": "raw"
},
+ "urlgalleries":
+ {
+ "sleep-request": "0.5-1.5"
+ },
+ "vipergirls":
+ {
+ "username": "",
+ "password": "",
+ "sleep-request": "0.5",
+
+ "domain" : "vipergirls.to",
+ "like" : false
+ },
+ "vk":
+ {
+ "sleep-request": "0.5-1.5",
+ "offset": 0
+ },
"vsco":
{
- "videos": true
+ "include": ["gallery"],
+ "videos" : true
},
"wallhaven":
{
- "api-key": null,
- "metadata": false,
- "include": "uploads"
+ "api-key" : null,
+ "include" : ["uploads"],
+ "metadata": false
},
"weasyl":
{
- "api-key": null,
+ "api-key" : null,
"metadata": false
},
"weibo":
{
+ "sleep-request": "1.0-2.0",
+
+ "gifs" : true,
+ "include" : ["feed"],
"livephoto": true,
- "retweets": true,
- "videos": true
+ "retweets" : false,
+ "videos" : true
},
"ytdl":
{
- "enabled": false,
- "format": null,
- "generic": true,
- "logging": true,
- "module": null,
- "raw-options": null
+ "cmdline-args": null,
+ "config-file" : null,
+ "enabled" : false,
+ "format" : null,
+ "generic" : true,
+ "logging" : true,
+ "module" : null,
+ "raw-options" : null
},
"zerochan":
{
- "username": null,
- "password": null,
- "metadata": false
+ "username": "",
+ "password": "",
+ "sleep-request": "0.5-1.5",
+
+ "metadata" : false,
+ "pagination": "api",
+ "redirects" : false
+ },
+
+
+ "#": "===============================================================",
+ "#": "==== Base-Extractor and Instance Options ================",
+
+ "blogger":
+ {
+ "api-key": null,
+ "videos" : true
+ },
+
+ "Danbooru":
+ {
+ "sleep-request": "0.5-1.5",
+
+ "external" : false,
+ "metadata" : false,
+ "threshold": "auto",
+ "ugoira" : false
+ },
+ "danbooru":
+ {
+ "username": "",
+ "password": ""
+ },
+ "atfbooru":
+ {
+ "username": "",
+ "password": ""
+ },
+ "aibooru":
+ {
+ "username": "",
+ "password": ""
+ },
+ "booruvar":
+ {
+ "username": "",
+ "password": ""
+ },
+
+ "E621":
+ {
+ "sleep-request": "0.5-1.5",
+
+ "metadata" : false,
+ "threshold": "auto"
+ },
+ "e621":
+ {
+ "username": "",
+ "password": ""
+ },
+ "e926":
+ {
+ "username": "",
+ "password": ""
+ },
+ "e6ai":
+ {
+ "username": "",
+ "password": ""
+ },
+
+ "foolfuuka":
+ {
+ "sleep-request": "0.5-1.5"
+ },
+ "archivedmoe":
+ {
+ "referer": false
+ },
+
+ "mastodon":
+ {
+ "access-token": null,
+ "cards" : false,
+ "reblogs" : false,
+ "replies" : true,
+ "text-posts" : false
+ },
+
+ "misskey":
+ {
+ "access-token": null,
+ "renotes" : false,
+ "replies" : true
},
+
+ "Nijie":
+ {
+ "sleep-request": "2.0-4.0",
+ "include" : ["illustration", "doujin"]
+ },
+ "nijie":
+ {
+ "username": "",
+ "password": ""
+ },
+ "horne":
+ {
+ "username": "",
+ "password": ""
+ },
+
+ "nitter":
+ {
+ "quoted" : false,
+ "retweets": false,
+ "videos" : true
+ },
+
+ "philomena":
+ {
+ "api-key": null,
+ "sleep-request": "0.5-1.5",
+
+ "svg" : true,
+ "filter": 2
+ },
+ "derpibooru": {
+ "filter": 56027
+ },
+ "ponybooru": {
+ "filter": 3
+ },
+ "twibooru": {
+ "sleep-request": "6.0-6.1"
+ },
+
+ "postmill":
+ {
+ "save-link-post-body": false
+ },
+
+ "reactor":
+ {
+ "sleep-request": "3.0-6.0",
+ "gif": false
+ },
+
+ "wikimedia":
+ {
+ "sleep-request": "1.0-2.0",
+ "limit": 50
+ },
+
"booru":
{
- "tags": false,
- "notes": false
+ "tags" : false,
+ "notes": false,
+ "url" : "file_url"
}
},
+
+ "#": "===================================================================",
+ "#": "==== Downloader Options =====================================",
+
"downloader":
{
- "filesize-min": null,
- "filesize-max": null,
- "mtime": true,
- "part": true,
+ "filesize-min" : null,
+ "filesize-max" : null,
+ "mtime" : true,
+ "part" : true,
"part-directory": null,
- "progress": 3.0,
- "rate": null,
- "retries": 4,
- "timeout": 30.0,
- "verify": true,
+ "progress" : 3.0,
+ "proxy" : null,
+ "rate" : null,
+ "retries" : 4,
+ "timeout" : 30.0,
+ "verify" : true,
"http":
{
"adjust-extensions": true,
- "chunk-size": 32768,
- "headers": null,
- "validate": true
+ "chunk-size" : 32768,
+ "consume-content" : false,
+ "enabled" : true,
+ "headers" : null,
+ "retry-codes" : [404, 429, 430],
+ "validate" : true
},
"ytdl":
{
- "format": null,
+ "cmdline-args" : null,
+ "config-file" : null,
+ "enabled" : true,
+ "format" : null,
"forward-cookies": false,
- "logging": true,
- "module": null,
- "outtmpl": null,
- "raw-options": null
+ "logging" : true,
+ "module" : null,
+ "outtmpl" : null,
+ "raw-options" : null
}
},
+
+ "#": "===================================================================",
+ "#": "==== Output Options =========================================",
+
"output":
{
- "mode": "auto",
- "progress": true,
- "shorten": true,
- "ansi": false,
- "colors": {
- "success": "1;32",
- "skip" : "2"
- },
- "skip": true,
- "log": "[{name}][{levelname}] {message}",
- "logfile": null,
- "unsupportedfile": null
- },
+ "ansi" : true,
+ "fallback" : true,
+ "mode" : "auto",
+ "private" : false,
+ "progress" : true,
+ "shorten" : true,
+ "skip" : true,
+
+ "stdin" : null,
+ "stdout" : null,
+ "stderr" : null,
+
+ "log" : "[{name}][{levelname}] {message}",
+ "logfile" : null,
+ "errorfile": null,
+ "unsupportedfile": null,
- "netrc": false
+ "colors" :
+ {
+ "success": "1;32",
+ "skip" : "2",
+ "debug" : "0;37",
+ "info" : "1;37",
+ "warning": "1;33",
+ "error" : "1;31"
+ }
+ }
}
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index abc0001..842dead 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.7
+Version: 1.28.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -96,7 +96,9 @@ pip_ as well:
.. code:: bash
- python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
+ python3 -m pip install -U --force-reinstall --no-deps https://github.com/mikf/gallery-dl/archive/master.tar.gz
+
+Omit :code:`--no-deps` if Requests_ hasn't been installed yet.
Note: Windows users should use :code:`py -3` instead of :code:`python3`.
@@ -115,9 +117,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 42a5df1..a98e9da 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -64,6 +64,7 @@ gallery_dl/extractor/aryion.py
gallery_dl/extractor/batoto.py
gallery_dl/extractor/bbc.py
gallery_dl/extractor/behance.py
+gallery_dl/extractor/bilibili.py
gallery_dl/extractor/blogger.py
gallery_dl/extractor/bluesky.py
gallery_dl/extractor/booru.py
@@ -84,7 +85,9 @@ gallery_dl/extractor/directlink.py
gallery_dl/extractor/dynastyscans.py
gallery_dl/extractor/e621.py
gallery_dl/extractor/erome.py
+gallery_dl/extractor/everia.py
gallery_dl/extractor/exhentai.py
+gallery_dl/extractor/facebook.py
gallery_dl/extractor/fanbox.py
gallery_dl/extractor/fanleaks.py
gallery_dl/extractor/fantia.py
@@ -152,6 +155,7 @@ gallery_dl/extractor/mastodon.py
gallery_dl/extractor/message.py
gallery_dl/extractor/misskey.py
gallery_dl/extractor/moebooru.py
+gallery_dl/extractor/motherless.py
gallery_dl/extractor/myhentaigallery.py
gallery_dl/extractor/myportfolio.py
gallery_dl/extractor/naver.py
@@ -160,6 +164,7 @@ gallery_dl/extractor/newgrounds.py
gallery_dl/extractor/nhentai.py
gallery_dl/extractor/nijie.py
gallery_dl/extractor/nitter.py
+gallery_dl/extractor/noop.py
gallery_dl/extractor/nozomi.py
gallery_dl/extractor/nsfwalbum.py
gallery_dl/extractor/oauth.py
@@ -186,6 +191,9 @@ gallery_dl/extractor/recursive.py
gallery_dl/extractor/reddit.py
gallery_dl/extractor/redgifs.py
gallery_dl/extractor/rule34us.py
+gallery_dl/extractor/rule34vault.py
+gallery_dl/extractor/rule34xyz.py
+gallery_dl/extractor/saint.py
gallery_dl/extractor/sankaku.py
gallery_dl/extractor/sankakucomplex.py
gallery_dl/extractor/scrolller.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 62e96ae..ec882c3 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -107,8 +107,15 @@ def main():
# filter environment
filterenv = config.get((), "filters-environment", True)
- if not filterenv:
+ if filterenv is True:
+ pass
+ elif not filterenv:
util.compile_expression = util.compile_expression_raw
+ elif isinstance(filterenv, str):
+ if filterenv == "raw":
+ util.compile_expression = util.compile_expression_raw
+ elif filterenv.startswith("default"):
+ util.compile_expression = util.compile_expression_defaultdict
# format string separator
separator = config.get((), "format-separator")
@@ -252,9 +259,13 @@ def main():
args.input_files.append(input_file)
if not args.urls and not args.input_files:
- parser.error(
- "The following arguments are required: URL\n"
- "Use 'gallery-dl --help' to get a list of all options.")
+ if args.cookies_from_browser or config.interpolate(
+ ("extractor",), "cookies"):
+ args.urls.append("noop")
+ else:
+ parser.error(
+ "The following arguments are required: URL\nUse "
+ "'gallery-dl --help' to get a list of all options.")
if args.list_urls:
jobtype = job.UrlJob
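With the fallback above, a cookie-only run no longer requires a URL: when --cookies-from-browser or an extractor cookies setting is present, the new noop extractor is queued as a stand-in, so an invocation along the lines of

    gallery-dl --cookies-from-browser firefox --cookies-export cookies.txt

(both options appear in the completion lists earlier in this diff) can import and export cookies without downloading anything; the exact command is illustrative rather than documented usage.
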
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
index 22cb052..891104a 100644
--- a/gallery_dl/aes.py
+++ b/gallery_dl/aes.py
@@ -227,11 +227,12 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
decrypted_data = aes_ctr_decrypt(
data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
- pad_len = len(data) // 16 * 16
+ pad_len = (
+ (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES)
s_tag = ghash(
hash_subkey,
data +
- [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) + # pad
+ [0] * pad_len + # pad
bytes_to_intlist(
(0 * 8).to_bytes(8, "big") + # length of associated data
((len(data) * 8).to_bytes(8, "big")) # length of data
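The corrected pad length above only differs from the old formula when the data length is already a multiple of the block size, in which case the old code fed GHASH a superfluous block of zeros. A quick comparison of the two expressions (BLOCK_SIZE_BYTES is 16, as in gallery_dl/aes.py):

    BLOCK_SIZE_BYTES = 16

    def old_pad(n):
        pad_len = n // 16 * 16
        return BLOCK_SIZE_BYTES - n + pad_len          # zero bytes appended before GHASH

    def new_pad(n):
        return (BLOCK_SIZE_BYTES - (n % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES

    for n in (5, 20, 32):
        print(n, old_pad(n), new_pad(n))
    # 5  -> 11 11   pad to the next block boundary
    # 20 -> 12 12
    # 32 -> 16  0   the old formula adds a full extra block for block-aligned input
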
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index 855fb4f..f932e3a 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -261,13 +261,19 @@ def accumulate(path, key, conf=_config):
if key in conf:
value = conf[key]
if value:
- result.extend(value)
+ if isinstance(value, list):
+ result.extend(value)
+ else:
+ result.append(value)
for p in path:
conf = conf[p]
if key in conf:
value = conf[key]
if value:
- result[:0] = value
+ if isinstance(value, list):
+ result[:0] = value
+ else:
+ result.insert(0, value)
except Exception:
pass
return result
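This accumulate() change is what allows "postprocessors" (and other accumulated options) to be given either as a single object or as a list, with more specific configuration levels placed first in the merged result. A simplified sketch of that merge order, not the function itself:

    def accumulate(values):
        """Merge option values from the most general to the most specific level."""
        result = []
        for value in values:
            if isinstance(value, list):
                result[:0] = value          # prepend all items
            else:
                result.insert(0, value)     # prepend a single object
        return result

    global_pp = {"name": "metadata"}                   # a single post processor object
    site_pp   = [{"name": "zip"}, {"name": "exec"}]    # a list of objects
    print(accumulate([global_pp, site_pp]))
    # [{'name': 'zip'}, {'name': 'exec'}, {'name': 'metadata'}]
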
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index cec2ea0..71b0b6b 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -126,20 +126,29 @@ def load_cookies_chromium(browser_name, profile=None,
with DatabaseConnection(path) as db:
db.text_factory = bytes
+ cursor = db.cursor()
try:
- rows = db.execute(
+ meta_version = int(cursor.execute(
+ "SELECT value FROM meta WHERE key = 'version'").fetchone()[0])
+ except Exception as exc:
+ _log_warning("Failed to get cookie database meta version (%s: %s)",
+ exc.__class__.__name__, exc)
+ meta_version = 0
+
+ try:
+ rows = cursor.execute(
"SELECT host_key, name, value, encrypted_value, path, "
"expires_utc, is_secure FROM cookies" + condition, parameters)
except sqlite3.OperationalError:
- rows = db.execute(
+ rows = cursor.execute(
"SELECT host_key, name, value, encrypted_value, path, "
"expires_utc, secure FROM cookies" + condition, parameters)
failed_cookies = 0
unencrypted_cookies = 0
decryptor = _chromium_cookie_decryptor(
- config["directory"], config["keyring"], keyring)
+ config["directory"], config["keyring"], keyring, meta_version)
cookies = []
for domain, name, value, enc_value, path, expires, secure in rows:
@@ -432,13 +441,16 @@ def _chromium_browser_settings(browser_name):
def _chromium_cookie_decryptor(
- browser_root, browser_keyring_name, keyring=None):
+ browser_root, browser_keyring_name, keyring=None, meta_version=0):
if sys.platform in ("win32", "cygwin"):
- return WindowsChromiumCookieDecryptor(browser_root)
+ return WindowsChromiumCookieDecryptor(
+ browser_root, meta_version)
elif sys.platform == "darwin":
- return MacChromiumCookieDecryptor(browser_keyring_name)
+ return MacChromiumCookieDecryptor(
+ browser_keyring_name, meta_version)
else:
- return LinuxChromiumCookieDecryptor(browser_keyring_name, keyring)
+ return LinuxChromiumCookieDecryptor(
+ browser_keyring_name, keyring, meta_version)
class ChromiumCookieDecryptor:
@@ -480,11 +492,13 @@ class ChromiumCookieDecryptor:
class LinuxChromiumCookieDecryptor(ChromiumCookieDecryptor):
- def __init__(self, browser_keyring_name, keyring=None):
- self._v10_key = self.derive_key(b"peanuts")
+ def __init__(self, browser_keyring_name, keyring=None, meta_version=0):
password = _get_linux_keyring_password(browser_keyring_name, keyring)
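+ # key derived from an empty password, used as a fallback when the regular keys fail to decrypt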
+ self._empty_key = self.derive_key(b"")
+ self._v10_key = self.derive_key(b"peanuts")
self._v11_key = None if password is None else self.derive_key(password)
self._cookie_counts = {"v10": 0, "v11": 0, "other": 0}
+ self._offset = (32 if meta_version >= 24 else 0)
@staticmethod
def derive_key(password):
@@ -504,25 +518,32 @@ class LinuxChromiumCookieDecryptor(ChromiumCookieDecryptor):
if version == b"v10":
self._cookie_counts["v10"] += 1
- return _decrypt_aes_cbc(ciphertext, self._v10_key)
+ value = _decrypt_aes_cbc(ciphertext, self._v10_key, self._offset)
elif version == b"v11":
self._cookie_counts["v11"] += 1
if self._v11_key is None:
_log_warning("Unable to decrypt v11 cookies: no key found")
return None
- return _decrypt_aes_cbc(ciphertext, self._v11_key)
+ value = _decrypt_aes_cbc(ciphertext, self._v11_key, self._offset)
else:
self._cookie_counts["other"] += 1
return None
+ if value is None:
+ value = _decrypt_aes_cbc(ciphertext, self._empty_key, self._offset)
+ if value is None:
+ _log_warning("Failed to decrypt cookie (AES-CBC)")
+ return value
+
class MacChromiumCookieDecryptor(ChromiumCookieDecryptor):
- def __init__(self, browser_keyring_name):
+ def __init__(self, browser_keyring_name, meta_version=0):
password = _get_mac_keyring_password(browser_keyring_name)
self._v10_key = None if password is None else self.derive_key(password)
self._cookie_counts = {"v10": 0, "other": 0}
+ self._offset = (32 if meta_version >= 24 else 0)
@staticmethod
def derive_key(password):
@@ -545,8 +566,7 @@ class MacChromiumCookieDecryptor(ChromiumCookieDecryptor):
if self._v10_key is None:
_log_warning("Unable to decrypt v10 cookies: no key found")
return None
-
- return _decrypt_aes_cbc(ciphertext, self._v10_key)
+ return _decrypt_aes_cbc(ciphertext, self._v10_key, self._offset)
else:
self._cookie_counts["other"] += 1
@@ -558,9 +578,10 @@ class MacChromiumCookieDecryptor(ChromiumCookieDecryptor):
class WindowsChromiumCookieDecryptor(ChromiumCookieDecryptor):
- def __init__(self, browser_root):
+ def __init__(self, browser_root, meta_version=0):
self._v10_key = _get_windows_v10_key(browser_root)
self._cookie_counts = {"v10": 0, "other": 0}
+ self._offset = (32 if meta_version >= 24 else 0)
@property
def cookie_counts(self):
@@ -591,7 +612,8 @@ class WindowsChromiumCookieDecryptor(ChromiumCookieDecryptor):
authentication_tag = raw_ciphertext[-authentication_tag_length:]
return _decrypt_aes_gcm(
- ciphertext, self._v10_key, nonce, authentication_tag)
+ ciphertext, self._v10_key, nonce, authentication_tag,
+ self._offset)
else:
self._cookie_counts["other"] += 1
@@ -975,21 +997,25 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
return pbkdf2_hmac("sha1", password, salt, iterations, key_length)
-def _decrypt_aes_cbc(ciphertext, key, initialization_vector=b" " * 16):
+def _decrypt_aes_cbc(ciphertext, key, offset=0,
+ initialization_vector=b" " * 16):
+ plaintext = aes.unpad_pkcs7(aes.aes_cbc_decrypt_bytes(
+ ciphertext, key, initialization_vector))
+ if offset:
+ plaintext = plaintext[offset:]
try:
- return aes.unpad_pkcs7(aes.aes_cbc_decrypt_bytes(
- ciphertext, key, initialization_vector)).decode()
+ return plaintext.decode()
except UnicodeDecodeError:
- _log_warning("Failed to decrypt cookie (AES-CBC Unicode)")
- except ValueError:
- _log_warning("Failed to decrypt cookie (AES-CBC)")
- return None
+ return None
-def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag):
+def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, offset=0):
try:
- return aes.aes_gcm_decrypt_and_verify_bytes(
- ciphertext, key, authentication_tag, nonce).decode()
+ plaintext = aes.aes_gcm_decrypt_and_verify_bytes(
+ ciphertext, key, authentication_tag, nonce)
+ if offset:
+ plaintext = plaintext[offset:]
+ return plaintext.decode()
except UnicodeDecodeError:
_log_warning("Failed to decrypt cookie (AES-GCM Unicode)")
except ValueError:
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 26f328d..40cddec 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -29,7 +29,7 @@ class YoutubeDLDownloader(DownloaderBase):
}
self.ytdl_instance = None
- self.forward_cookies = self.config("forward-cookies", False)
+ self.forward_cookies = self.config("forward-cookies", True)
self.progress = self.config("progress", 3.0)
self.outtmpl = self.config("outtmpl")
@@ -53,6 +53,8 @@ class YoutubeDLDownloader(DownloaderBase):
if self.outtmpl == "default":
self.outtmpl = module.DEFAULT_OUTTMPL
if self.forward_cookies:
+ self.log.debug("Forwarding cookies to %s",
+ ytdl_instance.__module__)
set_cookie = ytdl_instance.cookiejar.set_cookie
for cookie in self.session.cookies:
set_cookie(cookie)
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index ce1c52a..3e30ddc 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -26,10 +26,6 @@ class _8chanExtractor(Extractor):
self.root = "https://8chan." + match.group(1)
Extractor.__init__(self, match)
- def _init(self):
- tos = self.cookies_tos_name()
- self.cookies.set(tos, "1", domain=self.root[8:])
-
@memcache()
def cookies_tos_name(self):
url = self.root + "/.static/pages/confirmed.html"
@@ -79,6 +75,7 @@ class _8chanThreadExtractor(_8chanExtractor):
def items(self):
_, board, thread = self.groups
+ self.cookies.set(self.cookies_tos_name(), "1", domain=self.root[8:])
# fetch thread data
url = "{}/{}/res/{}.".format(self.root, board, thread)
@@ -116,6 +113,8 @@ class _8chanBoardExtractor(_8chanExtractor):
def items(self):
_, board, pnum = self.groups
+ self.cookies.set(self.cookies_tos_name(), "1", domain=self.root[8:])
+
pnum = text.parse_int(pnum, 1)
url = "{}/{}/{}.json".format(self.root, board, pnum)
data = self.request(url).json()
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 4e9fa50..594ce41 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -30,6 +30,7 @@ modules = [
"batoto",
"bbc",
"behance",
+ "bilibili",
"blogger",
"bluesky",
"boosty",
@@ -47,7 +48,9 @@ modules = [
"dynastyscans",
"e621",
"erome",
+ "everia",
"exhentai",
+ "facebook",
"fanbox",
"fanleaks",
"fantia",
@@ -107,6 +110,7 @@ modules = [
"mangasee",
"mangoxo",
"misskey",
+ "motherless",
"myhentaigallery",
"myportfolio",
"naver",
@@ -139,6 +143,9 @@ modules = [
"reddit",
"redgifs",
"rule34us",
+ "rule34vault",
+ "rule34xyz",
+ "saint",
"sankaku",
"sankakucomplex",
"scrolller",
@@ -200,6 +207,7 @@ modules = [
"directlink",
"recursive",
"oauth",
+ "noop",
"ytdl",
"generic",
]
diff --git a/gallery_dl/extractor/bilibili.py b/gallery_dl/extractor/bilibili.py
new file mode 100644
index 0000000..d5c419e
--- /dev/null
+++ b/gallery_dl/extractor/bilibili.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.bilibili.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+
+
+class BilibiliExtractor(Extractor):
+ """Base class for bilibili extractors"""
+ category = "bilibili"
+ root = "https://www.bilibili.com"
+ request_interval = (3.0, 6.0)
+
+ def _init(self):
+ self.api = BilibiliAPI(self)
+
+
+class BilibiliUserArticlesExtractor(BilibiliExtractor):
+ """Extractor for a bilibili user's articles"""
+ subcategory = "user-articles"
+ pattern = r"(?:https?://)?space\.bilibili\.com/(\d+)/article"
+ example = "https://space.bilibili.com/12345/article"
+
+ def items(self):
+ for article in self.api.user_articles(self.groups[0]):
+ article["_extractor"] = BilibiliArticleExtractor
+ url = "{}/opus/{}".format(self.root, article["opus_id"])
+ yield Message.Queue, url, article
+
+
+class BilibiliArticleExtractor(BilibiliExtractor):
+ """Extractor for a bilibili article"""
+ subcategory = "article"
+ pattern = (r"(?:https?://)?"
+ r"(?:t\.bilibili\.com|(?:www\.)?bilibili.com/opus)/(\d+)")
+ example = "https://www.bilibili.com/opus/12345"
+ directory_fmt = ("{category}", "{username}")
+ filename_fmt = "{id}_{num}.{extension}"
+ archive_fmt = "{id}_{num}"
+
+ def items(self):
+ article = self.api.article(self.groups[0])
+
+ # Flatten modules list
+ modules = {}
+ for module in article["detail"]["modules"]:
+ del module['module_type']
+ modules.update(module)
+ article["detail"]["modules"] = modules
+
+ article["username"] = modules["module_author"]["name"]
+
+ pics = []
+ for paragraph in modules['module_content']['paragraphs']:
+ if "pic" not in paragraph:
+ continue
+
+ try:
+ pics.extend(paragraph["pic"]["pics"])
+ except Exception:
+ pass
+
+ article["count"] = len(pics)
+ yield Message.Directory, article
+ for article["num"], pic in enumerate(pics, 1):
+ url = pic["url"]
+ article.update(pic)
+ yield Message.Url, url, text.nameext_from_url(url, article)
+
+
+class BilibiliAPI():
+ def __init__(self, extractor):
+ self.extractor = extractor
+
+ def _call(self, endpoint, params):
+ url = "https://api.bilibili.com/x/polymer/web-dynamic/v1" + endpoint
+ data = self.extractor.request(url, params=params).json()
+
+ if data["code"] != 0:
+ self.extractor.log.debug("Server response: %s", data)
+ raise exception.StopExtraction("API request failed")
+
+ return data
+
+ def user_articles(self, user_id):
+ endpoint = "/opus/feed/space"
+ params = {"host_mid": user_id}
+
+ while True:
+ data = self._call(endpoint, params)
+
+ for item in data["data"]["items"]:
+ params["offset"] = item["opus_id"]
+ yield item
+
+ if not data["data"]["has_more"]:
+ break
+
+ def article(self, article_id):
+ url = "https://www.bilibili.com/opus/" + article_id
+
+ while True:
+ page = self.extractor.request(url).text
+ try:
+ return util.json_loads(text.extr(
+ page, "window.__INITIAL_STATE__=", "};") + "}")
+ except Exception:
+ if "window._riskdata_" not in page:
+ raise exception.StopExtraction(
+ "%s: Unable to extract INITIAL_STATE data", article_id)
+ self.extractor.wait(seconds=300)
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 37075ea..ef117da 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -159,7 +159,7 @@ class BloggerAPI():
def __init__(self, extractor):
self.extractor = extractor
- self.api_key = extractor.config("api-key", self.API_KEY)
+ self.api_key = extractor.config("api-key") or self.API_KEY
def blog_by_url(self, url):
return self._call("blogs/byurl", {"url": url}, "blog")
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index a1a488e..bbff17c 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -12,7 +12,8 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
-BASE_PATTERN = r"(?:https?://)?bsky\.app"
+BASE_PATTERN = (r"(?:https?://)?"
+ r"(?:(?:www\.)?(?:c|[fv]x)?bs[ky]y[ex]?\.app|main\.bsky\.dev)")
USER_PATTERN = BASE_PATTERN + r"/profile/([^/?#]+)"
@@ -60,8 +61,10 @@ class BlueskyExtractor(Extractor):
yield Message.Directory, post
if files:
- base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
- "?did={}&cid=".format(post["author"]["did"]))
+ did = post["author"]["did"]
+ base = (
+ "{}/xrpc/com.atproto.sync.getBlob?did={}&cid=".format(
+ self.api.service_endpoint(did), did))
for post["num"], file in enumerate(files, 1):
post.update(file)
yield Message.Url, base + file["filename"], post
@@ -84,7 +87,14 @@ class BlueskyExtractor(Extractor):
def _pid(self, post):
return post["uri"].rpartition("/")[2]
+ @memcache(keyarg=1)
+ def _instance(self, handle):
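+ # reduce a handle like 'user.bsky.social' to its instance domain ('bsky.social')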
+ return ".".join(handle.rsplit(".", 2)[-2:])
+
def _prepare(self, post):
+ author = post["author"]
+ author["instance"] = self._instance(author["handle"])
+
if self._metadata_facets:
if "facets" in post:
post["hashtags"] = tags = []
@@ -102,7 +112,7 @@ class BlueskyExtractor(Extractor):
post["hashtags"] = post["mentions"] = post["uris"] = ()
if self._metadata_user:
- post["user"] = self._user or post["author"]
+ post["user"] = self._user or author
post["instance"] = self.instance
post["post_id"] = self._pid(post)
@@ -317,6 +327,15 @@ class BlueskySearchExtractor(BlueskyExtractor):
return self.api.search_posts(self.user)
+class BlueskyHashtagExtractor(BlueskyExtractor):
+ subcategory = "hashtag"
+ pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)(?:/(top|latest))?"
+ example = "https://bsky.app/hashtag/NAME"
+
+ def posts(self):
+ return self.api.search_posts("#"+self.user, self.groups[1])
+
+
class BlueskyAPI():
"""Interface for the Bluesky API
@@ -412,11 +431,28 @@ class BlueskyAPI():
params = {"handle": handle}
return self._call(endpoint, params)["did"]
- def search_posts(self, query):
+ @memcache(keyarg=1)
+ def service_endpoint(self, did):
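+ # resolve the DID document and return its PDS (AtprotoPersonalDataServer) endpoint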
+ if did.startswith('did:web:'):
+ url = "https://" + did[8:] + "/.well-known/did.json"
+ else:
+ url = "https://plc.directory/" + did
+
+ try:
+ data = self.extractor.request(url).json()
+ for service in data["service"]:
+ if service["type"] == "AtprotoPersonalDataServer":
+ return service["serviceEndpoint"]
+ except Exception:
+ pass
+ return "https://bsky.social"
+
+ def search_posts(self, query, sort=None):
endpoint = "app.bsky.feed.searchPosts"
params = {
"q" : query,
"limit": "100",
+ "sort" : sort,
}
return self._pagination(endpoint, params, "posts")
@@ -430,7 +466,8 @@ class BlueskyAPI():
if user_did and not extr.config("reposts", False):
extr._user_did = did
if extr._metadata_user:
- extr._user = self.get_profile(did)
+ extr._user = user = self.get_profile(did)
+ user["instance"] = extr._instance(user["handle"])
return did
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index 997de4a..33823be 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -35,8 +35,16 @@ class BoostyExtractor(Extractor):
if isinstance(videos, str):
videos = videos.split(",")
elif not isinstance(videos, (list, tuple)):
- videos = ("quad_hd", "ultra_hd", "full_hd",
- "high", "medium", "low")
+ # ultra_hd: 2160p
+ # quad_hd: 1440p
+ # full_hd: 1080p
+ # high: 720p
+ # medium: 480p
+ # low: 360p
+ # lowest: 240p
+ # tiny: 144p
+ videos = ("ultra_hd", "quad_hd", "full_hd",
+ "high", "medium", "low", "lowest", "tiny")
self.videos = videos
def items(self):
@@ -325,6 +333,7 @@ class BoostyAPI():
def _pagination(self, endpoint, params, transform=None, key=None):
if "is_only_allowed" not in params and self.extractor.only_allowed:
+ params["only_allowed"] = "true"
params["is_only_allowed"] = "true"
while True:
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 6c79d0a..3e12452 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -22,13 +22,14 @@ else:
BASE_PATTERN = (
r"(?:bunkr:(?:https?://)?([^/?#]+)|"
r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|[cf]i|p[hks]|ru|la|is|to|a[cx]"
+ r"\.(?:s[kiu]|c[ir]|fi|p[hks]|ru|la|is|to|a[cx]"
r"|black|cat|media|red|site|ws|org)))"
)
DOMAINS = [
"bunkr.ac",
"bunkr.ci",
+ "bunkr.cr",
"bunkr.fi",
"bunkr.ph",
"bunkr.pk",
@@ -110,13 +111,17 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def fetch_album(self, album_id):
# album metadata
- page = self.request(self.root + "/a/" + self.album_id).text
+ page = self.request(self.root + "/a/" + album_id).text
title, size = text.split_html(text.extr(
page, "<h1", "</span>").partition(">")[2])
+ if "&" in title:
+ title = title.replace(
+ "&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
+ # files
items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->"))
return self._extract_files(items), {
- "album_id" : self.album_id,
+ "album_id" : album_id,
"album_name" : title,
"album_size" : text.extr(size, "(", ")"),
"count" : len(items),
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 0b1e44a..1e8cb42 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -44,6 +44,16 @@ class CivitaiExtractor(Extractor):
self._image_quality = "original=true"
self._image_ext = "png"
+ metadata = self.config("metadata")
+ if metadata:
+ if isinstance(metadata, str):
+ metadata = metadata.split(",")
+ elif not isinstance(metadata, (list, tuple)):
+ metadata = ("generation",)
+ self._meta_generation = ("generation" in metadata)
+ else:
+ self._meta_generation = False
+
def items(self):
models = self.models()
if models:
@@ -81,6 +91,9 @@ class CivitaiExtractor(Extractor):
if images:
for image in images:
url = self._url(image)
+ if self._meta_generation:
+ image["generation"] = self.api.image_generationdata(
+ image["id"])
image["date"] = text.parse_datetime(
image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
text.nameext_from_url(url, image)
@@ -127,6 +140,8 @@ class CivitaiExtractor(Extractor):
data["extension"] = self._image_ext
if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"])
+ if self._meta_generation:
+ file["generation"] = self.api.image_generationdata(file["id"])
yield data
@@ -469,7 +484,7 @@ class CivitaiTrpcAPI():
self.root = extractor.root + "/api/trpc/"
self.headers = {
"content-type" : "application/json",
- "x-client-version": "5.0.185",
+ "x-client-version": "5.0.211",
"x-client-date" : "",
"x-client" : "web",
"x-fingerprint" : "undefined",
@@ -491,6 +506,11 @@ class CivitaiTrpcAPI():
params = {"id": int(image_id)}
return (self._call(endpoint, params),)
+ def image_generationdata(self, image_id):
+ endpoint = "image.getGenerationData"
+ params = {"id": int(image_id)}
+ return self._call(endpoint, params)
+
def images(self, params, defaults=True):
endpoint = "image.getInfinite"
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 2146fa6..f364124 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -11,7 +11,6 @@
import os
import re
import ssl
-import sys
import time
import netrc
import queue
@@ -23,7 +22,7 @@ import requests
import threading
from requests.adapters import HTTPAdapter
from .message import Message
-from .. import config, text, util, cache, exception
+from .. import config, output, text, util, cache, exception
urllib3 = requests.packages.urllib3
@@ -43,6 +42,8 @@ class Extractor():
ciphers = None
tls12 = True
browser = None
+ useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
+ "rv:128.0) Gecko/20100101 Firefox/128.0")
request_interval = 0.0
request_interval_min = 0.0
request_interval_429 = 60.0
@@ -289,13 +290,8 @@ class Extractor():
def _check_input_allowed(self, prompt=""):
input = self.config("input")
-
if input is None:
- try:
- input = sys.stdin.isatty()
- except Exception:
- input = False
-
+ input = output.TTY_STDIN
if not input:
raise exception.StopExtraction(
"User input required (%s)", prompt.strip(" :"))
@@ -351,6 +347,9 @@ class Extractor():
headers.clear()
ssl_options = ssl_ciphers = 0
+ # .netrc Authorization headers are always disabled
+ session.trust_env = True if self.config("proxy-env", False) else False
+
browser = self.config("browser")
if browser is None:
browser = self.browser
@@ -384,11 +383,13 @@ class Extractor():
ssl_ciphers = SSL_CIPHERS[browser]
else:
useragent = self.config("user-agent")
- if useragent is None:
- useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
- "rv:128.0) Gecko/20100101 Firefox/128.0")
+ if useragent is None or useragent == "auto":
+ useragent = self.useragent
elif useragent == "browser":
useragent = _browser_useragent()
+ elif useragent is config.get(("extractor",), "user-agent") and \
+ useragent == Extractor.useragent:
+ useragent = self.useragent
headers["User-Agent"] = useragent
headers["Accept"] = "*/*"
headers["Accept-Language"] = "en-US,en;q=0.5"
@@ -660,6 +661,8 @@ class Extractor():
headers=(self._write_pages in ("all", "ALL")),
hide_auth=(self._write_pages != "ALL")
)
+ self.log.info("Writing '%s' response to '%s'",
+ response.url, path + ".txt")
except Exception as e:
self.log.warning("Failed to dump HTTP request (%s: %s)",
e.__class__.__name__, e)
@@ -1008,6 +1011,12 @@ SSL_CIPHERS = {
}
+# disable Basic Authorization header injection from .netrc data
+try:
+ requests.sessions.get_netrc_auth = lambda _: None
+except Exception:
+ pass
+
# detect brotli support
try:
BROTLI = urllib3.response.brotli is not None
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 1746647..c3dfd91 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -20,12 +20,22 @@ class DanbooruExtractor(BaseExtractor):
page_limit = 1000
page_start = None
per_page = 200
+ useragent = util.USERAGENT
request_interval = (0.5, 1.5)
def _init(self):
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
- self.includes = False
+
+ includes = self.config("metadata")
+ if includes:
+ if isinstance(includes, (list, tuple)):
+ includes = ",".join(includes)
+ elif not isinstance(includes, str):
+ includes = "artist_commentary,children,notes,parent,uploader"
+ self.includes = includes + ",id"
+ else:
+ self.includes = False
threshold = self.config("threshold")
if isinstance(threshold, int):
@@ -46,16 +56,6 @@ class DanbooruExtractor(BaseExtractor):
return pages * self.per_page
def items(self):
- self.session.headers["User-Agent"] = util.USERAGENT
-
- includes = self.config("metadata")
- if includes:
- if isinstance(includes, (list, tuple)):
- includes = ",".join(includes)
- elif not isinstance(includes, str):
- includes = "artist_commentary,children,notes,parent,uploader"
- self.includes = includes + ",id"
-
data = self.metadata()
for post in self.posts():
@@ -108,6 +108,13 @@ class DanbooruExtractor(BaseExtractor):
yield Message.Directory, post
yield Message.Url, url, post
+ def items_artists(self):
+ for artist in self.artists():
+ artist["_extractor"] = DanbooruTagExtractor
+ url = "{}/posts?tags={}".format(
+ self.root, text.quote(artist["name"]))
+ yield Message.Queue, url, artist
+
def metadata(self):
return ()
@@ -294,3 +301,39 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def posts(self):
return self._pagination("/explore/posts/popular.json", self.params)
+
+
+class DanbooruArtistExtractor(DanbooruExtractor):
+ """Extractor for danbooru artists"""
+ subcategory = "artist"
+ pattern = BASE_PATTERN + r"/artists/(\d+)"
+ example = "https://danbooru.donmai.us/artists/12345"
+
+ items = DanbooruExtractor.items_artists
+
+ def artists(self):
+ url = "{}/artists/{}.json".format(self.root, self.groups[-1])
+ return (self.request(url).json(),)
+
+
+class DanbooruArtistSearchExtractor(DanbooruExtractor):
+ """Extractor for danbooru artist searches"""
+ subcategory = "artist-search"
+ pattern = BASE_PATTERN + r"/artists/?\?([^#]+)"
+ example = "https://danbooru.donmai.us/artists?QUERY"
+
+ items = DanbooruExtractor.items_artists
+
+ def artists(self):
+ url = self.root + "/artists.json"
+ params = text.parse_query(self.groups[-1])
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ artists = self.request(url, params=params).json()
+
+ yield from artists
+
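+ # the artists endpoint returns at most 20 results per page by default; a shorter page means the last one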
+ if len(artists) < 20:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 693def9..ea3f13d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -31,7 +31,7 @@ class DeviantartExtractor(Extractor):
root = "https://www.deviantart.com"
directory_fmt = ("{category}", "{username}")
filename_fmt = "{category}_{index}_{title}.{extension}"
- cookies_domain = None
+ cookies_domain = ".deviantart.com"
cookies_names = ("auth", "auth_secure", "userinfo")
_last_request = 0
@@ -399,7 +399,7 @@ class DeviantartExtractor(Extractor):
def _textcontent_to_html(self, deviation, content):
html = content["html"]
- markup = html["markup"]
+ markup = html.get("markup")
if not markup or markup[0] != "{":
return markup
@@ -1144,7 +1144,6 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{username}", "Scraps")
archive_fmt = "s_{_username}_{index}.{extension}"
- cookies_domain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
example = "https://www.deviantart.com/USER/gallery/scraps"
@@ -1161,7 +1160,6 @@ class DeviantartSearchExtractor(DeviantartExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search_tags}")
archive_fmt = "Q_{search_tags}_{index}.{extension}"
- cookies_domain = ".deviantart.com"
pattern = (r"(?:https?://)?www\.deviantart\.com"
r"/search(?:/deviations)?/?\?([^#]+)")
example = "https://www.deviantart.com/search?q=QUERY"
@@ -1213,7 +1211,6 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
"""Extractor for deviantart gallery searches"""
subcategory = "gallery-search"
archive_fmt = "g_{_username}_{index}.{extension}"
- cookies_domain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
example = "https://www.deviantart.com/USER/gallery?q=QUERY"
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 553ec22..4a6624d 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -20,11 +20,10 @@ class E621Extractor(danbooru.DanbooruExtractor):
page_limit = 750
page_start = None
per_page = 320
+ useragent = util.USERAGENT + " (by mikf)"
request_interval_min = 1.0
def items(self):
- self.session.headers["User-Agent"] = util.USERAGENT + " (by mikf)"
-
includes = self.config("metadata") or ()
if includes:
if isinstance(includes, str):
diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py
new file mode 100644
index 0000000..94444ff
--- /dev/null
+++ b/gallery_dl/extractor/everia.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://everia.club"""
+
+from .common import Extractor, Message
+from .. import text
+import re
+
+BASE_PATTERN = r"(?:https?://)?everia\.club"
+
+
+class EveriaExtractor(Extractor):
+ category = "everia"
+ root = "https://everia.club"
+
+ def items(self):
+ data = {"_extractor": EveriaPostExtractor}
+ for url in self.posts():
+ yield Message.Queue, url, data
+
+ def posts(self):
+ return self._pagination(self.groups[0])
+
+ def _pagination(self, path, params=None, pnum=1):
+ find_posts = re.compile(r'thumbnail">\s*<a href="([^"]+)').findall
+
+ while True:
+ if pnum == 1:
+ url = "{}{}/".format(self.root, path)
+ else:
+ url = "{}{}/page/{}/".format(self.root, path, pnum)
+ response = self.request(url, params=params, allow_redirects=False)
+
+ if response.status_code >= 300:
+ return
+
+ yield from find_posts(response.text)
+ pnum += 1
+
+
+class EveriaPostExtractor(EveriaExtractor):
+ subcategory = "post"
+ directory_fmt = ("{category}", "{title}")
+ archive_fmt = "{post_url}_{num}"
+ pattern = BASE_PATTERN + r"(/\d{4}/\d{2}/\d{2}/[^/?#]+)"
+ example = "https://everia.club/0000/00/00/TITLE"
+
+ def items(self):
+ url = self.root + self.groups[0]
+ page = self.request(url).text
+ content = text.extr(page, 'itemprop="text">', "</div>")
+ urls = re.findall(r'img.*?src="([^"]+)', content)
+
+ data = {
+ "title": text.unescape(
+ text.extr(page, 'itemprop="headline">', "</h1>")),
+ "tags": list(text.extract_iter(page, 'rel="tag">', "</a>")),
+ "post_url": url,
+ "post_category": text.extr(
+ page, "post-in-category-", " ").capitalize(),
+ "count": len(urls),
+ }
+
+ yield Message.Directory, data
+ for data["num"], url in enumerate(urls, 1):
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class EveriaTagExtractor(EveriaExtractor):
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"(/tag/[^/?#]+)"
+ example = "https://everia.club/tag/TAG"
+
+
+class EveriaCategoryExtractor(EveriaExtractor):
+ subcategory = "category"
+ pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
+ example = "https://everia.club/category/CATEGORY"
+
+
+class EveriaDateExtractor(EveriaExtractor):
+ subcategory = "date"
+ pattern = (BASE_PATTERN +
+ r"(/\d{4}(?:/\d{2})?(?:/\d{2})?)(?:/page/\d+)?/?$")
+ example = "https://everia.club/0000/00/00"
+
+
+class EveriaSearchExtractor(EveriaExtractor):
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/(?:page/\d+/)?\?s=([^&#]+)"
+ example = "https://everia.club/?s=SEARCH"
+
+ def posts(self):
+ params = {"s": self.groups[0]}
+ return self._pagination("", params)
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 3e6d537..e7ba78e 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
+import collections
import itertools
import math
@@ -227,6 +228,13 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if self.config("metadata", False):
data.update(self.metadata_from_api())
data["date"] = text.parse_timestamp(data["posted"])
+ if self.config("tags", False):
+ tags = collections.defaultdict(list)
+ for tag in data["tags"]:
+ type, _, value = tag.partition(":")
+ tags[type].append(value)
+ for type, values in tags.items():
+ data["tags_" + type] = values
return data
def metadata_from_page(self, page):
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
new file mode 100644
index 0000000..04acfc5
--- /dev/null
+++ b/gallery_dl/extractor/facebook.py
@@ -0,0 +1,447 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.facebook.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
+
+
+class FacebookExtractor(Extractor):
+ """Base class for Facebook extractors"""
+ category = "facebook"
+ root = "https://www.facebook.com"
+ directory_fmt = ("{category}", "{username}", "{title} ({set_id})")
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}.{extension}"
+
+ set_url_fmt = root + "/media/set/?set={set_id}"
+ photo_url_fmt = root + "/photo/?fbid={photo_id}&set={set_id}"
+
+ def _init(self):
+ headers = self.session.headers
+ headers["Accept"] = (
+ "text/html,application/xhtml+xml,application/xml;q=0.9,"
+ "image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"
+ )
+ headers["Sec-Fetch-Dest"] = "empty"
+ headers["Sec-Fetch-Mode"] = "navigate"
+ headers["Sec-Fetch-Site"] = "same-origin"
+
+ self.fallback_retries = self.config("fallback-retries", 2)
+ self.videos = self.config("videos", True)
+ self.author_followups = self.config("author-followups", False)
+
+ @staticmethod
+ def decode_all(txt):
+ return text.unescape(
+ txt.encode("utf-8").decode("unicode_escape")
+ ).replace("\\/", "/")
+
+ @staticmethod
+ def parse_set_page(set_page):
+ directory = {
+ "set_id": text.extr(
+ set_page, '"mediaSetToken":"', '"'
+ ) or text.extr(
+ set_page, '"mediasetToken":"', '"'
+ ),
+ "username": FacebookExtractor.decode_all(
+ text.extr(
+ set_page, '"user":{"__isProfile":"User","name":"', '","'
+ ) or text.extr(
+ set_page, '"actors":[{"__typename":"User","name":"', '","'
+ )
+ ),
+ "user_id": text.extr(
+ set_page, '"owner":{"__typename":"User","id":"', '"'
+ ),
+ "title": FacebookExtractor.decode_all(text.extr(
+ set_page, '"title":{"text":"', '"'
+ )),
+ "first_photo_id": text.extr(
+ set_page,
+ '{"__typename":"Photo","__isMedia":"Photo","',
+ '","creation_story"'
+ ).rsplit('"id":"', 1)[-1] or
+ text.extr(
+ set_page, '{"__typename":"Photo","id":"', '"'
+ )
+ }
+
+ return directory
+
+ @staticmethod
+ def parse_photo_page(photo_page):
+ photo = {
+ "id": text.extr(
+ photo_page, '"__isNode":"Photo","id":"', '"'
+ ),
+ "set_id": text.extr(
+ photo_page,
+ '"url":"https:\\/\\/www.facebook.com\\/photo\\/?fbid=',
+ '"'
+ ).rsplit("&set=", 1)[-1],
+ "username": FacebookExtractor.decode_all(text.extr(
+ photo_page, '"owner":{"__typename":"User","name":"', '"'
+ )),
+ "user_id": text.extr(
+ photo_page, '"owner":{"__typename":"User","id":"', '"'
+ ),
+ "caption": FacebookExtractor.decode_all(text.extr(
+ photo_page,
+ '"message":{"delight_ranges"',
+ '"},"message_preferred_body"'
+ ).rsplit('],"text":"', 1)[-1]),
+ "date": text.parse_timestamp(text.extr(
+ photo_page, '\\"publish_time\\":', ','
+ )),
+ "url": FacebookExtractor.decode_all(text.extr(
+ photo_page, ',"image":{"uri":"', '","'
+ )),
+ "next_photo_id": text.extr(
+ photo_page,
+ '"nextMediaAfterNodeId":{"__typename":"Photo","id":"',
+ '"'
+ ) or text.extr(
+ photo_page,
+ '"nextMedia":{"edges":[{"node":{"__typename":"Photo","id":"',
+ '"'
+ )
+ }
+
+ text.nameext_from_url(photo["url"], photo)
+
+ photo["followups_ids"] = []
+ for comment_raw in text.extract_iter(
+ photo_page, '{"node":{"id"', '"cursor":null}'
+ ):
+ if ('"is_author_original_poster":true' in comment_raw and
+ '{"__typename":"Photo","id":"' in comment_raw):
+ photo["followups_ids"].append(text.extr(
+ comment_raw,
+ '{"__typename":"Photo","id":"',
+ '"'
+ ))
+
+ return photo
+
+ @staticmethod
+ def parse_post_page(post_page):
+ first_photo_url = text.extr(
+ text.extr(
+ post_page, '"__isMedia":"Photo"', '"target_group"'
+ ), '"url":"', ','
+ )
+
+ post = {
+ "set_id": text.extr(post_page, '{"mediaset_token":"', '"') or
+ text.extr(first_photo_url, 'set=', '"').rsplit("&", 1)[0]
+ }
+
+ return post
+
+ @staticmethod
+ def parse_video_page(video_page):
+ video = {
+ "id": text.extr(
+ video_page, '\\"video_id\\":\\"', '\\"'
+ ),
+ "username": FacebookExtractor.decode_all(text.extr(
+ video_page, '"actors":[{"__typename":"User","name":"', '","'
+ )),
+ "user_id": text.extr(
+ video_page, '"owner":{"__typename":"User","id":"', '"'
+ ),
+ "date": text.parse_timestamp(text.extr(
+ video_page, '\\"publish_time\\":', ','
+ )),
+ "type": "video"
+ }
+
+ if not video["username"]:
+ video["username"] = FacebookExtractor.decode_all(text.extr(
+ video_page,
+ '"__typename":"User","id":"' + video["user_id"] + '","name":"',
+ '","'
+ ))
+
+ first_video_raw = text.extr(
+ video_page, '"permalink_url"', '\\/Period>\\u003C\\/MPD>'
+ )
+
+ audio = {
+ **video,
+ "url": FacebookExtractor.decode_all(text.extr(
+ text.extr(
+ first_video_raw,
+ "AudioChannelConfiguration",
+ "BaseURL>\\u003C"
+ ),
+ "BaseURL>", "\\u003C\\/"
+ )),
+ "type": "audio"
+ }
+
+ video["urls"] = {}
+
+ for raw_url in text.extract_iter(
+ first_video_raw, 'FBQualityLabel=\\"', '\\u003C\\/BaseURL>'
+ ):
+ resolution = raw_url.split('\\"', 1)[0]
+ video["urls"][resolution] = FacebookExtractor.decode_all(
+ raw_url.split('BaseURL>', 1)[1]
+ )
+
+ if not video["urls"]:
+ return video, audio
+
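+ # pick the variant with the highest quality label (e.g. '1080p' -> 1080)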
+ video["url"] = max(
+ video["urls"].items(),
+ key=lambda x: text.parse_int(x[0][:-1])
+ )[1]
+
+ text.nameext_from_url(video["url"], video)
+ audio["filename"] = video["filename"]
+ audio["extension"] = "m4a"
+
+ return video, audio
+
+ def photo_page_request_wrapper(self, url, **kwargs):
+ LEFT_OFF_TXT = "" if url.endswith("&set=") else (
+ "\nYou can use this URL to continue from "
+ "where you left off (added \"&setextract\"): "
+ "\n" + url + "&setextract"
+ )
+
+ res = self.request(url, **kwargs)
+
+ if res.url.startswith(self.root + "/login"):
+ raise exception.AuthenticationError(
+ "You must be logged in to continue viewing images." +
+ LEFT_OFF_TXT
+ )
+
+ if b'{"__dr":"CometErrorRoot.react"}' in res.content:
+ raise exception.StopExtraction(
+ "You've been temporarily blocked from viewing images. "
+ "\nPlease try using a different account, "
+ "using a VPN or waiting before you retry." +
+ LEFT_OFF_TXT
+ )
+
+ return res
+
+ def extract_set(self, first_photo_id, set_id):
+ all_photo_ids = [first_photo_id]
+
+ retries = 0
+ i = 0
+
+ while i < len(all_photo_ids):
+ photo_id = all_photo_ids[i]
+ photo_url = self.photo_url_fmt.format(
+ photo_id=photo_id, set_id=set_id
+ )
+ photo_page = self.photo_page_request_wrapper(photo_url).text
+
+ photo = self.parse_photo_page(photo_page)
+ photo["set_id"] = set_id
+ photo["num"] = i + 1
+
+ if self.author_followups:
+ for followup_id in photo["followups_ids"]:
+ if followup_id not in all_photo_ids:
+ self.log.debug(
+ "Found a followup in comments: %s", followup_id
+ )
+ all_photo_ids.append(followup_id)
+
+ if not photo["url"]:
+ if retries < self.fallback_retries and self._interval_429:
+ seconds = self._interval_429()
+ self.log.warning(
+ "Failed to find photo download URL for %s. "
+ "Retrying in %s seconds.", photo_url, seconds,
+ )
+ self.wait(seconds=seconds, reason="429 Too Many Requests")
+ retries += 1
+ continue
+ else:
+ self.log.error(
+ "Failed to find photo download URL for " + photo_url +
+ ". Skipping."
+ )
+ retries = 0
+ else:
+ retries = 0
+ yield Message.Url, photo["url"], photo
+
+ if photo["next_photo_id"] == "":
+ self.log.debug(
+ "Can't find next image in the set. "
+ "Extraction is over."
+ )
+ elif photo["next_photo_id"] in all_photo_ids:
+ if photo["next_photo_id"] != photo["id"]:
+ self.log.debug(
+ "Detected a loop in the set, it's likely finished. "
+ "Extraction is over."
+ )
+ else:
+ all_photo_ids.append(photo["next_photo_id"])
+
+ i += 1
+
+
+class FacebookSetExtractor(FacebookExtractor):
+ """Base class for Facebook Set extractors"""
+ subcategory = "set"
+ pattern = (
+ BASE_PATTERN +
+ r"/(?:(?:media/set|photo)/?\?(?:[^&#]+&)*set=([^&#]+)"
+ r"[^/?#]*(?<!&setextract)$"
+ r"|([^/?#]+/posts/[^/?#]+)"
+ r"|photo/\?(?:[^&#]+&)*fbid=([^/?&#]+)&set=([^/?&#]+)&setextract)"
+ )
+ example = "https://www.facebook.com/media/set/?set=SET_ID"
+
+ def items(self):
+ set_id = self.groups[0] or self.groups[3]
+ path = self.groups[1]
+ if path:
+ post_url = self.root + "/" + path
+ post_page = self.request(post_url).text
+ set_id = self.parse_post_page(post_page)["set_id"]
+
+ set_url = self.set_url_fmt.format(set_id=set_id)
+ set_page = self.request(set_url).text
+
+ directory = self.parse_set_page(set_page)
+
+ yield Message.Directory, directory
+
+ yield from self.extract_set(
+ self.groups[2] or directory["first_photo_id"],
+ directory["set_id"]
+ )
+
+
+class FacebookPhotoExtractor(FacebookExtractor):
+ """Base class for Facebook Photo extractors"""
+ subcategory = "photo"
+ pattern = (BASE_PATTERN +
+ r"/(?:[^/?#]+/photos/[^/?#]+/|photo(?:.php)?/?\?"
+ r"(?:[^&#]+&)*fbid=)([^/?&#]+)[^/?#]*(?<!&setextract)$")
+ example = "https://www.facebook.com/photo/?fbid=PHOTO_ID"
+
+ def items(self):
+ photo_id = self.groups[0]
+ photo_url = self.photo_url_fmt.format(photo_id=photo_id, set_id="")
+ photo_page = self.photo_page_request_wrapper(photo_url).text
+
+ i = 1
+ photo = self.parse_photo_page(photo_page)
+ photo["num"] = i
+
+ set_page = self.request(
+ self.set_url_fmt.format(set_id=photo["set_id"])
+ ).text
+
+ directory = self.parse_set_page(set_page)
+
+ yield Message.Directory, directory
+ yield Message.Url, photo["url"], photo
+
+ if self.author_followups:
+ for comment_photo_id in photo["followups_ids"]:
+ comment_photo = self.parse_photo_page(
+ self.photo_page_request_wrapper(
+ self.photo_url_fmt.format(
+ photo_id=comment_photo_id, set_id=""
+ )
+ ).text
+ )
+ i += 1
+ comment_photo["num"] = i
+ yield Message.Url, comment_photo["url"], comment_photo
+
+
+class FacebookVideoExtractor(FacebookExtractor):
+ """Base class for Facebook Video extractors"""
+ subcategory = "video"
+ directory_fmt = ("{category}", "{username}", "{subcategory}")
+ pattern = BASE_PATTERN + r"/(?:[^/?#]+/videos/|watch/?\?v=)([^/?&#]+)"
+ example = "https://www.facebook.com/watch/?v=VIDEO_ID"
+
+ def items(self):
+ video_id = self.groups[0]
+ video_url = self.root + "/watch/?v=" + video_id
+ video_page = self.request(video_url).text
+
+ video, audio = self.parse_video_page(video_page)
+
+ if "url" not in video:
+ return
+
+ yield Message.Directory, video
+
+ if self.videos == "ytdl":
+ yield Message.Url, "ytdl:" + video_url, video
+ elif self.videos:
+ yield Message.Url, video["url"], video
+ if audio["url"]:
+ yield Message.Url, audio["url"], audio
+
+
+class FacebookProfileExtractor(FacebookExtractor):
+ """Base class for Facebook Profile Photos Set extractors"""
+ subcategory = "profile"
+ pattern = (
+ BASE_PATTERN +
+ r"/(?!media/|photo/|photo.php|watch/)"
+ r"(?:profile\.php\?id=|people/[^/?#]+/)?"
+ r"([^/?&#]+)(?:/photos(?:_by)?|/videos|/posts)?/?(?:$|\?|#)"
+ )
+ example = "https://www.facebook.com/USERNAME"
+
+ @staticmethod
+ def get_profile_photos_set_id(profile_photos_page):
+ set_ids_raw = text.extr(
+ profile_photos_page, '"pageItems"', '"page_info"'
+ )
+
+ set_id = text.extr(
+ set_ids_raw, 'set=', '"'
+ ).rsplit("&", 1)[0] or text.extr(
+ set_ids_raw, '\\/photos\\/', '\\/'
+ )
+
+ return set_id
+
+ def items(self):
+ profile_photos_url = (
+ self.root + "/" + self.groups[0] + "/photos_by"
+ )
+ profile_photos_page = self.request(profile_photos_url).text
+
+ set_id = self.get_profile_photos_set_id(profile_photos_page)
+
+ if set_id:
+ set_url = self.set_url_fmt.format(set_id=set_id)
+ set_page = self.request(set_url).text
+
+ directory = self.parse_set_page(set_page)
+
+ yield Message.Directory, directory
+
+ yield from self.extract_set(
+ directory["first_photo_id"], directory["set_id"]
+ )
+ else:
+ self.log.debug("Profile photos set ID not found.")
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index df252ee..e85a375 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -20,7 +20,6 @@ class FlickrExtractor(Extractor):
filename_fmt = "{category}_{id}.{extension}"
directory_fmt = ("{category}", "{user[username]}")
archive_fmt = "{id}"
- cookies_domain = None
request_interval = (1.0, 2.0)
request_interval_min = 0.5
@@ -45,7 +44,7 @@ class FlickrExtractor(Extractor):
self.log.debug("", exc_info=exc)
else:
photo.update(data)
- url = photo["url"]
+ url = self._file_url(photo)
yield Message.Directory, photo
yield Message.Url, url, text.nameext_from_url(url, photo)
@@ -57,6 +56,15 @@ class FlickrExtractor(Extractor):
def photos(self):
"""Return an iterable with all relevant photo objects"""
+ def _file_url(self, photo):
+ url = photo["url"]
+
+ if "/video/" in url:
+ return url
+
+ path, _, ext = url.rpartition(".")
+ return path + "_d." + ext
+
class FlickrImageExtractor(FlickrExtractor):
"""Extractor for individual images from flickr.com"""
@@ -98,7 +106,7 @@ class FlickrImageExtractor(FlickrExtractor):
if isinstance(value, dict):
location[key] = value["_content"]
- url = photo["url"]
+ url = self._file_url(photo)
yield Message.Directory, photo
yield Message.Url, url, text.nameext_from_url(url, photo)
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 0baad2f..aad5752 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -22,14 +22,14 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def _init(self):
self.api_key = self.config("api-key")
self.user_id = self.config("user-id")
- self.api_root = self.config_instance("api_root") or self.root
+ self.root_api = self.config_instance("root-api") or self.root
if self.category == "realbooru":
self.items = self._items_realbooru
self._tags = self._tags_realbooru
def _api_request(self, params):
- url = self.api_root + "/index.php?page=dapi&s=post&q=index"
+ url = self.root_api + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text)
def _pagination(self, params):
@@ -191,8 +191,8 @@ BASE_PATTERN = GelbooruV02Extractor.update({
},
"rule34": {
"root": "https://rule34.xxx",
+ "root-api": "https://api.rule34.xxx",
"pattern": r"(?:www\.)?rule34\.xxx",
- "api_root": "https://api.rule34.xxx",
},
"safebooru": {
"root": "https://safebooru.org",
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index c75c90d..7e128a4 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -254,6 +254,22 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
self.root, self.user)
+class HentaifoundryTagExtractor(HentaifoundryExtractor):
+ """Extractor for tag searches on hentaifoundry.com"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{index}"
+ pattern = BASE_PATTERN + r"/pictures/tagged/([^/?#]+)"
+ example = "https://www.hentai-foundry.com/pictures/tagged/TAG"
+
+ def __init__(self, match):
+ HentaifoundryExtractor.__init__(self, match)
+ self.page_url = "{}/pictures/tagged/{}".format(self.root, self.user)
+
+ def metadata(self):
+ return {"search_tags": self.user}
+
+
class HentaifoundryRecentExtractor(HentaifoundryExtractor):
"""Extractor for 'Recent Pictures' on hentaifoundry.com"""
subcategory = "recent"
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 4a9759f..c939a3c 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://hiperdex.top/"""
+"""Extractors for https://hipertoon.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -14,13 +14,13 @@ from ..cache import memcache
import re
BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
- r"(?:1st)?hiperdex\d?\.(?:com|net|info|top))")
+ r"(?:1st)?hiper(?:dex|toon)\d?\.(?:com|net|info|top))")
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://hiperdex.top"
+ root = "https://hipertoon.com"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
@@ -49,7 +49,7 @@ class HiperdexBase():
"status" : extr(
'class="summary-content">', '<').strip(),
"description": text.remove_html(text.unescape(extr(
- 'class="description-summary">', '</div>'))),
+ "Summary </h5>", "</div>"))),
"language": "English",
"lang" : "en",
}
@@ -69,7 +69,7 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for hiperdex manga chapters"""
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
- example = "https://hiperdex.top/manga/MANGA/CHAPTER/"
+ example = "https://hipertoon.com/manga/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@@ -91,7 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for hiperdex manga"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
- example = "https://hiperdex.top/manga/MANGA/"
+ example = "https://hipertoon.com/manga/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()
@@ -127,7 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
chapterclass = HiperdexMangaExtractor
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
- example = "https://hiperdex.top/manga-artist/NAME/"
+ example = "https://hipertoon.com/manga-artist/NAME/"
def __init__(self, match):
self.root = text.ensure_http_scheme(match.group(1))
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 18df9df..308b42c 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -108,9 +108,9 @@ class HitomiTagExtractor(Extractor):
category = "hitomi"
subcategory = "tag"
root = "https://hitomi.la"
- pattern = (r"(?:https?://)?hitomi\.la/"
- r"(tag|artist|group|series|type|character)/"
- r"([^/?#]+)\.html")
+ pattern = (r"(?:https?://)?hitomi\.la"
+ r"/(tag|artist|group|series|type|character)"
+ r"/([^/?#]+)\.html")
example = "https://hitomi.la/tag/TAG-LANG.html"
def __init__(self, match):
@@ -151,6 +151,109 @@ class HitomiTagExtractor(Extractor):
return
+class HitomiIndexExtractor(HitomiTagExtractor):
+ """Extractor for galleries from index searches on hitomi.la"""
+ subcategory = "index"
+ pattern = r"(?:https?://)?hitomi\.la/(\w+)-(\w+)\.html"
+ example = "https://hitomi.la/index-LANG.html"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.tag, self.language = match.groups()
+
+ def items(self):
+ data = {"_extractor": HitomiGalleryExtractor}
+ nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+ self.tag, self.language)
+ headers = {
+ "Origin": self.root,
+ "Cache-Control": "max-age=0",
+ }
+
+ offset = 0
+ total = None
+ while True:
+ headers["Referer"] = "{}/{}-{}.html?page={}".format(
+ self.root, self.tag, self.language, offset // 100 + 1)
+ headers["Range"] = "bytes={}-{}".format(offset, offset+99)
+ response = self.request(nozomi_url, headers=headers)
+
+ for gallery_id in decode_nozomi(response.content):
+ gallery_url = "{}/galleries/{}.html".format(
+ self.root, gallery_id)
+ yield Message.Queue, gallery_url, data
+
+ offset += 100
+ if total is None:
+ total = text.parse_int(
+ response.headers["content-range"].rpartition("/")[2])
+ if offset >= total:
+ return
+
+
+class HitomiSearchExtractor(Extractor):
+ """Extractor for galleries from multiple tag searches on hitomi.la"""
+ category = "hitomi"
+ subcategory = "search"
+ root = "https://hitomi.la"
+ pattern = r"(?:https?://)?hitomi\.la/search\.html\?([^/?#]+)"
+ example = "https://hitomi.la/search.html?QUERY"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.query = match.group(1)
+ self.tags = text.unquote(self.query).split(" ")
+
+ def items(self):
+ data = {"_extractor": HitomiGalleryExtractor}
+
+ results = [self.get_nozomi_items(tag) for tag in self.tags]
+ intersects = set.intersection(*results)
+
+ for gallery_id in sorted(intersects, reverse=True):
+ gallery_url = "{}/galleries/{}.html".format(
+ self.root, gallery_id)
+ yield Message.Queue, gallery_url, data
+
+ def get_nozomi_items(self, full_tag):
+ area, tag, language = self.get_nozomi_args(full_tag)
+
+ if area:
+ referer_base = "{}/n/{}/{}-{}.html".format(
+ self.root, area, tag, language)
+ nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
+ area, tag, language)
+ else:
+ referer_base = "{}/n/{}-{}.html".format(
+ self.root, tag, language)
+ nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+ tag, language)
+
+ headers = {
+ "Origin": self.root,
+ "Cache-Control": "max-age=0",
+ "Referer": "{}/search.html?{}".format(referer_base, self.query),
+ }
+
+ response = self.request(nozomi_url, headers=headers)
+ return set(decode_nozomi(response.content))
+
+ def get_nozomi_args(self, query):
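+ # map a search term to the (area, tag, language) triple used by the .nozomi index files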
+ ns, _, tag = query.strip().partition(":")
+ area = ns
+ language = "all"
+
+ if ns == "female" or ns == "male":
+ area = "tag"
+ tag = query
+ elif ns == "language":
+ area = None
+ language = tag
+ tag = "index"
+
+ return area, tag, language
+
+
@memcache(maxage=1800)
def _parse_gg(extr):
page = extr.request("https://ltn.hitomi.la/gg.js").text
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 115fff3..159feba 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -10,7 +10,7 @@
"""Extractors for https://imgchest.com/"""
from .common import GalleryExtractor, Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgchest\.com"
@@ -33,35 +33,23 @@ class ImagechestGalleryExtractor(GalleryExtractor):
self.api = ImagechestAPI(self, access_token)
self.gallery_url = None
self.metadata = self._metadata_api
- self.images = self._images_api
def metadata(self, page):
- if "Sorry, but the page you requested could not be found." in page:
- raise exception.NotFoundError("gallery")
-
- return {
- "gallery_id": self.gallery_id,
- "title": text.unescape(text.extr(
- page, 'property="og:title" content="', '"').strip())
- }
+ try:
+ data = util.json_loads(text.unescape(text.extr(
+ page, 'data-page="', '"')))
+ post = data["props"]["post"]
+ except Exception:
+ if "<title>Not Found</title>" in page:
+ raise exception.NotFoundError("gallery")
+ self.files = ()
+ return {}
+
+ self.files = post.pop("files", ())
+ post["gallery_id"] = self.gallery_id
+ post["tags"] = [tag["name"] for tag in post["tags"]]
- def images(self, page):
- if ' load-all">' in page:
- url = "{}/p/{}/loadAll".format(self.root, self.gallery_id)
- headers = {
- "X-Requested-With": "XMLHttpRequest",
- "Origin" : self.root,
- "Referer" : self.gallery_url,
- }
- csrf_token = text.extr(page, 'name="csrf-token" content="', '"')
- data = {"_token": csrf_token}
- page += self.request(
- url, method="POST", headers=headers, data=data).text
-
- return [
- (url, None)
- for url in text.extract_iter(page, 'data-url="', '"')
- ]
+ return post
def _metadata_api(self, page):
post = self.api.post(self.gallery_id)
@@ -74,15 +62,18 @@ class ImagechestGalleryExtractor(GalleryExtractor):
post["gallery_id"] = self.gallery_id
post.pop("image_count", None)
- self._image_list = post.pop("images")
+ self.files = post.pop("images")
return post
- def _images_api(self, page):
- return [
- (img["link"], img)
- for img in self._image_list
- ]
+ def images(self, page):
+ try:
+ return [
+ (file["link"], file)
+ for file in self.files
+ ]
+ except Exception:
+ return ()
class ImagechestUserExtractor(Extractor):
@@ -93,10 +84,6 @@ class ImagechestUserExtractor(Extractor):
pattern = BASE_PATTERN + r"/u/([^/?#]+)"
example = "https://imgchest.com/u/USER"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user = match.group(1)
-
def items(self):
url = self.root + "/api/posts"
params = {
@@ -104,7 +91,7 @@ class ImagechestUserExtractor(Extractor):
"sort" : "new",
"tag" : "",
"q" : "",
- "username": text.unquote(self.user),
+ "username": text.unquote(self.groups[0]),
"nsfw" : "true",
}
@@ -114,6 +101,9 @@ class ImagechestUserExtractor(Extractor):
except (TypeError, KeyError):
return
+ if not data:
+ return
+
for gallery in data:
gallery["_extractor"] = ImagechestGalleryExtractor
yield Message.Queue, gallery["link"], gallery
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index dd1272f..a866f45 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -101,7 +101,10 @@ class InstagramExtractor(Extractor):
continue
url = file["display_url"]
- yield Message.Url, url, text.nameext_from_url(url, file)
+ text.nameext_from_url(url, file)
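+ # URLs with 'stp=dst-jpg' deliver JPEG data even when the filename ends in .webp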
+ if file["extension"] == "webp" and "stp=dst-jpg" in url:
+ file["extension"] = "jpg"
+ yield Message.Url, url, file
def metadata(self):
return ()
@@ -390,10 +393,11 @@ class InstagramExtractor(Extractor):
def _init_cursor(self):
cursor = self.config("cursor", True)
- if not cursor:
+ if cursor is True:
+ return None
+ elif not cursor:
self._update_cursor = util.identity
- elif isinstance(cursor, str):
- return cursor
+ return cursor
def _update_cursor(self, cursor):
self.log.debug("Cursor: %s", cursor)
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 6f2d5f3..3d04f75 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache, memcache
+from ..cache import cache
import itertools
import json
import re
@@ -38,6 +38,7 @@ class KemonopartyExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
+ self.api = KemonoAPI(self)
self.revisions = self.config("revisions")
if self.revisions:
self.revisions_unique = (self.revisions == "unique")
@@ -53,48 +54,60 @@ class KemonopartyExtractor(Extractor):
sort_keys=True, separators=(",", ":")).encode
def items(self):
+ service = self.groups[2]
+ creator_id = self.groups[3]
+
find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
- duplicates = self.config("duplicates")
- comments = self.config("comments")
- username = dms = announcements = None
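+ # for dms and announcements: None = disabled, True = fetch once via the API and reuse for subsequent posts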
+ announcements = True if self.config("announcements") else None
+ comments = True if self.config("comments") else False
+ duplicates = True if self.config("duplicates") else False
+ dms = True if self.config("dms") else None
+ profile = username = None
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
if self.config("metadata"):
- username = text.unescape(text.extract(
- self.request(self.user_url).text,
- '<meta name="artist_name" content="', '"')[0])
- if self.config("dms"):
- dms = True
- if self.config("announcements"):
- announcements = True
+ profile = self.api.creator_profile(service, creator_id)
+ username = profile["name"]
posts = self.posts()
max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
+ if self.revisions:
+ posts = self._revisions(posts)
for post in posts:
-
headers["Referer"] = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = self._parse_datetime(
post.get("published") or post.get("added") or "")
- if username:
+ if profile is not None:
post["username"] = username
+ post["user_profile"] = profile
if comments:
- post["comments"] = self._extract_comments(post)
+ try:
+ post["comments"] = self.api.creator_post_comments(
+ service, creator_id, post["id"])
+ except exception.HttpError:
+ post["comments"] = ()
if dms is not None:
if dms is True:
- dms = self._extract_cards(post, "dms")
+ dms = self.api.creator_dms(
+ post["service"], post["user"])
+ try:
+ dms = dms["props"]["dms"]
+ except Exception:
+ dms = ()
post["dms"] = dms
if announcements is not None:
if announcements is True:
- announcements = self._extract_cards(post, "announcements")
+ announcements = self.api.creator_announcements(
+ post["service"], post["user"])
post["announcements"] = announcements
files = []
@@ -145,20 +158,23 @@ class KemonopartyExtractor(Extractor):
self.cookies_update(self._login_impl(
(username, self.cookies_domain), password))
- @cache(maxage=28*86400, keyarg=1)
+ @cache(maxage=3650*86400, keyarg=1)
def _login_impl(self, username, password):
username = username[0]
self.log.info("Logging in as %s", username)
- url = self.root + "/account/login"
+ url = self.root + "/api/v1/authentication/login"
data = {"username": username, "password": password}
- response = self.request(url, method="POST", data=data)
- if response.url.endswith("/account/login") and \
- "Username or password is incorrect" in response.text:
- raise exception.AuthenticationError()
+ response = self.request(url, method="POST", json=data, fatal=False)
+ if response.status_code >= 400:
+ try:
+ msg = '"' + response.json()["error"] + '"'
+ except Exception:
+ msg = '"0/1 Username or password is incorrect"'
+ raise exception.AuthenticationError(msg)
- return {c.name: c.value for c in response.history[0].cookies}
+ return {c.name: c.value for c in response.cookies}
def _file(self, post):
file = post["file"]
@@ -188,56 +204,21 @@ class KemonopartyExtractor(Extractor):
filetypes = filetypes.split(",")
return [genmap[ft] for ft in filetypes]
- def _extract_comments(self, post):
- url = "{}/{}/user/{}/post/{}".format(
- self.root, post["service"], post["user"], post["id"])
- page = self.request(url).text
-
- comments = []
- for comment in text.extract_iter(page, "<article", "</article>"):
- extr = text.extract_from(comment)
- cid = extr('id="', '"')
- comments.append({
- "id" : cid,
- "user": extr('href="#' + cid + '"', '</').strip(" \n\r>"),
- "body": extr(
- '<section class="comment__body">', '</section>').strip(),
- "date": extr('datetime="', '"'),
- })
- return comments
-
- def _extract_cards(self, post, type):
- url = "{}/{}/user/{}/{}".format(
- self.root, post["service"], post["user"], type)
- page = self.request(url).text
-
- cards = []
- for card in text.extract_iter(page, "<article", "</article>"):
- footer = text.extr(card, "<footer", "</footer>")
- cards.append({
- "body": text.unescape(text.extr(
- card, "<pre>", "</pre></",
- ).strip()),
- "date": text.extr(footer, ': ', '\n'),
- })
- return cards
-
def _parse_datetime(self, date_string):
if len(date_string) > 19:
date_string = date_string[:19]
return text.parse_datetime(date_string, "%Y-%m-%dT%H:%M:%S")
- @memcache(keyarg=1)
- def _discord_channels(self, server):
- url = "{}/api/v1/discord/channel/lookup/{}".format(
- self.root, server)
- return self.request(url).json()
+ def _revisions(self, posts):
+ return itertools.chain.from_iterable(
+ self._revisions_post(post) for post in posts)
- def _revisions_post(self, post, url):
+ def _revisions_post(self, post):
post["revision_id"] = 0
try:
- revs = self.request(url + "/revisions").json()
+ revs = self.api.creator_post_revisions(
+ post["service"], post["user"], post["id"])
except exception.HttpError:
post["revision_hash"] = self._revision_hash(post)
post["revision_index"] = 1
@@ -268,8 +249,8 @@ class KemonopartyExtractor(Extractor):
return revs
- def _revisions_all(self, url):
- revs = self.request(url + "/revisions").json()
+ def _revisions_all(self, service, creator_id, post_id):
+ revs = self.api.creator_post_revisions(service, creator_id, post_id)
cnt = idx = len(revs)
for rev in revs:
@@ -305,50 +286,30 @@ def _validate(response):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.su user listing"""
subcategory = "user"
- pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|\?|#)"
example = "https://kemono.su/SERVICE/user/12345"
def __init__(self, match):
- _, _, service, user_id, self.query = match.groups()
- self.subcategory = service
+ self.subcategory = match.group(3)
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/v1/{}/user/{}".format(
- self.root, service, user_id)
- self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
- url = self.api_url
- params = text.parse_query(self.query)
- params["o"] = text.parse_int(params.get("o"))
-
- while True:
- posts = self.request(url, params=params).json()
-
- if self.revisions:
- for post in posts:
- post_url = "{}/api/v1/{}/user/{}/post/{}".format(
- self.root, post["service"], post["user"], post["id"])
- yield from self._revisions_post(post, post_url)
- else:
- yield from posts
-
- if len(posts) < 50:
- break
- params["o"] += 50
+ _, _, service, creator_id, query = self.groups
+ params = text.parse_query(query)
+ return self.api.creator_posts(
+ service, creator_id, params.get("o"), params.get("q"))
class KemonopartyPostsExtractor(KemonopartyExtractor):
"""Extractor for kemono.su post listings"""
subcategory = "posts"
- pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?"
+ pattern = BASE_PATTERN + r"/posts()()(?:/?\?([^#]+))?"
example = "https://kemono.su/posts"
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- self.query = match.group(3)
- self.api_url = self.root + "/api/v1/posts"
-
- posts = KemonopartyUserExtractor.posts
+ def posts(self):
+ params = text.parse_query(self.groups[4])
+ return self.api.posts(
+ params.get("o"), params.get("q"), params.get("tag"))
class KemonopartyPostExtractor(KemonopartyExtractor):
@@ -358,27 +319,23 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
example = "https://kemono.su/SERVICE/user/12345/post/12345"
def __init__(self, match):
- _, _, service, user_id, post_id, self.revision, self.revision_id = \
- match.groups()
- self.subcategory = service
+ self.subcategory = match.group(3)
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/v1/{}/user/{}/post/{}".format(
- self.root, service, user_id, post_id)
- self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
- if not self.revision:
- post = self.request(self.api_url).json()
- if self.revisions:
- return self._revisions_post(post, self.api_url)
- return (post,)
+ _, _, service, creator_id, post_id, revision, revision_id = self.groups
+ post = self.api.creator_post(service, creator_id, post_id)
+ if not revision:
+ return (post["post"],)
- revs = self._revisions_all(self.api_url)
- if not self.revision_id:
+ self.revisions = False
+
+ revs = self._revisions_all(service, creator_id, post_id)
+ if not revision_id:
return revs
for rev in revs:
- if str(rev["revision_id"]) == self.revision_id:
+ if str(rev["revision_id"]) == revision_id:
return (rev,)
raise exception.NotFoundError("revision")
@@ -391,40 +348,37 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
"{channel_name|channel}")
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server}_{id}_{num}"
- pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
- example = "https://kemono.su/discord/server/12345#CHANNEL"
-
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- _, _, self.server, self.channel_id, self.channel = match.groups()
- self.channel_name = ""
+ pattern = (BASE_PATTERN + r"/discord/server/(\d+)"
+ r"(?:/(?:channel/)?(\d+)(?:#(.+))?|#(.+))")
+ example = "https://kemono.su/discord/server/12345/12345"
def items(self):
self._prepare_ddosguard_cookies()
+ _, _, server_id, channel_id, channel_name, channel = self.groups
- if self.channel_id:
- self.channel_name = self.channel
- else:
- if self.channel.isdecimal() and len(self.channel) >= 16:
+ if channel_id is None:
+ if channel.isdecimal() and len(channel) >= 16:
key = "id"
else:
key = "name"
- for channel in self._discord_channels(self.server):
- if channel[key] == self.channel:
+ for ch in self.api.discord_server(server_id):
+ if ch[key] == channel:
break
else:
raise exception.NotFoundError("channel")
- self.channel_id = channel["id"]
- self.channel_name = channel["name"]
+ channel_id = ch["id"]
+ channel_name = ch["name"]
+ elif channel_name is None:
+ channel_name = ""
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
find_hash = re.compile(HASH_PATTERN).match
- posts = self.posts()
+ posts = self.api.discord_channel(channel_id)
max_posts = self.config("max-posts")
if max_posts:
posts = itertools.islice(posts, max_posts)
@@ -441,7 +395,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
append({"path": "https://cdn.discordapp.com" + path,
"name": path, "type": "inline", "hash": ""})
- post["channel_name"] = self.channel_name
+ post["channel_name"] = channel_name
post["date"] = self._parse_datetime(post["published"])
post["count"] = len(files)
yield Message.Directory, post
@@ -461,33 +415,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
url = self.root + "/data" + url[20:]
yield Message.Url, url, post
- def posts(self):
- url = "{}/api/v1/discord/channel/{}".format(
- self.root, self.channel_id)
- params = {"o": 0}
-
- while True:
- posts = self.request(url, params=params).json()
- yield from posts
-
- if len(posts) < 150:
- break
- params["o"] += 150
-
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
example = "https://kemono.su/discord/server/12345"
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- self.server = match.group(3)
-
def items(self):
- for channel in self._discord_channels(self.server):
- url = "{}/discord/server/{}/channel/{}#{}".format(
- self.root, self.server, channel["id"], channel["name"])
+ server_id = self.groups[2]
+ for channel in self.api.discord_server(server_id):
+ url = "{}/discord/server/{}/{}#{}".format(
+ self.root, server_id, channel["id"], channel["name"])
channel["_extractor"] = KemonopartyDiscordExtractor
yield Message.Queue, url, channel
@@ -495,26 +433,21 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.su favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
+ pattern = BASE_PATTERN + r"/favorites()()(?:/?\?([^#]+))?"
example = "https://kemono.su/favorites"
- def __init__(self, match):
- KemonopartyExtractor.__init__(self, match)
- self.params = text.parse_query(match.group(3))
- self.favorites = (self.params.get("type") or
- self.config("favorites") or
- "artist")
-
def items(self):
self._prepare_ddosguard_cookies()
self.login()
- sort = self.params.get("sort")
- order = self.params.get("order") or "desc"
+ params = text.parse_query(self.groups[4])
+ type = params.get("type") or self.config("favorites") or "artist"
- if self.favorites == "artist":
- users = self.request(
- self.root + "/api/v1/account/favorites?type=artist").json()
+ sort = params.get("sort")
+ order = params.get("order") or "desc"
+
+ if type == "artist":
+ users = self.api.account_favorites("artist")
if not sort:
sort = "updated"
@@ -527,9 +460,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
self.root, user["service"], user["id"])
yield Message.Queue, url, user
- elif self.favorites == "post":
- posts = self.request(
- self.root + "/api/v1/account/favorites?type=post").json()
+ elif type == "post":
+ posts = self.api.account_favorites("post")
if not sort:
sort = "faved_seq"
@@ -541,3 +473,95 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
url = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
yield Message.Queue, url, post
+
+
+class KemonoAPI():
+ """Interface for the Kemono API v1.1.0
+
+ https://kemono.su/documentation/api
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api/v1"
+
+ def posts(self, offset=0, query=None, tags=None):
+ endpoint = "/posts"
+ params = {"q": query, "o": offset, "tags": tags}
+ return self._pagination(endpoint, params, 50, "posts")
+
+ def creator_posts(self, service, creator_id, offset=0, query=None):
+ endpoint = "/{}/user/{}".format(service, creator_id)
+ params = {"q": query, "o": offset}
+ return self._pagination(endpoint, params, 50)
+
+ def creator_announcements(self, service, creator_id):
+ endpoint = "/{}/user/{}/announcements".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_dms(self, service, creator_id):
+ endpoint = "/{}/user/{}/dms".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_fancards(self, service, creator_id):
+ endpoint = "/{}/user/{}/fancards".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_post(self, service, creator_id, post_id):
+ endpoint = "/{}/user/{}/post/{}".format(service, creator_id, post_id)
+ return self._call(endpoint)
+
+ def creator_post_comments(self, service, creator_id, post_id):
+ endpoint = "/{}/user/{}/post/{}/comments".format(
+ service, creator_id, post_id)
+ return self._call(endpoint)
+
+ def creator_post_revisions(self, service, creator_id, post_id):
+ endpoint = "/{}/user/{}/post/{}/revisions".format(
+ service, creator_id, post_id)
+ return self._call(endpoint)
+
+ def creator_profile(self, service, creator_id):
+ endpoint = "/{}/user/{}/profile".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_links(self, service, creator_id):
+ endpoint = "/{}/user/{}/links".format(service, creator_id)
+ return self._call(endpoint)
+
+ def creator_tags(self, service, creator_id):
+ endpoint = "/{}/user/{}/tags".format(service, creator_id)
+ return self._call(endpoint)
+
+ def discord_channel(self, channel_id):
+ endpoint = "/discord/channel/{}".format(channel_id)
+ return self._pagination(endpoint, {}, 150)
+
+ def discord_server(self, server_id):
+ endpoint = "/discord/channel/lookup/{}".format(server_id)
+ return self._call(endpoint)
+
+ def account_favorites(self, type):
+ endpoint = "/account/favorites"
+ params = {"type": type}
+ return self._call(endpoint, params)
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+ response = self.extractor.request(url, params=params)
+ return response.json()
+
+ def _pagination(self, endpoint, params, batch=50, key=False):
+ params["o"] = text.parse_int(params.get("o")) % 50
+
+ while True:
+ data = self._call(endpoint, params)
+
+ if key:
+ yield from data[key]
+ else:
+ yield from data
+
+ if len(data) < batch:
+ return
+ params["o"] += batch
diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py
index cacf504..b60157e 100644
--- a/gallery_dl/extractor/koharu.py
+++ b/gallery_dl/extractor/koharu.py
@@ -6,20 +6,27 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://koharu.to/"""
+"""Extractors for https://niyaniya.moe/"""
from .common import GalleryExtractor, Extractor, Message
from .. import text, exception
from ..cache import cache
+import collections
-BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to"
+BASE_PATTERN = (
+ r"(?i)(?:https?://)?("
+ r"(?:niyaniya|shupogaki)\.moe|"
+ r"(?:koharu|anchira|seia)\.to|"
+ r"(?:hoshino)\.one"
+ r")"
+)
class KoharuExtractor(Extractor):
"""Base class for koharu extractors"""
category = "koharu"
- root = "https://koharu.to"
- root_api = "https://api.koharu.to"
+ root = "https://niyaniya.moe"
+ root_api = "https://api.schale.network"
request_interval = (0.5, 1.5)
def _init(self):
@@ -62,7 +69,7 @@ class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
archive_fmt = "{id}_{num}"
request_interval = 0.0
pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
- example = "https://koharu.to/g/12345/67890abcde/"
+ example = "https://niyaniya.moe/g/12345/67890abcde/"
TAG_TYPES = {
0 : "general",
@@ -100,16 +107,26 @@ class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
def metadata(self, _):
url = "{}/books/detail/{}/{}".format(
- self.root_api, self.groups[0], self.groups[1])
+ self.root_api, self.groups[1], self.groups[2])
self.data = data = self.request(url, headers=self.headers).json()
+ data["date"] = text.parse_timestamp(data["created_at"] // 1000)
tags = []
- for tag in data["tags"]:
+ types = self.TAG_TYPES
+ tags_data = data["tags"]
+
+ for tag in tags_data:
name = tag["name"]
namespace = tag.get("namespace", 0)
- tags.append(self.TAG_TYPES[namespace] + ":" + name)
+ tags.append(types[namespace] + ":" + name)
data["tags"] = tags
- data["date"] = text.parse_timestamp(data["created_at"] // 1000)
+
+ if self.config("tags", False):
+ tags = collections.defaultdict(list)
+ for tag in tags_data:
+ tags[tag.get("namespace", 0)].append(tag["name"])
+ for type, values in tags.items():
+ data["tags_" + types[type]] = values
try:
if self.cbz:
@@ -179,11 +196,11 @@ class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
break
except KeyError:
self.log.debug("%s: Format %s is not available",
- self.groups[0], fmtid)
+ self.groups[1], fmtid)
else:
raise exception.NotFoundError("format")
- self.log.debug("%s: Selected format %s", self.groups[0], fmtid)
+ self.log.debug("%s: Selected format %s", self.groups[1], fmtid)
fmt["w"] = fmtid
return fmt
@@ -192,10 +209,10 @@ class KoharuSearchExtractor(KoharuExtractor):
"""Extractor for koharu search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/\?([^#]*)"
- example = "https://koharu.to/?s=QUERY"
+ example = "https://niyaniya.moe/?s=QUERY"
def items(self):
- params = text.parse_query(self.groups[0])
+ params = text.parse_query(self.groups[1])
params["page"] = text.parse_int(params.get("page"), 1)
return self._pagination("/books", params)
@@ -204,12 +221,12 @@ class KoharuFavoriteExtractor(KoharuExtractor):
"""Extractor for koharu favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
- example = "https://koharu.to/favorites"
+ example = "https://niyaniya.moe/favorites"
def items(self):
self.login()
- params = text.parse_query(self.groups[0])
+ params = text.parse_query(self.groups[1])
params["page"] = text.parse_int(params.get("page"), 1)
return self._pagination("/favorites", params)
@@ -226,7 +243,7 @@ class KoharuFavoriteExtractor(KoharuExtractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- url = "https://auth.koharu.to/login"
+ url = "https://auth.schale.network/login"
data = {"uname": username, "passwd": password}
response = self.request(
url, method="POST", headers=self.headers, data=data)
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 044f4f5..295b9c4 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -46,12 +46,17 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
for data["num"], file in enumerate(files, 1):
url = file["file"]
file.update(data)
- text.nameext_from_url(url, file)
+
+ if "extension" not in file:
+ text.nameext_from_url(url, file)
if "name" in file:
name = file["name"]
file["name"] = name.rpartition(".")[0] or name
file["id"] = file["filename"].rpartition("-")[2]
+ elif "id" in file:
+ file["name"] = file["filename"]
+ file["filename"] = "{}-{}".format(file["name"], file["id"])
else:
file["name"], sep, file["id"] = \
file["filename"].rpartition("-")
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 1f24593..7f87cff 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -26,6 +26,7 @@ class MangadexExtractor(Extractor):
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{chapter_id}_{page}"
root = "https://mangadex.org"
+ useragent = util.USERAGENT
_cache = {}
def __init__(self, match):
@@ -33,7 +34,6 @@ class MangadexExtractor(Extractor):
self.uuid = match.group(1)
def _init(self):
- self.session.headers["User-Agent"] = util.USERAGENT
self.api = MangadexAPI(self)
def items(self):
@@ -221,7 +221,7 @@ class MangadexAPI():
return self._call("/list/" + uuid)["data"]
def list_feed(self, uuid):
- return self._pagination("/list/" + uuid + "/feed")
+ return self._pagination_chapters("/list/" + uuid + "/feed")
@memcache(keyarg=1)
def manga(self, uuid):
@@ -230,7 +230,7 @@ class MangadexAPI():
def manga_author(self, uuid_author):
params = {"authorOrArtist": uuid_author}
- return self._pagination("/manga", params)
+ return self._pagination_manga("/manga", params)
def manga_feed(self, uuid):
order = "desc" if self.extractor.config("chapter-reverse") else "asc"
@@ -238,11 +238,11 @@ class MangadexAPI():
"order[volume]" : order,
"order[chapter]": order,
}
- return self._pagination("/manga/" + uuid + "/feed", params)
+ return self._pagination_chapters("/manga/" + uuid + "/feed", params)
def user_follows_manga_feed(self):
params = {"order[publishAt]": "desc"}
- return self._pagination("/user/follows/manga/feed", params)
+ return self._pagination_chapters("/user/follows/manga/feed", params)
def authenticate(self):
self.headers["Authorization"] = \
@@ -289,22 +289,31 @@ class MangadexAPI():
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, msg)
- def _pagination(self, endpoint, params=None):
+ def _pagination_chapters(self, endpoint, params=None):
if params is None:
params = {}
+ lang = self.extractor.config("lang")
+ if isinstance(lang, str) and "," in lang:
+ lang = lang.split(",")
+ params["translatedLanguage[]"] = lang
+ params["includes[]"] = ("scanlation_group",)
+
+ return self._pagination(endpoint, params)
+
+ def _pagination_manga(self, endpoint, params=None):
+ if params is None:
+ params = {}
+
+ return self._pagination(endpoint, params)
+
+ def _pagination(self, endpoint, params):
config = self.extractor.config
+
ratings = config("ratings")
if ratings is None:
ratings = ("safe", "suggestive", "erotica", "pornographic")
-
- lang = config("lang")
- if isinstance(lang, str) and "," in lang:
- lang = lang.split(",")
-
params["contentRating[]"] = ratings
- params["translatedLanguage[]"] = lang
- params["includes[]"] = ("scanlation_group",)
params["offset"] = 0
api_params = config("api-parameters")
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index cb7f701..5b354ac 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -19,7 +19,6 @@ class MastodonExtractor(BaseExtractor):
directory_fmt = ("mastodon", "{instance}", "{account[username]}")
filename_fmt = "{category}_{id}_{media[id]}.{extension}"
archive_fmt = "{media[id]}"
- cookies_domain = None
def __init__(self, match):
BaseExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py
new file mode 100644
index 0000000..c5b9322
--- /dev/null
+++ b/gallery_dl/extractor/motherless.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://motherless.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+from ..cache import memcache
+from datetime import timedelta
+
+BASE_PATTERN = r"(?:https?://)?motherless\.com"
+
+
+class MotherlessExtractor(Extractor):
+ """Base class for motherless extractors"""
+ category = "motherless"
+ root = "https://motherless.com"
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+
+
+class MotherlessMediaExtractor(MotherlessExtractor):
+ """Extractor for a single image/video from motherless.com"""
+ subcategory = "media"
+ pattern = (BASE_PATTERN +
+ r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?"
+ r"(?!G)[A-Z0-9]+)")
+ example = "https://motherless.com/ABC123"
+
+ def items(self):
+ file = self._extract_media(self.groups[0])
+ url = file["url"]
+ yield Message.Directory, file
+ yield Message.Url, url, text.nameext_from_url(url, file)
+
+ def _extract_media(self, path):
+ url = self.root + "/" + path
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ path, _, media_id = path.rpartition("/")
+ data = {
+ "id" : media_id,
+ "type" : extr("__mediatype = '", "'"),
+ "group": extr("__group = '", "'"),
+ "url" : extr("__fileurl = '", "'"),
+ "tags" : [
+ text.unescape(tag)
+ for tag in text.extract_iter(
+ extr('class="media-meta-tags">', "</div>"), ">#", "<")
+ ],
+ "title": text.unescape(extr("<h1>", "<")),
+ "views": text.parse_int(extr(
+ 'class="count">', " ").replace(",", "")),
+ "favorites": text.parse_int(extr(
+ 'class="count">', " ").replace(",", "")),
+ "date" : self._parse_datetime(extr('class="count">', "<")),
+ "uploader": text.unescape(extr('class="username">', "<").strip()),
+ }
+
+ if path and path[0] == "G":
+ data["gallery_id"] = path[1:]
+ data["gallery_title"] = self._extract_gallery_title(
+ page, data["gallery_id"])
+
+ return data
+
+ def _parse_datetime(self, dt):
+ if " ago" not in dt:
+ return text.parse_datetime(dt, "%d %b %Y")
+
+ value = text.parse_int(dt[:-5])
+ delta = timedelta(0, value*3600) if dt[-5] == "h" else timedelta(value)
+ return (util.datetime_utcnow() - delta).replace(
+ hour=0, minute=0, second=0)
+
+ @memcache(keyarg=2)
+ def _extract_gallery_title(self, page, gallery_id):
+ title = text.extr(
+ text.extr(page, '<h1 class="content-title">', "</h1>"),
+ "From the gallery:", "<")
+ if title:
+ return text.unescape(title.strip())
+
+ pos = page.find(' href="/G' + gallery_id + '"')
+ if pos >= 0:
+ return text.unescape(text.extract(
+ page, ' title="', '"', pos)[0])
+
+ return ""
+
+
+class MotherlessGalleryExtractor(MotherlessExtractor):
+ """Extractor for a motherless.com gallery"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "{uploader}",
+ "{gallery_id} {gallery_title}")
+ archive_fmt = "{gallery_id}_{id}"
+ pattern = BASE_PATTERN + "/G([IVG])?([A-Z0-9]+)/?$"
+ example = "https://motherless.com/GABC123"
+
+ def items(self):
+ type, gid = self.groups
+
+ if not type:
+ data = {"_extractor": MotherlessGalleryExtractor}
+ yield Message.Queue, self.root + "/GI" + gid, data
+ yield Message.Queue, self.root + "/GV" + gid, data
+ return
+
+ url = "{}/G{}{}".format(self.root, type, gid)
+ page = self.request(url).text
+ data = self._extract_gallery_data(page)
+
+ for num, thumb in enumerate(self._pagination(page), 1):
+ file = self._parse_thumb_data(thumb)
+ file.update(data)
+ file["num"] = num
+ url = file["url"]
+ yield Message.Directory, file
+ yield Message.Url, url, text.nameext_from_url(url, file)
+
+ def _pagination(self, page):
+ while True:
+ for thumb in text.extract_iter(
+ page, 'class="thumb-container', "</div>"):
+ yield thumb
+
+ url = text.extr(page, '<link rel="next" href="', '"')
+ if not url:
+ return
+ page = self.request(text.unescape(url)).text
+
+ def _extract_gallery_data(self, page):
+ extr = text.extract_from(page)
+ return {
+ "gallery_id": self.groups[-1],
+ "gallery_title": text.unescape(extr(
+ "<title>", "<").rpartition(" | ")[0]),
+ "uploader": text.remove_html(extr(
+ 'class="gallery-member-username">', "</")),
+ "count": text.parse_int(
+ extr('<span class="active">', ")")
+ .rpartition("(")[2].replace(",", "")),
+ }
+
+ def _parse_thumb_data(self, thumb):
+ extr = text.extract_from(thumb)
+ data = {
+ "id" : extr('data-codename="', '"'),
+ "type" : extr('data-mediatype="', '"'),
+ "thumbnail": extr('class="static" src="', '"'),
+ "title" : extr(' alt="', '"'),
+ }
+
+ type = data["type"]
+ url = data["thumbnail"].replace("thumb", type)
+ if type == "video":
+ url = "{}/{}.mp4".format(url.rpartition("/")[0], data["id"])
+ data["url"] = url
+
+ return data
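
_parse_datetime above accepts both absolute dates ("%d %b %Y") and relative ones like "12h ago" or "3d ago", which it converts to a UTC date truncated to midnight. A standalone sketch of the relative branch, using datetime directly instead of gallery_dl.util:

    from datetime import datetime, timedelta, timezone

    def parse_relative(dt):
        value = int(dt[:-5])   # everything before the unit char and " ago"
        # "h" means hours; anything else is treated as days
        delta = (timedelta(hours=value) if dt[-5] == "h"
                 else timedelta(days=value))
        return (datetime.now(timezone.utc) - delta).replace(
            hour=0, minute=0, second=0, microsecond=0)

    print(parse_relative("12h ago"))
    print(parse_relative("3d ago"))
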
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 61ffdee..8ffa14b 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -193,7 +193,8 @@ class NewgroundsExtractor(Extractor):
data["_comment"] = extr(
'id="author_comments"', '</div>').partition(">")[2]
data["comment"] = text.unescape(text.remove_html(
- data["_comment"], "", ""))
+ data["_comment"]
+ .replace("<p><br></p>", "\n\n").replace("<br>", "\n"), "", ""))
data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<'))
@@ -214,7 +215,7 @@ class NewgroundsExtractor(Extractor):
data = {
"title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')),
- "type" : extr('og:type" content="', '"'),
+ "type" : "art",
"_type" : "i",
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
@@ -231,7 +232,7 @@ class NewgroundsExtractor(Extractor):
if image_data:
data["_multi"] = self._extract_images_multi(image_data)
else:
- art_images = extr('<div class="art-images', '\n</div>')
+ art_images = extr('<div class="art-images', '\n\t\t</div>')
if art_images:
data["_multi"] = self._extract_images_art(art_images, data)
@@ -263,7 +264,7 @@ class NewgroundsExtractor(Extractor):
return {
"title" : text.unescape(extr('"og:title" content="', '"')),
"description": text.unescape(extr(':description" content="', '"')),
- "type" : extr('og:type" content="', '"'),
+ "type" : "audio",
"_type" : "a",
"date" : text.parse_datetime(extr(
'itemprop="datePublished" content="', '"')),
@@ -283,8 +284,13 @@ class NewgroundsExtractor(Extractor):
if src:
src = src.replace("\\/", "/")
formats = ()
+ type = extr(',"description":"', '"')
date = text.parse_datetime(extr(
'itemprop="datePublished" content="', '"'))
+ if type:
+ type = type.rpartition(" ")[2].lower()
+ else:
+ type = "flash" if text.ext_from_url(url) == "swf" else "game"
else:
url = self.root + "/portal/video/" + index
headers = {
@@ -295,6 +301,7 @@ class NewgroundsExtractor(Extractor):
formats = self._video_formats(sources)
src = next(formats, "")
date = text.parse_timestamp(src.rpartition("?")[2])
+ type = "movie"
return {
"title" : text.unescape(title),
@@ -513,7 +520,9 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
"""Extractor for a newgrounds user's favorited users"""
subcategory = "following"
- pattern = USER_PATTERN + r"/favorites/(following)"
+ pattern = (USER_PATTERN + r"/favorites/(following)"
+ r"(?:(?:/page/|/?\?page=)(\d+))?")
+
example = "https://USER.newgrounds.com/favorites/following"
def items(self):
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 09b2b16..90c5420 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -61,7 +61,7 @@ class NhentaiGalleryExtractor(GalleryExtractor):
def images(self, _):
ufmt = ("https://i.nhentai.net/galleries/" +
self.data["media_id"] + "/{}.{}")
- extdict = {"j": "jpg", "p": "png", "g": "gif"}
+ extdict = {"j": "jpg", "p": "png", "g": "gif", "w": "webp"}
return [
(ufmt.format(num, extdict.get(img["t"], "jpg")), {
diff --git a/gallery_dl/extractor/noop.py b/gallery_dl/extractor/noop.py
new file mode 100644
index 0000000..df2316c
--- /dev/null
+++ b/gallery_dl/extractor/noop.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""noop extractor"""
+
+from .common import Extractor, Message
+
+
+class NoopExtractor(Extractor):
+ category = "noop"
+ pattern = r"(?i)noo?p$"
+ example = "noop"
+
+ def items(self):
+ # yield *something* to prevent a 'No results' message
+ yield Message.Version, 1
+
+ # Save cookies manually, since it happens automatically only after
+ # extended extractor initialization, i.e. Message.Directory, which
+ # itself might cause some unintended effects.
+ if self.cookies:
+ self.cookies_store()
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 0b64ea3..3eacf1a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -26,12 +26,15 @@ class PatreonExtractor(Extractor):
_warning = True
def _init(self):
- self.session.headers["User-Agent"] = \
- "Patreon/72.2.28 (Android; Android 14; Scale/2.10)"
- if self._warning:
- if not self.cookies_check(("session_id",)):
+ if self.cookies_check(("session_id",)):
+ self.session.headers["User-Agent"] = \
+ "Patreon/72.2.28 (Android; Android 14; Scale/2.10)"
+ else:
+ if self._warning:
+ PatreonExtractor._warning = False
self.log.warning("no 'session_id' cookie set")
- PatreonExtractor._warning = False
+ self.session.headers["User-Agent"] = \
+ "Patreon/7.6.28 (Android; Android 11; Scale/2.10)"
def items(self):
generators = self._build_file_generators(self.config("files"))
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 150efed..1b67272 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -46,7 +46,7 @@ BASE_PATTERN = PhilomenaExtractor.update({
"ponybooru": {
"root": "https://ponybooru.org",
"pattern": r"(?:www\.)?ponybooru\.org",
- "filter_id": "2",
+ "filter_id": "3",
},
"furbooru": {
"root": "https://furbooru.org",
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 422325f..fe26704 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -19,7 +19,7 @@ class PiczelExtractor(Extractor):
filename_fmt = "{category}_{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
root = "https://piczel.tv"
- api_root = root
+ root_api = root
def items(self):
for post in self.posts():
@@ -75,7 +75,7 @@ class PiczelUserExtractor(PiczelExtractor):
self.user = match.group(1)
def posts(self):
- url = "{}/api/users/{}/gallery".format(self.api_root, self.user)
+ url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
return self._pagination(url)
@@ -93,7 +93,7 @@ class PiczelFolderExtractor(PiczelExtractor):
self.user, self.folder_id = match.groups()
def posts(self):
- url = "{}/api/users/{}/gallery".format(self.api_root, self.user)
+ url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
return self._pagination(url, int(self.folder_id))
@@ -108,5 +108,5 @@ class PiczelImageExtractor(PiczelExtractor):
self.image_id = match.group(1)
def posts(self):
- url = "{}/api/gallery/{}".format(self.api_root, self.image_id)
+ url = "{}/api/gallery/{}".format(self.root_api, self.image_id)
return (self.request(url).json(),)
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index 5362f13..5749240 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -52,6 +52,7 @@ class PillowfortExtractor(Extractor):
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
post["post_id"] = post.pop("id")
+ post["count"] = len(files)
yield Message.Directory, post
post["num"] = 0
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 499c579..121c7bf 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -117,11 +117,16 @@ class PinterestExtractor(Extractor):
else:
media = self._extract_image(page, block)
- elif type == "story_pin_video_block":
+ elif type == "story_pin_video_block" or "video" in block:
video = block["video"]
media = self._extract_video(video)
media["media_id"] = video.get("id") or ""
+ elif type == "story_pin_music_block" or "audio" in block:
+ media = block["audio"]
+ media["url"] = media["audio_url"]
+ media["media_id"] = media.get("id") or ""
+
elif type == "story_pin_paragraph_block":
media = {"url": "text:" + block["text"],
"extension": "txt",
@@ -130,7 +135,10 @@ class PinterestExtractor(Extractor):
else:
self.log.warning("%s: Unsupported story block '%s'",
pin.get("id"), type)
- continue
+ try:
+ media = self._extract_image(page, block)
+ except Exception:
+ continue
media["story_id"] = story_id
media["page_id"] = page_id
@@ -397,14 +405,19 @@ class PinterestAPI():
self.root = extractor.root
self.cookies = {"csrftoken": csrf_token}
self.headers = {
- "Accept" : "application/json, text/javascript, "
- "*/*, q=0.01",
- "Accept-Language" : "en-US,en;q=0.5",
- "X-Requested-With" : "XMLHttpRequest",
- "X-APP-VERSION" : "0c4af40",
- "X-CSRFToken" : csrf_token,
- "X-Pinterest-AppState": "active",
- "Origin" : self.root,
+ "Accept" : "application/json, text/javascript, "
+ "*/*, q=0.01",
+ "X-Requested-With" : "XMLHttpRequest",
+ "X-APP-VERSION" : "a89153f",
+ "X-Pinterest-AppState" : "active",
+ "X-Pinterest-Source-Url" : None,
+ "X-Pinterest-PWS-Handler": "www/[username].js",
+ "Alt-Used" : "www.pinterest.com",
+ "Connection" : "keep-alive",
+ "Cookie" : None,
+ "Sec-Fetch-Dest" : "empty",
+ "Sec-Fetch-Mode" : "cors",
+ "Sec-Fetch-Site" : "same-origin",
}
def pin(self, pin_id):
@@ -437,7 +450,12 @@ class PinterestAPI():
def board_pins(self, board_id):
"""Yield all pins of a specific board"""
- options = {"board_id": board_id}
+ options = {
+ "board_id": board_id,
+ "field_set_key": "react_grid_pin",
+ "prepend": False,
+ "bookmarks": None,
+ }
return self._pagination("BoardFeed", options)
def board_section(self, section_id):
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8c6e6d8..8ad061d 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -26,13 +26,14 @@ class PixivExtractor(Extractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
- cookies_domain = None
+ cookies_domain = ".pixiv.net"
sanity_url = "https://s.pximg.net/common/images/limit_sanity_level_360.png"
mypixiv_url = "https://s.pximg.net/common/images/limit_mypixiv_360.png"
def _init(self):
self.api = PixivAppAPI(self)
self.load_ugoira = self.config("ugoira", True)
+ self.load_ugoira_original = (self.load_ugoira == "original")
self.max_posts = self.config("max-posts", 0)
self.sanity_workaround = self.config("sanity", True)
self.meta_user = self.config("metadata")
@@ -105,34 +106,7 @@ class PixivExtractor(Extractor):
del work["image_urls"]
del work["meta_pages"]
- if work["type"] == "ugoira":
- if self.load_ugoira:
- try:
- return self._extract_ugoira(work)
- except Exception as exc:
- self.log.warning(
- "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
- work["id"], exc.__class__.__name__, exc)
-
- elif work["page_count"] == 1:
- url = meta_single_page["original_image_url"]
- if url == self.sanity_url:
- if self.sanity_workaround:
- self.log.warning("%s: 'sanity_level' warning", work["id"])
- body = self._request_ajax("/illust/" + str(work["id"]))
- return self._extract_ajax(work, body)
- else:
- self.log.warning(
- "%s: Unable to download work ('sanity_level' warning)",
- work["id"])
- elif url == self.mypixiv_url:
- work["_mypixiv"] = True
- self.log.warning("%s: 'My pixiv' locked", work["id"])
- return ()
- else:
- return ({"url": url},)
-
- else:
+ if meta_pages:
return [
{
"url" : img["image_urls"]["original"],
@@ -141,30 +115,58 @@ class PixivExtractor(Extractor):
for num, img in enumerate(meta_pages)
]
+ url = meta_single_page["original_image_url"]
+ if url == self.sanity_url:
+ work["_ajax"] = True
+ self.log.warning("%s: 'limit_sanity_level' warning", work["id"])
+ if self.sanity_workaround:
+ body = self._request_ajax("/illust/" + str(work["id"]))
+ return self._extract_ajax(work, body)
+
+ elif url == self.mypixiv_url:
+ work["_mypixiv"] = True
+ self.log.warning("%s: 'My pixiv' locked", work["id"])
+
+ elif work["type"] != "ugoira":
+ return ({"url": url},)
+
+ elif self.load_ugoira:
+ try:
+ return self._extract_ugoira(work, url)
+ except Exception as exc:
+ self.log.warning(
+ "%s: Unable to retrieve Ugoira metatdata (%s - %s)",
+ work["id"], exc.__class__.__name__, exc)
+
return ()
- def _extract_ugoira(self, work):
+ def _extract_ugoira(self, work, img_url):
ugoira = self.api.ugoira_metadata(work["id"])
- url = ugoira["zip_urls"]["medium"]
work["_ugoira_frame_data"] = work["frames"] = frames = ugoira["frames"]
- work["date_url"] = self._date_from_url(url)
+ work["_ugoira_original"] = self.load_ugoira_original
work["_http_adjust_extension"] = False
- if self.load_ugoira == "original":
- work["_ugoira_original"] = True
- base, sep, _ = url.rpartition("_ugoira")
- base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep
+ if self.load_ugoira_original:
+ work["date_url"] = self._date_from_url(img_url)
- for ext in ("jpg", "png", "gif"):
- try:
- url = "{}0.{}".format(base, ext)
- self.request(url, method="HEAD")
- break
- except exception.HttpError:
- pass
+ base, sep, ext = img_url.rpartition("_ugoira0.")
+ if sep:
+ base += "_ugoira"
else:
- self.log.warning(
- "Unable to find Ugoira frame URLs (%s)", work["id"])
+ base, sep, _ = img_url.rpartition("_ugoira")
+ base = base.replace(
+ "/img-zip-ugoira/", "/img-original/", 1) + sep
+
+ for ext in ("jpg", "png", "gif"):
+ try:
+ url = "{}0.{}".format(base, ext)
+ self.request(url, method="HEAD")
+ break
+ except exception.HttpError:
+ pass
+ else:
+ self.log.warning(
+ "Unable to find Ugoira frame URLs (%s)", work["id"])
return [
{
@@ -174,9 +176,11 @@ class PixivExtractor(Extractor):
}
for num in range(len(frames))
]
+
else:
- work["_ugoira_original"] = False
- url = url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
+ zip_url = ugoira["zip_urls"]["medium"]
+ work["date_url"] = self._date_from_url(zip_url)
+ url = zip_url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
return ({"url": url},)
def _request_ajax(self, endpoint):
@@ -333,12 +337,12 @@ class PixivUserExtractor(PixivExtractor):
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
- _warning = True
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
example = "https://www.pixiv.net/en/users/12345/artworks"
+ _warn_phpsessid = True
def _init(self):
PixivExtractor._init(self)
@@ -352,12 +356,13 @@ class PixivArtworksExtractor(PixivExtractor):
self.tag = t1 or t2
if self.sanity_workaround:
- self.cookies_domain = d = ".pixiv.net"
+ self.cookies_domain = domain = ".pixiv.net"
self._init_cookies()
- if self._warning and not self.cookies.get("PHPSESSID", domain=d):
- PixivArtworksExtractor._warning = False
- self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
- "non R-18 'sanity_level' works.")
+ if self._warn_phpsessid:
+ PixivArtworksExtractor._warn_phpsessid = False
+ if not self.cookies.get("PHPSESSID", domain=domain):
+ self.log.warning("No 'PHPSESSID' cookie set. Can detect on"
+ "ly non R-18 'limit_sanity_level' works.")
def metadata(self):
if self.config("metadata"):
@@ -601,7 +606,10 @@ class PixivRankingExtractor(PixivExtractor):
self.mode = self.date = None
def works(self):
- return self.api.illust_ranking(self.mode, self.date)
+ ranking = self.ranking
+ for ranking["rank"], work in enumerate(
+ self.api.illust_ranking(self.mode, self.date), 1):
+ yield work
def metadata(self):
query = text.parse_query(self.query)
@@ -640,10 +648,12 @@ class PixivRankingExtractor(PixivExtractor):
date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
- return {"ranking": {
+ self.ranking = ranking = {
"mode": mode,
"date": self.date,
- }}
+ "rank": 0,
+ }
+ return {"ranking": ranking}
class PixivSearchExtractor(PixivExtractor):
@@ -734,7 +744,6 @@ class PixivPixivisionExtractor(PixivExtractor):
directory_fmt = ("{category}", "pixivision",
"{pixivision_id} {pixivision_title}")
archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
- cookies_domain = ".pixiv.net"
pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
example = "https://www.pixivision.net/en/a/12345"
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index bd22283..e09a7aa 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -41,7 +41,7 @@ class PoipikuExtractor(Extractor):
post = {
"post_category": extr("<title>[", "]"),
- "count" : extr("(", " "),
+ "count" : text.parse_int(extr("(", " ")),
"post_id" : parts[-1].partition(".")[0],
"user_id" : parts[-2],
"user_name" : text.unescape(extr(
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8577e74..89eafc8 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -340,18 +340,16 @@ class RedditRedirectExtractor(Extractor):
category = "reddit"
subcategory = "redirect"
pattern = (r"(?:https?://)?(?:"
- r"(?:\w+\.)?reddit\.com/(?:(?:r)/([^/?#]+)))"
+ r"(?:\w+\.)?reddit\.com/(?:(r|u|user)/([^/?#]+)))"
r"/s/([a-zA-Z0-9]{10})")
example = "https://www.reddit.com/r/SUBREDDIT/s/abc456GHIJ"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.subreddit = match.group(1)
- self.share_url = match.group(2)
-
def items(self):
- url = "https://www.reddit.com/r/" + self.subreddit + "/s/" + \
- self.share_url
+ sub_type, subreddit, share_url = self.groups
+ if sub_type == "u":
+ sub_type = "user"
+ url = "https://www.reddit.com/{}/{}/s/{}".format(
+ sub_type, subreddit, share_url)
data = {"_extractor": RedditSubmissionExtractor}
response = self.request(url, method="HEAD", allow_redirects=False,
notfound="submission")
diff --git a/gallery_dl/extractor/rule34vault.py b/gallery_dl/extractor/rule34vault.py
new file mode 100644
index 0000000..8c8abfa
--- /dev/null
+++ b/gallery_dl/extractor/rule34vault.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://rule34vault.com/"""
+
+from .booru import BooruExtractor
+from .. import text
+import collections
+
+BASE_PATTERN = r"(?:https?://)?rule34vault\.com"
+
+
+class Rule34vaultExtractor(BooruExtractor):
+ category = "rule34vault"
+ root = "https://rule34vault.com"
+ root_cdn = "https://r34xyz.b-cdn.net"
+ filename_fmt = "{category}_{id}.{extension}"
+ per_page = 100
+
+ TAG_TYPES = {
+ 1: "general",
+ 2: "copyright",
+ 4: "character",
+ 8: "artist",
+ }
+
+ def _file_url(self, post):
+ post_id = post["id"]
+ extension = "jpg" if post["type"] == 0 else "mp4"
+ post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
+ self.root_cdn, post_id // 1000, post_id, post_id, extension)
+ return url
+
+ def _prepare(self, post):
+ post.pop("files", None)
+ post["date"] = text.parse_datetime(
+ post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ if "tags" in post:
+ post["tags"] = [t["value"] for t in post["tags"]]
+
+ def _tags(self, post, _):
+ if "tags" not in post:
+ post.update(self._fetch_post(post["id"]))
+
+ tags = collections.defaultdict(list)
+ for tag in post["tags"]:
+ tags[tag["type"]].append(tag["value"])
+ types = self.TAG_TYPES
+ for type, values in tags.items():
+ post["tags_" + types[type]] = values
+
+ def _fetch_post(self, post_id):
+ url = "{}/api/v2/post/{}".format(self.root, post_id)
+ return self.request(url).json()
+
+ def _pagination(self, endpoint, params=None):
+ url = "{}/api{}".format(self.root, endpoint)
+
+ if params is None:
+ params = {}
+ params["CountTotal"] = False
+ params["Skip"] = self.page_start * self.per_page
+ params["take"] = self.per_page
+ threshold = self.per_page
+
+ while True:
+ data = self.request(url, method="POST", json=params).json()
+
+ yield from data["items"]
+
+ if len(data["items"]) < threshold:
+ return
+ params["cursor"] = data.get("cursor")
+ params["Skip"] += params["take"]
+
+
+class Rule34vaultPostExtractor(Rule34vaultExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/post/(\d+)"
+ example = "https://rule34vault.com/post/12345"
+
+ def posts(self):
+ return (self._fetch_post(self.groups[0]),)
+
+
+class Rule34vaultPlaylistExtractor(Rule34vaultExtractor):
+ subcategory = "playlist"
+ directory_fmt = ("{category}", "{playlist_id}")
+ archive_fmt = "p_{playlist_id}_{id}"
+ pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+ example = "https://rule34vault.com/playlists/view/12345"
+
+ def metadata(self):
+ return {"playlist_id": self.groups[0]}
+
+ def posts(self):
+ endpoint = "/v2/post/search/playlist/" + self.groups[0]
+ return self._pagination(endpoint)
+
+
+class Rule34vaultTagExtractor(Rule34vaultExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/(?!p(?:ost|laylists)/)([^/?#]+)"
+ example = "https://rule34vault.com/TAG"
+
+ def metadata(self):
+ self.tags = text.unquote(self.groups[0]).split("%7C")
+ return {"search_tags": " ".join(self.tags)}
+
+ def posts(self):
+ endpoint = "/v2/post/search/root"
+ params = {"includeTags": [t.replace("_", " ") for t in self.tags]}
+ return self._pagination(endpoint, params)
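
Rule34vaultExtractor._file_url above builds the CDN path from the post id alone: posts are bucketed by id // 1000, and images (type 0) use .jpg while everything else uses .mp4. A worked example with an invented post id:

    root_cdn = "https://r34xyz.b-cdn.net"
    post_id = 123456
    extension = "jpg"        # post["type"] == 0 -> image, otherwise mp4

    url = "{}/posts/{}/{}/{}.{}".format(
        root_cdn, post_id // 1000, post_id, post_id, extension)
    print(url)   # https://r34xyz.b-cdn.net/posts/123/123456/123456.jpg
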
diff --git a/gallery_dl/extractor/rule34xyz.py b/gallery_dl/extractor/rule34xyz.py
new file mode 100644
index 0000000..f1e7518
--- /dev/null
+++ b/gallery_dl/extractor/rule34xyz.py
@@ -0,0 +1,143 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://rule34.xyz/"""
+
+from .booru import BooruExtractor
+from .. import text
+import collections
+
+BASE_PATTERN = r"(?:https?://)?rule34\.xyz"
+
+
+class Rule34xyzExtractor(BooruExtractor):
+ category = "rule34xyz"
+ root = "https://rule34.xyz"
+ root_cdn = "https://rule34xyz.b-cdn.net"
+ filename_fmt = "{category}_{id}.{extension}"
+ per_page = 60
+
+ TAG_TYPES = {
+ 0: "general",
+ 1: "copyright",
+ 2: "character",
+ 3: "artist",
+ }
+
+ def _init(self):
+ formats = self.config("format")
+ if formats:
+ if isinstance(formats, str):
+ formats = formats.split(",")
+ self.formats = formats
+ else:
+ self.formats = ("10", "40", "41", "2")
+
+ def _file_url(self, post):
+ post["files"] = files = {
+ str(link["type"]): link["url"]
+ for link in post.pop("imageLinks")
+ }
+
+ for fmt in self.formats:
+ if fmt in files:
+ break
+ else:
+ fmt = "2"
+ self.log.warning("%s: Requested format not available", post["id"])
+
+ post["file_url"] = url = files[fmt]
+ post["format_id"] = fmt
+ post["format"] = url.rsplit(".", 2)[1]
+ return url
+
+ def _prepare(self, post):
+ post.pop("filesPreview", None)
+ post.pop("tagsWithType", None)
+ post["date"] = text.parse_datetime(
+ post["created"], "%Y-%m-%dT%H:%M:%S.%f")
+
+ def _tags(self, post, _):
+ if post.get("tagsWithType") is None:
+ post.update(self._fetch_post(post["id"]))
+
+ tags = collections.defaultdict(list)
+ for tag in post["tagsWithType"]:
+ tags[tag["type"]].append(tag["value"])
+ types = self.TAG_TYPES
+ for type, values in tags.items():
+ post["tags_" + types[type]] = values
+
+ def _fetch_post(self, post_id):
+ url = "{}/api/post/{}".format(self.root, post_id)
+ return self.request(url).json()
+
+ def _pagination(self, endpoint, params=None):
+ url = "{}/api{}".format(self.root, endpoint)
+
+ if params is None:
+ params = {}
+ params["IncludeLinks"] = "true"
+ params["IncludeTags"] = "true"
+ params["OrderBy"] = "0"
+ params["Skip"] = self.page_start * self.per_page
+ params["Take"] = self.per_page
+ params["DisableTotal"] = "true"
+ threshold = self.per_page
+
+ while True:
+ data = self.request(url, params=params).json()
+
+ yield from data["items"]
+
+ if len(data["items"]) < threshold:
+ return
+ params["Skip"] += params["Take"]
+
+
+class Rule34xyzPostExtractor(Rule34xyzExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/post/(\d+)"
+ example = "https://rule34.xyz/post/12345"
+
+ def posts(self):
+ return (self._fetch_post(self.groups[0]),)
+
+
+class Rule34xyzPlaylistExtractor(Rule34xyzExtractor):
+ subcategory = "playlist"
+ directory_fmt = ("{category}", "{playlist_id}")
+ archive_fmt = "p_{playlist_id}_{id}"
+ pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
+ example = "https://rule34.xyz/playlists/view/12345"
+
+ def metadata(self):
+ return {"playlist_id": self.groups[0]}
+
+ def posts(self):
+ endpoint = "/playlist-item"
+ params = {"PlaylistId": self.groups[0]}
+ return self._pagination(endpoint, params)
+
+
+class Rule34xyzTagExtractor(Rule34xyzExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/([^/?#]+)$"
+ example = "https://rule34.xyz/TAG"
+
+ def metadata(self):
+ self.tags = text.unquote(self.groups[0]).replace("_", " ")
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ endpoint = "/post/search"
+ params = {"Tag": self.tags}
+ return self._pagination(endpoint, params)
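
The format handling in Rule34xyzExtractor._file_url walks the configured format ids in order ("10", "40", "41", "2" by default) and takes the first one the post's imageLinks provide, falling back to "2" with a warning. A minimal sketch of that selection with invented link data:

    files = {str(link["type"]): link["url"] for link in (
        {"type": 2,  "url": "https://example.invalid/full.png"},
        {"type": 41, "url": "https://example.invalid/alt.webp"},
    )}

    for fmt in ("10", "40", "41", "2"):   # default preference order
        if fmt in files:
            break
    else:
        fmt = "2"                         # last-resort format

    print(fmt, files[fmt])                # 41 https://example.invalid/alt.webp
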
diff --git a/gallery_dl/extractor/saint.py b/gallery_dl/extractor/saint.py
new file mode 100644
index 0000000..784cdc0
--- /dev/null
+++ b/gallery_dl/extractor/saint.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://saint2.su/"""
+
+from .lolisafe import LolisafeAlbumExtractor
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?saint\d*\.(?:su|pk|to)"
+
+
+class SaintAlbumExtractor(LolisafeAlbumExtractor):
+ """Extractor for saint albums"""
+ category = "saint"
+ root = "https://saint2.su"
+ pattern = BASE_PATTERN + r"/a/([^/?#]+)"
+ example = "https://saint2.su/a/ID"
+
+ def fetch_album(self, album_id):
+ # album metadata
+ response = self.request(self.root + "/a/" + album_id)
+ extr = text.extract_from(response.text)
+
+ title = extr("<title>", "<")
+ descr = extr('name="description" content="', '"')
+ files = []
+
+ while True:
+ id2 = extr("/thumbs/", "-")
+ if not id2:
+ break
+ files.append({
+ "id2" : id2,
+ "date" : text.parse_timestamp(extr("", ".")),
+ "id" : extr("/embed/", '"'),
+ "size" : text.parse_int(extr('data="', '"')),
+ "file" : text.unescape(extr(
+ "onclick=\"play(", ")").strip("\"'")),
+ "id_dl": extr("/d/", ")").rstrip("\"'"),
+ })
+
+ return files, {
+ "album_id" : album_id,
+ "album_name" : text.unescape(title.rpartition(" - ")[0]),
+ "album_size" : sum(file["size"] for file in files),
+ "description" : text.unescape(descr),
+ "count" : len(files),
+ "_http_headers": {"Referer": response.url}
+ }
+
+
+class SaintMediaExtractor(SaintAlbumExtractor):
+ """Extractor for saint media links"""
+ subcategory = "media"
+ directory_fmt = ("{category}",)
+ pattern = BASE_PATTERN + r"(/(embe)?d/([^/?#]+))"
+ example = "https://saint2.su/embed/ID"
+
+ def fetch_album(self, album_id):
+ try:
+ path, embed, _ = self.groups
+
+ url = self.root + path
+ response = self.request(url)
+ extr = text.extract_from(response.text)
+
+ if embed:
+ file = {
+ "id" : album_id,
+ "id2" : extr("/thumbs/", "-"),
+ "date" : text.parse_timestamp(extr("", ".")),
+ "file" : text.unescape(extr('<source src="', '"')),
+ "id_dl": extr("/d/", "'"),
+ }
+
+ else: # /d/
+ file = {
+ "file" : text.unescape(extr('<a href="', '"')),
+ "id_dl" : album_id,
+ "name" : album_id,
+ "filename" : album_id,
+ "extension": "mp4",
+ }
+
+ file["_http_headers"] = {"Referer": response.url}
+ except Exception as exc:
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ return (), {}
+
+ return (file,), {
+ "album_id" : "",
+ "album_name" : "",
+ "album_size" : -1,
+ "description": "",
+ "count" : 1,
+ }
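
The new saint extractor parses album pages with text.extract_from(), which scans the document once, left to right, and returns the substring between each begin/end marker pair it is asked for. A minimal self-contained sketch of that scanning technique (independent of gallery-dl's text module; the sample HTML is made up):

def extract_from(page):
    """Return a closure yielding successive 'begin ... end' spans from *page*."""
    pos = 0
    def extr(begin, end):
        nonlocal pos
        start = page.find(begin, pos)
        if start < 0:
            return ""
        start += len(begin)
        stop = page.find(end, start)
        if stop < 0:
            return ""
        pos = stop + len(end)
        return page[start:stop]
    return extr

page = '<title>Album - saint</title><img src="/thumbs/abc-123.png">'
extr = extract_from(page)
print(extr("<title>", "<"))    # Album - saint
print(extr("/thumbs/", "-"))   # abc
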
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 7db8172..d5309dc 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -76,14 +76,15 @@ class SankakuExtractor(BooruExtractor):
def _tags(self, post, page):
tags = collections.defaultdict(list)
- types = self.TAG_TYPES
for tag in post["tags"]:
name = tag["name"]
if name:
- tags[types[tag["type"]]].append(name.lower().replace(" ", "_"))
- for key, value in tags.items():
- post["tags_" + key] = value
- post["tag_string_" + key] = " ".join(value)
+ tags[tag["type"]].append(name.lower().replace(" ", "_"))
+ types = self.TAG_TYPES
+ for type, values in tags.items():
+ name = types[type]
+ post["tags_" + name] = values
+ post["tag_string_" + name] = " ".join(values)
def _notes(self, post, page):
if post.get("has_notes"):
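
The reworked _tags() now buckets tag names by their numeric type id first and translates the ids through TAG_TYPES in a second pass while building the tags_* / tag_string_* keys. A standalone sketch of that grouping (the type-id table below is illustrative, not Sankaku's real mapping):

import collections

TAG_TYPES = {0: "general", 1: "artist", 3: "copyright", 4: "character"}  # illustrative

def group_tags(post):
    tags = collections.defaultdict(list)
    for tag in post["tags"]:
        name = tag["name"]
        if name:
            tags[tag["type"]].append(name.lower().replace(" ", "_"))
    for type_id, values in tags.items():
        key = TAG_TYPES[type_id]
        post["tags_" + key] = values
        post["tag_string_" + key] = " ".join(values)

post = {"tags": [{"name": "Blue Sky", "type": 0}, {"name": "Some Artist", "type": 1}]}
group_tags(post)
print(post["tag_string_general"], "|", post["tag_string_artist"])
# blue_sky | some_artist
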
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
index 9f9f0c4..c818c98 100644
--- a/gallery_dl/extractor/scrolller.py
+++ b/gallery_dl/extractor/scrolller.py
@@ -32,7 +32,12 @@ class ScrolllerExtractor(Extractor):
for post in self.posts():
- src = max(post["mediaSources"], key=self._sort_key)
+ media_sources = post.get("mediaSources")
+ if not media_sources:
+ self.log.warning("%s: No media files", post.get("id"))
+ continue
+
+ src = max(media_sources, key=self._sort_key)
post.update(src)
url = src["url"]
text.nameext_from_url(url, post)
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 3639c0b..48bd918 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -21,7 +21,6 @@ class SmugmugExtractor(Extractor):
category = "smugmug"
filename_fmt = ("{category}_{User[NickName]:?/_/}"
"{Image[UploadKey]}_{Image[ImageKey]}.{extension}")
- cookies_domain = None
empty_user = {
"Uri": "",
"ResponseLevel": "Public",
diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py
index 8582824..c120ee5 100644
--- a/gallery_dl/extractor/steamgriddb.py
+++ b/gallery_dl/extractor/steamgriddb.py
@@ -56,14 +56,19 @@ class SteamgriddbExtractor(Extractor):
download_fake_png = self.config("download-fake-png", True)
for asset in self.assets():
- if download_fake_png and asset.get("fake_png"):
- urls = (asset["url"], asset["fake_png"])
- else:
- urls = (asset["url"],)
+ fake_png = download_fake_png and asset.get("fake_png")
- asset["count"] = len(urls)
+ asset["count"] = 2 if fake_png else 1
yield Message.Directory, asset
- for asset["num"], url in enumerate(urls, 1):
+
+ asset["num"] = 1
+ url = asset["url"]
+ yield Message.Url, url, text.nameext_from_url(url, asset)
+
+ if fake_png:
+ asset["num"] = 2
+ asset["_http_adjust_extension"] = False
+ url = fake_png
yield Message.Url, url, text.nameext_from_url(url, asset)
def _call(self, endpoint, **kwargs):
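
Rather than enumerating a one- or two-element URL tuple, the asset loop now always yields the primary url as file 1 and, if requested, the fake_png variant as file 2 with _http_adjust_extension disabled, which presumably keeps the ".png" filename even when the payload's real format differs. A simplified model of that two-step yield with gallery-dl's Message machinery stubbed out as plain tuples:

def emit_asset(asset, download_fake_png=True):
    """Yield ('url', url, info) tuples for one asset dict; a simplified model."""
    fake_png = download_fake_png and asset.get("fake_png")
    asset["count"] = 2 if fake_png else 1

    asset["num"] = 1
    yield ("url", asset["url"], dict(asset))

    if fake_png:
        asset["num"] = 2
        asset["_http_adjust_extension"] = False  # keep the '.png' suffix as-is
        yield ("url", fake_png, dict(asset))

for kind, url, info in emit_asset({"url": "https://example.org/grid.jpg",
                                   "fake_png": "https://example.org/grid.png"}):
    print(info["num"], url)
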
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 73455d2..8d1fcde 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -21,8 +21,8 @@ BASE_PATTERN = (
r"([\w-]+\.tumblr\.com)))"
)
-POST_TYPES = frozenset((
- "text", "quote", "link", "answer", "video", "audio", "photo", "chat"))
+POST_TYPES = frozenset(("text", "quote", "link", "answer", "video",
+ "audio", "photo", "chat", "search"))
class TumblrExtractor(Extractor):
@@ -31,7 +31,6 @@ class TumblrExtractor(Extractor):
directory_fmt = ("{category}", "{blog_name}")
filename_fmt = "{category}_{blog_name}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
- cookies_domain = None
def __init__(self, match):
Extractor.__init__(self, match)
@@ -83,14 +82,21 @@ class TumblrExtractor(Extractor):
return
if post["type"] not in self.types:
continue
- if not blog:
- blog = self.api.info(self.blog)
- blog["uuid"] = self.blog
- if self.avatar:
- url = self.api.avatar(self.blog)
- yield Message.Directory, {"blog": blog}
- yield self._prepare_avatar(url, post.copy(), blog)
+ if "blog" in post:
+ blog = post["blog"]
+ self.blog = blog["name"] + ".tumblr.com"
+ else:
+ if not blog:
+ blog = self.api.info(self.blog)
+ blog["uuid"] = self.blog
+
+ if self.avatar:
+ url = self.api.avatar(self.blog)
+ yield Message.Directory, {"blog": blog}
+ yield self._prepare_avatar(url, post.copy(), blog)
+
+ post["blog"] = blog
reblog = "reblogged_from_id" in post
if reblog and self._skip_reblog(post):
@@ -99,7 +105,6 @@ class TumblrExtractor(Extractor):
if "trail" in post:
del post["trail"]
- post["blog"] = blog
post["date"] = text.parse_timestamp(post["timestamp"])
posts = []
@@ -349,6 +354,19 @@ class TumblrLikesExtractor(TumblrExtractor):
return self.api.likes(self.blog)
+class TumblrSearchExtractor(TumblrExtractor):
+ """Extractor for a Tumblr search"""
+ subcategory = "search"
+ pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
+ r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")
+ example = "https://www.tumblr.com/search/QUERY"
+
+ def posts(self):
+ _, _, _, search, mode, post_type, query = self.groups
+ params = text.parse_query(query)
+ return self.api.search(text.unquote(search), params, mode, post_type)
+
+
class TumblrAPI(oauth.OAuth1API):
"""Interface for the Tumblr API v2
@@ -394,7 +412,8 @@ class TumblrAPI(oauth.OAuth1API):
if self.before and params["offset"]:
self.log.warning("'offset' and 'date-max' cannot be used together")
- return self._pagination(blog, "/posts", params, cache=True)
+ endpoint = "/v2/blog/{}/posts".format(blog)
+ return self._pagination(endpoint, params, blog=blog, cache=True)
def likes(self, blog):
"""Retrieve liked posts"""
@@ -410,6 +429,20 @@ class TumblrAPI(oauth.OAuth1API):
yield from posts
params["before"] = posts[-1]["liked_timestamp"]
+ def search(self, query, params, mode="top", post_type=None):
+ """Retrieve search results"""
+ endpoint = "/v2/timeline/search"
+
+ params["limit"] = "50"
+ params["days"] = params.pop("t", None)
+ params["query"] = query
+ params["mode"] = mode
+ params["reblog_info"] = "true" if self.extractor.reblogs else "false"
+ if post_type:
+ params["post_type_filter"] = post_type
+
+ return self._pagination(endpoint, params)
+
def _call(self, endpoint, params, **kwargs):
url = self.ROOT + endpoint
kwargs["params"] = params
@@ -478,20 +511,28 @@ class TumblrAPI(oauth.OAuth1API):
raise exception.StopExtraction(data)
- def _pagination(self, blog, endpoint, params, key="posts", cache=False):
- endpoint = "/v2/blog/{}{}".format(blog, endpoint)
+ def _pagination(self, endpoint, params,
+ blog=None, key="posts", cache=False):
if self.api_key:
params["api_key"] = self.api_key
strategy = self.extractor.config("pagination")
+ if not strategy and "offset" not in params:
+ strategy = "api"
+
while True:
data = self._call(endpoint, params)
- if cache:
- self.BLOG_CACHE[blog] = data["blog"]
- cache = False
+ if "timeline" in data:
+ data = data["timeline"]
+ posts = data["elements"]
+
+ else:
+ if cache:
+ self.BLOG_CACHE[blog] = data["blog"]
+ cache = False
+ posts = data[key]
- posts = data[key]
yield from posts
if strategy == "api":
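
With the search timeline added, _pagination() has to cope with two response shapes: timeline responses nest their posts under data["timeline"]["elements"], while blog endpoints keep the old data[key] layout and still feed BLOG_CACHE. It also falls back to the "api" pagination strategy whenever no explicit offset was supplied. A trimmed-down sketch of the response handling, with the HTTP call left out:

def extract_posts(data, key="posts", blog_cache=None, blog=None):
    """Return the post list from either a timeline or a blog API response."""
    if "timeline" in data:
        return data["timeline"]["elements"]
    if blog_cache is not None and blog is not None:
        blog_cache[blog] = data["blog"]
    return data[key]

timeline = {"timeline": {"elements": [{"id": 1}, {"id": 2}]}}
blogdata = {"blog": {"name": "demo"}, "posts": [{"id": 3}]}
cache = {}
print(extract_posts(timeline))                                   # [{'id': 1}, {'id': 2}]
print(extract_posts(blogdata, blog_cache=cache, blog="demo"), cache)
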
diff --git a/gallery_dl/extractor/tumblrgallery.py b/gallery_dl/extractor/tumblrgallery.py
index 27cc9d0..448625e 100644
--- a/gallery_dl/extractor/tumblrgallery.py
+++ b/gallery_dl/extractor/tumblrgallery.py
@@ -18,6 +18,7 @@ class TumblrgalleryExtractor(GalleryExtractor):
filename_fmt = "{category}_{gallery_id}_{num:>03}_{id}.{extension}"
directory_fmt = ("{category}", "{gallery_id} {title}")
root = "https://tumblrgallery.xyz"
+ referer = False
@staticmethod
def _urls_from_page(page):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 9c9d505..090b11a 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -305,6 +305,7 @@ class TwitterExtractor(Extractor):
legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
except Exception:
date = util.NONE
+ source = tweet.get("source")
tdata = {
"tweet_id" : tweet_id,
@@ -320,7 +321,7 @@ class TwitterExtractor(Extractor):
"author" : author,
"user" : self._user or author,
"lang" : legacy["lang"],
- "source" : text.extr(tweet["source"], ">", "<"),
+ "source" : text.extr(source, ">", "<") if source else "",
"sensitive" : tget("possibly_sensitive"),
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -538,12 +539,6 @@ class TwitterExtractor(Extractor):
if username:
return self.cookies_update(_login_impl(self, username, password))
- for cookie in self.cookies:
- if cookie.domain == ".twitter.com":
- self.cookies.set(
- cookie.name, cookie.value, domain=self.cookies_domain,
- expires=cookie.expires, secure=cookie.secure)
-
class TwitterUserExtractor(TwitterExtractor):
"""Extractor for a Twitter user"""
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
index f7ce44b..bb80055 100644
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -13,8 +13,8 @@ from .. import text, exception
class UrlgalleriesGalleryExtractor(GalleryExtractor):
"""Base class for Urlgalleries extractors"""
category = "urlgalleries"
- root = "urlgalleries.net"
- request_interval = (0.5, 1.0)
+ root = "https://urlgalleries.net"
+ request_interval = (0.5, 1.5)
pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
example = "https://BLOG.urlgalleries.net/gallery-12345/TITLE"
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 949c7cb..70ab259 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -155,7 +155,10 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
def items(self):
page = None
- data = {"_extractor": WebtoonsEpisodeExtractor}
+ data = {
+ "_extractor": WebtoonsEpisodeExtractor,
+ "title_no" : text.parse_int(self.title_no),
+ }
while True:
path = "/{}/list?title_no={}&page={}".format(
@@ -173,6 +176,8 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
data["page"] = self.page_no
for url in self.get_episode_urls(page):
+ params = text.parse_query(url.rpartition("?")[2])
+ data["episode_no"] = text.parse_int(params.get("episode_no"))
yield Message.Queue, url, data
self.page_no += 1
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 83b1642..9885d79 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -126,11 +126,7 @@ class WeiboExtractor(Extractor):
elif pic_type == "livephoto" and self.livephoto:
append(pic["largest"].copy())
-
- file = {"url": pic["video"]}
- file["filename"], _, file["extension"] = \
- pic["video"].rpartition("%2F")[2].rpartition(".")
- append(file)
+ append({"url": pic["video"]})
else:
append(pic["largest"].copy())
@@ -251,6 +247,11 @@ class WeiboUserExtractor(WeiboExtractor):
pattern = USER_PATTERN + r"(?:$|#)"
example = "https://weibo.com/USER"
+ # do NOT override 'initialize()'
+ # it is needed for 'self._user_id()'
+ # def initialize(self):
+ # pass
+
def items(self):
base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
return self._dispatch_extractors((
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 30801ee..c41f382 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -551,9 +551,15 @@ class DownloadJob(Job):
archive_path = cfg("archive")
if archive_path:
archive_path = util.expand_path(archive_path)
- archive_format = (cfg("archive-prefix", extr.category) +
- cfg("archive-format", extr.archive_fmt))
- archive_pragma = (cfg("archive-pragma"))
+
+ archive_prefix = cfg("archive-prefix")
+ if archive_prefix is None:
+ archive_prefix = extr.category
+
+ archive_format = cfg("archive-format")
+ if archive_format is None:
+ archive_format = extr.archive_fmt
+
try:
if "{" in archive_path:
archive_path = formatter.parse(
@@ -563,7 +569,10 @@ class DownloadJob(Job):
else:
archive_cls = archive.DownloadArchive
self.archive = archive_cls(
- archive_path, archive_format, archive_pragma)
+ archive_path,
+ archive_prefix + archive_format,
+ cfg("archive-pragma"),
+ )
except Exception as exc:
extr.log.warning(
"Failed to open download archive at '%s' (%s: %s)",
@@ -598,7 +607,7 @@ class DownloadJob(Job):
skip_filter = cfg("skip-filter")
if skip_filter:
- self._skipftr = util.compile_expression(skip_filter)
+ self._skipftr = util.compile_filter(skip_filter)
else:
self._skipftr = None
else:
@@ -622,6 +631,14 @@ class DownloadJob(Job):
for pp_dict in postprocessors:
if isinstance(pp_dict, str):
pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict}
+ elif "type" in pp_dict:
+ pp_type = pp_dict["type"]
+ if pp_type in pp_conf:
+ pp = pp_conf[pp_type].copy()
+ pp.update(pp_dict)
+ pp_dict = pp
+ if "name" not in pp_dict:
+ pp_dict["name"] = pp_type
if pp_opts:
pp_dict = pp_dict.copy()
pp_dict.update(pp_opts)
@@ -660,7 +677,7 @@ class DownloadJob(Job):
expr = options.get("filter") if options else None
if expr:
- condition = util.compile_expression(expr)
+ condition = util.compile_filter(expr)
for hook, callback in hooks.items():
self.hooks[hook].append(functools.partial(
self._call_hook, callback, condition))
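
The new "type" handling in the postprocessor setup lets an inline entry inherit defaults from a named template in the postprocessor config map and fall back to that name when no explicit "name" is given. A hedged model of the merge order, with config shapes inferred from the hunk above and made-up option values:

def resolve_pp(pp_dict, pp_conf):
    """Merge a postprocessor entry with its 'type' template, if one exists."""
    if isinstance(pp_dict, str):
        return pp_conf.get(pp_dict) or {"name": pp_dict}
    if "type" in pp_dict:
        pp_type = pp_dict["type"]
        if pp_type in pp_conf:
            merged = pp_conf[pp_type].copy()
            merged.update(pp_dict)        # explicit keys win over the template
            pp_dict = merged
        if "name" not in pp_dict:
            pp_dict["name"] = pp_type
    return pp_dict

pp_conf = {"thumbs": {"name": "ugoira", "extension": "webp"}}
print(resolve_pp({"type": "thumbs", "extension": "avif"}, pp_conf))
# {'name': 'ugoira', 'extension': 'avif', 'type': 'thumbs'}
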
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index b38ad74..a3f78e5 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -461,6 +461,17 @@ def build_parser():
help="Client-side IP address to bind to",
)
networking.add_argument(
+ "-4", "--force-ipv4",
+ dest="source-address", nargs=0, action=ConfigConstAction,
+ const="0.0.0.0",
+ help="Make all connections via IPv4",
+ )
+ networking.add_argument(
+ "-6", "--force-ipv6",
+ dest="source-address", nargs=0, action=ConfigConstAction, const="::",
+ help="Make all connections via IPv6",
+ )
+ networking.add_argument(
"--no-check-certificate",
dest="verify", nargs=0, action=ConfigConstAction, const=False,
help="Disable HTTPS certificate validation",
@@ -792,4 +803,10 @@ def build_parser():
"&& convert * ../doc.pdf\""),
)
+ try:
+ # restore normal behavior when adding '-4' or '-6' as arguments
+ parser._has_negative_number_optionals.clear()
+ except Exception:
+ pass
+
return parser
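
argparse flips an internal flag as soon as an option string looks like a negative number, which is exactly what "-4" and "-6" do; once that flag is set, bare arguments such as "-12" would be parsed as unknown options instead of values. Clearing the private _has_negative_number_optionals list restores the old behaviour while the registered -4/-6 flags keep working. A small self-contained illustration (the option names and positional below are placeholders, not gallery-dl's parser):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-4", "--ipv4", dest="ipv4", action="store_true")
parser.add_argument("values", nargs="*")

# Registering '-4' marks the parser as having negative-number-like options,
# so '-12' below would otherwise be rejected as an unrecognized option.
parser._has_negative_number_optionals.clear()    # private API, as in the patch

print(parser.parse_args(["-4", "-12"]))
# Namespace(ipv4=True, values=['-12'])
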
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 13b6a8a..1649487 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -17,15 +17,34 @@ from . import config, util, formatter
# --------------------------------------------------------------------
# Globals
+try:
+ TTY_STDOUT = sys.stdout.isatty()
+except Exception:
+ TTY_STDOUT = False
+
+try:
+ TTY_STDERR = sys.stderr.isatty()
+except Exception:
+ TTY_STDERR = False
+
+try:
+ TTY_STDIN = sys.stdin.isatty()
+except Exception:
+ TTY_STDIN = False
+
+
+COLORS_DEFAULT = {}
COLORS = not os.environ.get("NO_COLOR")
-COLORS_DEFAULT = {
- "success": "1;32",
- "skip" : "2",
- "debug" : "0;37",
- "info" : "1;37",
- "warning": "1;33",
- "error" : "1;31",
-} if COLORS else {}
+if COLORS:
+ if TTY_STDOUT:
+ COLORS_DEFAULT["success"] = "1;32"
+ COLORS_DEFAULT["skip"] = "2"
+ if TTY_STDERR:
+ COLORS_DEFAULT["debug"] = "0;37"
+ COLORS_DEFAULT["info"] = "1;37"
+ COLORS_DEFAULT["warning"] = "1;33"
+ COLORS_DEFAULT["error"] = "1;31"
+
if util.WINDOWS:
ANSI = COLORS and os.environ.get("TERM") == "ANSI"
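
Color defaults are now picked per stream: "success"/"skip" are written to stdout and only get a color when stdout is a terminal, while the log-level colors target stderr. The isatty() probes are wrapped in try/except because a stream may be missing or replaced (for instance sys.stdout is None under pythonw). A compact sketch of that guard:

import sys

def is_tty(stream):
    """Best-effort TTY check that never raises."""
    try:
        return stream.isatty()
    except Exception:
        return False

colors = {}
if is_tty(sys.stdout):
    colors.update(success="1;32", skip="2")
if is_tty(sys.stderr):
    colors.update(warning="1;33", error="1;31")
print(colors)
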
@@ -323,7 +342,7 @@ def select():
if mode is None or mode == "auto":
try:
- if sys.stdout.isatty():
+ if TTY_STDOUT:
output = ColorOutput() if ANSI else TerminalOutput()
else:
output = PipeOutput()
@@ -331,6 +350,8 @@ def select():
output = PipeOutput()
elif isinstance(mode, dict):
output = CustomOutput(mode)
+ elif not mode:
+ output = NullOutput()
else:
output = {
"default" : PipeOutput,
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index d408a41..f57b02e 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -38,7 +38,7 @@ class PathFormat():
filename_fmt = extractor.filename_fmt
elif isinstance(filename_fmt, dict):
self.filename_conditions = [
- (util.compile_expression(expr),
+ (util.compile_filter(expr),
formatter.parse(fmt, kwdefault).format_map)
for expr, fmt in filename_fmt.items() if expr
]
@@ -57,7 +57,7 @@ class PathFormat():
directory_fmt = extractor.directory_fmt
elif isinstance(directory_fmt, dict):
self.directory_conditions = [
- (util.compile_expression(expr), [
+ (util.compile_filter(expr), [
formatter.parse(fmt, kwdefault).format_map
for fmt in fmts
])
diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py
index 34af1d9..5642955 100644
--- a/gallery_dl/postprocessor/classify.py
+++ b/gallery_dl/postprocessor/classify.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -15,44 +15,43 @@ import os
class ClassifyPP(PostProcessor):
DEFAULT_MAPPING = {
- "Music" : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"),
- "Video" : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv",
- "webm", "vob", "wmv"),
- "Pictures" : ("jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"),
+ "Pictures" : ("jpg", "jpeg", "png", "gif", "bmp", "svg", "webp",
+ "avif", "heic", "heif", "ico", "psd"),
+ "Video" : ("flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv",
+ "webm", "vob", "wmv", "m4v", "mov"),
+ "Music" : ("mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"),
"Archives" : ("zip", "rar", "7z", "tar", "gz", "bz2"),
+ "Documents": ("txt", "pdf"),
}
def __init__(self, job, options):
PostProcessor.__init__(self, job)
- mapping = options.get("mapping", self.DEFAULT_MAPPING)
+ self.directory = self.realdirectory = ""
+ mapping = options.get("mapping", self.DEFAULT_MAPPING)
self.mapping = {
ext: directory
for directory, exts in mapping.items()
for ext in exts
}
- job.register_hooks(
- {"prepare": self.prepare, "file": self.move}, options)
- def prepare(self, pathfmt):
- ext = pathfmt.extension
- if ext in self.mapping:
- # set initial paths to enable download skips
- self._build_paths(pathfmt, self.mapping[ext])
+ job.register_hooks({
+ "post" : self.initialize,
+ "prepare": self.prepare,
+ }, options)
+
+ def initialize(self, pathfmt):
+ # store base directory paths
+ self.directory = pathfmt.directory
+ self.realdirectory = pathfmt.realdirectory
- def move(self, pathfmt):
+ def prepare(self, pathfmt):
+ # extend directory paths depending on file extension
ext = pathfmt.extension
if ext in self.mapping:
- # rebuild paths in case the filename extension changed
- path = self._build_paths(pathfmt, self.mapping[ext])
- os.makedirs(path, exist_ok=True)
-
- @staticmethod
- def _build_paths(pathfmt, extra):
- path = pathfmt.realdirectory + extra
- pathfmt.realpath = path + os.sep + pathfmt.filename
- pathfmt.path = pathfmt.directory + extra + os.sep + pathfmt.filename
- return path
+ extra = self.mapping[ext] + os.sep
+ pathfmt.directory = self.directory + extra
+ pathfmt.realdirectory = self.realdirectory + extra
__postprocessor__ = ClassifyPP
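
The rewritten classify postprocessor no longer patches realpath and calls os.makedirs() itself; it stores the base directories once per post (the new "post" hook) and, in "prepare", appends the mapped subdirectory to pathfmt.directory / pathfmt.realdirectory, leaving path building and directory creation to the normal download pipeline. A minimal model with a stand-in path-format object (attribute names follow the hunk, the class itself is a stub):

import os

class FakePathFormat:
    """Stand-in for gallery_dl.path.PathFormat with just the fields used here."""
    def __init__(self, directory):
        self.directory = self.realdirectory = directory
        self.extension = ""

MAPPING = {"jpg": "Pictures", "mp4": "Video"}
base = ""

def initialize(pathfmt):      # 'post' hook: remember the base directory
    global base
    base = pathfmt.directory

def prepare(pathfmt):         # 'prepare' hook: extend it per file extension
    sub = MAPPING.get(pathfmt.extension)
    if sub:
        pathfmt.directory = pathfmt.realdirectory = base + sub + os.sep

pf = FakePathFormat("/downloads/test/")
initialize(pf)
pf.extension = "jpg"
prepare(pf)
print(pf.directory)           # /downloads/test/Pictures/ (on POSIX)
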
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index d4e1603..a9143a6 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -26,19 +26,27 @@ class PostProcessor():
if archive_path:
extr = job.extractor
archive_path = util.expand_path(archive_path)
- if not prefix:
- prefix = "_" + self.name.upper() + "_"
- archive_format = (
- options.get("archive-prefix", extr.category) +
- options.get("archive-format", prefix + extr.archive_fmt))
+
+ archive_prefix = options.get("archive-prefix")
+ if archive_prefix is None:
+ archive_prefix = extr.category
+
+ archive_format = options.get("archive-format")
+ if archive_format is None:
+ if prefix is None:
+ prefix = "_" + self.name.upper() + "_"
+ archive_format = prefix + extr.archive_fmt
+
try:
if "{" in archive_path:
archive_path = formatter.parse(archive_path).format_map(
job.pathfmt.kwdict)
self.archive = archive.DownloadArchive(
- archive_path, archive_format,
+ archive_path,
+ archive_prefix + archive_format,
options.get("archive-pragma"),
- "_archive_" + self.name)
+ "_archive_" + self.name,
+ )
except Exception as exc:
self.log.warning(
"Failed to open %s archive at '%s' (%s: %s)",
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 6cdd994..3cbe510 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -21,6 +21,7 @@ import datetime
import functools
import itertools
import subprocess
+import collections
import urllib.parse
from http.cookiejar import Cookie
from email.utils import mktime_tz, parsedate_tz
@@ -702,6 +703,28 @@ def compile_expression_raw(expr, name="<expr>", globals=None):
return functools.partial(eval, code_object, globals or GLOBALS)
+def compile_expression_defaultdict(expr, name="<expr>", globals=None):
+ global GLOBALS_DEFAULT
+
+ if isinstance(__builtins__, dict):
+ # cpython
+ GLOBALS_DEFAULT = collections.defaultdict(lambda n=NONE: n, GLOBALS)
+ else:
+ # pypy3 - insert __builtins__ symbols into globals dict
+ GLOBALS_DEFAULT = collections.defaultdict(
+ lambda n=NONE: n, __builtins__.__dict__)
+ GLOBALS_DEFAULT.update(GLOBALS)
+
+ global compile_expression_defaultdict
+ compile_expression_defaultdict = compile_expression_defaultdict_impl
+ return compile_expression_defaultdict_impl(expr, name, globals)
+
+
+def compile_expression_defaultdict_impl(expr, name="<expr>", globals=None):
+ code_object = compile(expr, name, "eval")
+ return functools.partial(eval, code_object, globals or GLOBALS_DEFAULT)
+
+
def compile_expression_tryexcept(expr, name="<expr>", globals=None):
code_object = compile(expr, name, "eval")
@@ -711,7 +734,7 @@ def compile_expression_tryexcept(expr, name="<expr>", globals=None):
except exception.GalleryDLException:
raise
except Exception:
- return False
+ return NONE
return _eval
@@ -719,6 +742,12 @@ def compile_expression_tryexcept(expr, name="<expr>", globals=None):
compile_expression = compile_expression_tryexcept
+def compile_filter(expr, name="<filter>", globals=None):
+ if not isinstance(expr, str):
+ expr = "(" + ") and (".join(expr) + ")"
+ return compile_expression(expr, name, globals)
+
+
def import_file(path):
"""Import a Python module from a filesystem path"""
path, name = os.path.split(path)
@@ -949,10 +978,8 @@ class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True"""
def __init__(self, expr, target="image"):
- if not isinstance(expr, str):
- expr = "(" + ") and (".join(expr) + ")"
name = "<{} filter>".format(target)
- self.expr = compile_expression(expr, name)
+ self.expr = compile_filter(expr, name)
def __call__(self, _, kwdict):
try:
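
compile_filter() is a small wrapper that accepts either a single expression string or a list of expressions and joins a list with "and" before compiling it, moving logic that previously lived in FilterPredicate. A standalone sketch of the joining step, using plain eval in place of gallery-dl's compiled-expression helpers:

def compile_filter(expr):
    """Combine one or more expression strings into a single predicate."""
    if not isinstance(expr, str):
        expr = "(" + ") and (".join(expr) + ")"
    code = compile(expr, "<filter>", "eval")
    return lambda kwdict=None: eval(code, {}, kwdict or {})

check = compile_filter(["a % 2 == 0", "b % 3 == 0"])
print(check({"a": 4, "b": 6}))   # True
print(check({"a": 1, "b": 6}))   # False
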
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 6c2a32e..2bf03f4 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.27.7"
+__version__ = "1.28.0"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index fe88c2c..32545e2 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -134,6 +134,7 @@ def parse_command_line(module, argv):
else:
date = module.DateRange(opts.dateafter, opts.datebefore)
+ decodeOption = getattr(module, "decodeOption", util.identity)
compat_opts = getattr(opts, "compat_opts", ())
def _unused_compat_opt(name):
@@ -355,8 +356,8 @@ def parse_command_line(module, argv):
"allsubtitles": opts.allsubtitles,
"subtitlesformat": opts.subtitlesformat,
"subtitleslangs": opts.subtitleslangs,
- "matchtitle": module.decodeOption(opts.matchtitle),
- "rejecttitle": module.decodeOption(opts.rejecttitle),
+ "matchtitle": decodeOption(opts.matchtitle),
+ "rejecttitle": decodeOption(opts.rejecttitle),
"max_downloads": opts.max_downloads,
"prefer_free_formats": opts.prefer_free_formats,
"trim_file_name": getattr(opts, "trim_file_name", None),
diff --git a/test/test_config.py b/test/test_config.py
index bbe288f..1d49d77 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -117,6 +117,24 @@ class TestConfig(unittest.TestCase):
self.assertEqual(
config.accumulate(("c", "c"), "l"), [5, 6])
+ config.set(() , "l", 4)
+ config.set(("c",) , "l", [2, 3])
+ config.set(("c", "c"), "l", 1)
+ self.assertEqual(
+ config.accumulate((), "l") , [4])
+ self.assertEqual(
+ config.accumulate(("c",), "l") , [2, 3, 4])
+ self.assertEqual(
+ config.accumulate(("c", "c"), "l"), [1, 2, 3, 4])
+
+ config.set(("c",), "l", None)
+ self.assertEqual(
+ config.accumulate((), "l") , [4])
+ self.assertEqual(
+ config.accumulate(("c",), "l") , [4])
+ self.assertEqual(
+ config.accumulate(("c", "c"), "l"), [1, 4])
+
def test_set(self):
config.set(() , "c", [1, 2, 3])
config.set(("b",) , "c", [1, 2, 3])
diff --git a/test/test_extractor.py b/test/test_extractor.py
index e622fa8..cc85fb2 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -168,12 +168,17 @@ class TestExtractorModule(unittest.TestCase):
def test_init(self):
"""Test for exceptions in Extractor.initialize() and .finalize()"""
+ def fail_request(*args, **kwargs):
+            self.fail("called 'request()' during initialization")
+
for cls in extractor.extractors():
if cls.category == "ytdl":
continue
extr = cls.from_url(cls.example)
if not extr and cls.basecategory and not cls.instances:
continue
+
+ extr.request = fail_request
extr.initialize()
extr.finalize()
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index dd53803..2941b81 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -120,30 +120,37 @@ class ClassifyTest(BasePostprocessorTest):
for directory, exts in pp.DEFAULT_MAPPING.items()
for ext in exts
})
+
+ self.assertEqual(pp.directory, "")
+ self._trigger(("post",))
+ self.assertEqual(pp.directory, self.pathfmt.directory)
+
self.pathfmt.set_extension("jpg")
+ self._trigger(("prepare",))
self.pathfmt.build_path()
-
- pp.prepare(self.pathfmt)
path = os.path.join(self.dir.name, "test", "Pictures")
self.assertEqual(self.pathfmt.path, path + "/file.jpg")
self.assertEqual(self.pathfmt.realpath, path + "/file.jpg")
- with patch("os.makedirs") as mkdirs:
- self._trigger()
- mkdirs.assert_called_once_with(path, exist_ok=True)
+ self.pathfmt.set_extension("mp4")
+ self._trigger(("prepare",))
+ self.pathfmt.build_path()
+ path = os.path.join(self.dir.name, "test", "Video")
+ self.assertEqual(self.pathfmt.path, path + "/file.mp4")
+ self.assertEqual(self.pathfmt.realpath, path + "/file.mp4")
def test_classify_noop(self):
pp = self._create()
rp = self.pathfmt.realpath
- pp.prepare(self.pathfmt)
+ self.assertEqual(pp.directory, "")
+ self._trigger(("post",))
+ self._trigger(("prepare",))
+
+ self.assertEqual(pp.directory, self.pathfmt.directory)
self.assertEqual(self.pathfmt.path, rp)
self.assertEqual(self.pathfmt.realpath, rp)
- with patch("os.makedirs") as mkdirs:
- self._trigger()
- self.assertEqual(mkdirs.call_count, 0)
-
def test_classify_custom(self):
pp = self._create({"mapping": {
"foo/bar": ["foo", "bar"],
@@ -153,18 +160,18 @@ class ClassifyTest(BasePostprocessorTest):
"foo": "foo/bar",
"bar": "foo/bar",
})
+
+ self.assertEqual(pp.directory, "")
+ self._trigger(("post",))
+ self.assertEqual(pp.directory, self.pathfmt.directory)
+
self.pathfmt.set_extension("foo")
+ self._trigger(("prepare",))
self.pathfmt.build_path()
-
- pp.prepare(self.pathfmt)
path = os.path.join(self.dir.name, "test", "foo", "bar")
self.assertEqual(self.pathfmt.path, path + "/file.foo")
self.assertEqual(self.pathfmt.realpath, path + "/file.foo")
- with patch("os.makedirs") as mkdirs:
- self._trigger()
- mkdirs.assert_called_once_with(path, exist_ok=True)
-
class ExecTest(BasePostprocessorTest):
diff --git a/test/test_util.py b/test/test_util.py
index 888a70a..fa16c44 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -300,6 +300,96 @@ class TestCookiesTxt(unittest.TestCase):
)
+class TestCompileExpression(unittest.TestCase):
+
+ def test_compile_expression(self):
+ expr = util.compile_expression("1 + 2 * 3")
+ self.assertEqual(expr(), 7)
+ self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
+ self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 7)
+
+ expr = util.compile_expression("a + b * c")
+ self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
+ self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 90)
+
+ with self.assertRaises(SyntaxError):
+ util.compile_expression("")
+ with self.assertRaises(SyntaxError):
+ util.compile_expression("x++")
+
+ expr = util.compile_expression("1 and abort()")
+ with self.assertRaises(exception.StopExtraction):
+ expr()
+
+ def test_compile_expression_raw(self):
+ expr = util.compile_expression_raw("a + b * c")
+ with self.assertRaises(NameError):
+ expr()
+ with self.assertRaises(NameError):
+ expr({"a": 2})
+
+ expr = util.compile_expression_raw("int.param")
+ with self.assertRaises(AttributeError):
+ expr({"a": 2})
+
+ def test_compile_expression_tryexcept(self):
+ expr = util.compile_expression_tryexcept("a + b * c")
+ self.assertIs(expr(), util.NONE)
+ self.assertIs(expr({"a": 2}), util.NONE)
+
+ expr = util.compile_expression_tryexcept("int.param")
+ self.assertIs(expr({"a": 2}), util.NONE)
+
+ def test_compile_expression_defaultdict(self):
+ expr = util.compile_expression_defaultdict("a + b * c")
+ self.assertIs(expr(), util.NONE)
+ self.assertIs(expr({"a": 2}), util.NONE)
+
+ expr = util.compile_expression_defaultdict("int.param")
+ with self.assertRaises(AttributeError):
+ expr({"a": 2})
+
+ def test_compile_filter(self):
+ expr = util.compile_filter("a + b * c")
+ self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
+ self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 90)
+
+ expr = util.compile_filter(["a % 2 == 0", "b % 3 == 0", "c % 5 == 0"])
+ self.assertTrue(expr({"a": 4, "b": 6, "c": 10}))
+ self.assertFalse(expr({"a": 1, "b": 2, "c": 3}))
+
+ def test_custom_globals(self):
+ value = {"v": "foobar"}
+ result = "8843d7f92416211de9ebb963ff4ce28125932878"
+
+ expr = util.compile_expression("hash_sha1(v)")
+ self.assertEqual(expr(value), result)
+
+ expr = util.compile_expression("hs(v)", globals={"hs": util.sha1})
+ self.assertEqual(expr(value), result)
+
+ with tempfile.TemporaryDirectory() as path:
+ file = path + "/module_sha1.py"
+ with open(file, "w") as fp:
+ fp.write("""
+import hashlib
+def hash(value):
+ return hashlib.sha1(value.encode()).hexdigest()
+""")
+ module = util.import_file(file)
+
+ expr = util.compile_expression("hash(v)", globals=module.__dict__)
+ self.assertEqual(expr(value), result)
+
+ GLOBALS_ORIG = util.GLOBALS
+ try:
+ util.GLOBALS = module.__dict__
+ expr = util.compile_expression("hash(v)")
+ finally:
+ util.GLOBALS = GLOBALS_ORIG
+ self.assertEqual(expr(value), result)
+
+
class TestOther(unittest.TestCase):
def test_bencode(self):
@@ -434,31 +524,6 @@ class TestOther(unittest.TestCase):
self.assertEqual(util.sha1(None),
"da39a3ee5e6b4b0d3255bfef95601890afd80709")
- def test_compile_expression(self):
- expr = util.compile_expression("1 + 2 * 3")
- self.assertEqual(expr(), 7)
- self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
- self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 7)
-
- expr = util.compile_expression("a + b * c")
- self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
- self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 90)
-
- expr = util.compile_expression_raw("a + b * c")
- with self.assertRaises(NameError):
- expr()
- with self.assertRaises(NameError):
- expr({"a": 2})
-
- with self.assertRaises(SyntaxError):
- util.compile_expression("")
- with self.assertRaises(SyntaxError):
- util.compile_expression("x++")
-
- expr = util.compile_expression("1 and abort()")
- with self.assertRaises(exception.StopExtraction):
- expr()
-
def test_import_file(self):
module = util.import_file("datetime")
self.assertIs(module, datetime)
@@ -478,37 +543,6 @@ value = 123
self.assertEqual(module.value, 123)
self.assertIs(module.datetime, datetime)
- def test_custom_globals(self):
- value = {"v": "foobar"}
- result = "8843d7f92416211de9ebb963ff4ce28125932878"
-
- expr = util.compile_expression("hash_sha1(v)")
- self.assertEqual(expr(value), result)
-
- expr = util.compile_expression("hs(v)", globals={"hs": util.sha1})
- self.assertEqual(expr(value), result)
-
- with tempfile.TemporaryDirectory() as path:
- file = path + "/module_sha1.py"
- with open(file, "w") as fp:
- fp.write("""
-import hashlib
-def hash(value):
- return hashlib.sha1(value.encode()).hexdigest()
-""")
- module = util.import_file(file)
-
- expr = util.compile_expression("hash(v)", globals=module.__dict__)
- self.assertEqual(expr(value), result)
-
- GLOBALS_ORIG = util.GLOBALS
- try:
- util.GLOBALS = module.__dict__
- expr = util.compile_expression("hash(v)")
- finally:
- util.GLOBALS = GLOBALS_ORIG
- self.assertEqual(expr(value), result)
-
def test_build_duration_func(self, f=util.build_duration_func):
def test_single(df, v):