author    Unit 193 <unit193@unit193.net>  2025-08-16 07:00:33 -0400
committer Unit 193 <unit193@unit193.net>  2025-08-16 07:00:33 -0400
commit    3d18761f620a294ea6c5bff13c5994b93b29f3ed
tree      092fa6f8128bc187512be532801670417f215986
parent    a6e995c093de8aae2e91a0787281bb34c0b871eb
New upstream version 1.30.3. (upstream/1.30.3)
-rw-r--r--  CHANGELOG.md                              |  74
-rw-r--r--  PKG-INFO                                  |   7
-rw-r--r--  README.rst                                |   4
-rw-r--r--  data/completion/_gallery-dl               |   4
-rw-r--r--  data/completion/gallery-dl.fish           |   4
-rw-r--r--  data/man/gallery-dl.1                     |   8
-rw-r--r--  data/man/gallery-dl.conf.5                | 250
-rw-r--r--  docs/gallery-dl.conf                      |  22
-rw-r--r--  gallery_dl.egg-info/PKG-INFO              |   7
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt           |   2
-rw-r--r--  gallery_dl.egg-info/requires.txt          |   1
-rw-r--r--  gallery_dl/__init__.py                    |   6
-rw-r--r--  gallery_dl/archive.py                     |  42
-rw-r--r--  gallery_dl/downloader/ytdl.py             |   6
-rw-r--r--  gallery_dl/exception.py                   |  31
-rw-r--r--  gallery_dl/extractor/__init__.py          |   2
-rw-r--r--  gallery_dl/extractor/blogger.py           |  42
-rw-r--r--  gallery_dl/extractor/booth.py             | 127
-rw-r--r--  gallery_dl/extractor/cien.py              |   6
-rw-r--r--  gallery_dl/extractor/civitai.py           |  65
-rw-r--r--  gallery_dl/extractor/comick.py            | 184
-rw-r--r--  gallery_dl/extractor/common.py            |  36
-rw-r--r--  gallery_dl/extractor/danbooru.py          |   2
-rw-r--r--  gallery_dl/extractor/dankefuerslesen.py   |   6
-rw-r--r--  gallery_dl/extractor/deviantart.py        |  16
-rw-r--r--  gallery_dl/extractor/everia.py            |   2
-rw-r--r--  gallery_dl/extractor/facebook.py          | 129
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py     |  18
-rw-r--r--  gallery_dl/extractor/idolcomplex.py       | 269
-rw-r--r--  gallery_dl/extractor/imagehosts.py        |  72
-rw-r--r--  gallery_dl/extractor/instagram.py         |  20
-rw-r--r--  gallery_dl/extractor/iwara.py             |   3
-rw-r--r--  gallery_dl/extractor/kemono.py            |  72
-rw-r--r--  gallery_dl/extractor/madokami.py          |   2
-rw-r--r--  gallery_dl/extractor/misskey.py           |   4
-rw-r--r--  gallery_dl/extractor/motherless.py        |  13
-rw-r--r--  gallery_dl/extractor/pixiv.py             |  52
-rw-r--r--  gallery_dl/extractor/sankaku.py           |  28
-rw-r--r--  gallery_dl/extractor/scrolller.py         |   4
-rw-r--r--  gallery_dl/extractor/skeb.py              |  82
-rw-r--r--  gallery_dl/extractor/tumblr.py            |  55
-rw-r--r--  gallery_dl/extractor/twitter.py           |   2
-rw-r--r--  gallery_dl/extractor/vk.py                |   2
-rw-r--r--  gallery_dl/extractor/vsco.py              |   1
-rw-r--r--  gallery_dl/extractor/wikimedia.py         |   7
-rw-r--r--  gallery_dl/extractor/xasiat.py            | 103
-rw-r--r--  gallery_dl/job.py                         |  46
-rw-r--r--  gallery_dl/option.py                      |  14
-rw-r--r--  gallery_dl/output.py                      |  37
-rw-r--r--  gallery_dl/path.py                        |  86
-rw-r--r--  gallery_dl/transaction_id.py              |   4
-rw-r--r--  gallery_dl/util.py                        |  18
-rw-r--r--  gallery_dl/version.py                     |   2
-rwxr-xr-x  scripts/run_tests.py                      |   6
-rw-r--r--  setup.py                                  |   1
-rw-r--r--  test/test_config.py                       |   2
-rw-r--r--  test/test_cookies.py                      |   8
-rw-r--r--  test/test_downloader.py                   |  28
-rw-r--r--  test/test_extractor.py                    |  21
-rw-r--r--  test/test_formatter.py                    |  53
-rw-r--r--  test/test_job.py                          |   6
-rw-r--r--  test/test_postprocessor.py                |  89
-rw-r--r--  test/test_results.py                      |  88
-rw-r--r--  test/test_util.py                         |   4
-rw-r--r--  test/test_ytdl.py                         |   4
65 files changed, 1573 insertions, 838 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 159ff0d..1bdbcc7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,11 +1,71 @@
-## 1.30.2 - 2025-07-27
+## 1.30.3 - 2025-08-15
### Extractors
#### Additions
-- [itaku] add `posts` & `bookmarks` extractors ([#7707](https://github.com/mikf/gallery-dl/issues/7707))
+- [booth] add support ([#7920](https://github.com/mikf/gallery-dl/issues/7920))
+- [civitai] add `collection` & `user-collections` extractors ([#8005](https://github.com/mikf/gallery-dl/issues/8005))
+- [facebook] add `info` extractor ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [facebook] add `albums` extractor ([#7848](https://github.com/mikf/gallery-dl/issues/7848))
+- [imgdrive] add `image` extractor ([#7976](https://github.com/mikf/gallery-dl/issues/7976))
+- [imgtaxi] add `image` extractor ([#8019](https://github.com/mikf/gallery-dl/issues/8019))
+- [imgwallet] add `image` extractor ([#8021](https://github.com/mikf/gallery-dl/issues/8021))
+- [picstate] add `image` extractor ([#7946](https://github.com/mikf/gallery-dl/issues/7946))
+- [silverpic] add `image` extractor ([#8020](https://github.com/mikf/gallery-dl/issues/8020))
+- [tumblr] add `following` & `followers` extractors ([#8018](https://github.com/mikf/gallery-dl/issues/8018))
+- [xasiat] add support ([#4161](https://github.com/mikf/gallery-dl/issues/4161) [#5929](https://github.com/mikf/gallery-dl/issues/5929) [#7934](https://github.com/mikf/gallery-dl/issues/7934))
#### Fixes
-- [kemono] support new `kemono.cr` domain ([#7902](https://github.com/mikf/gallery-dl/issues/7902) [#7909](https://github.com/mikf/gallery-dl/issues/7909) [#7911](https://github.com/mikf/gallery-dl/issues/7911) [#7913](https://github.com/mikf/gallery-dl/issues/7913) [#7904](https://github.com/mikf/gallery-dl/issues/7904))
-- [coomer] support new `coomer.st` domain ([#7907](https://github.com/mikf/gallery-dl/issues/7907) [#7909](https://github.com/mikf/gallery-dl/issues/7909) [#7911](https://github.com/mikf/gallery-dl/issues/7911) [#7904](https://github.com/mikf/gallery-dl/issues/7904))
-### Post Processors
-- [exec] use `False` as `start_new_session` default to avoid a `TypeError` ([#7899](https://github.com/mikf/gallery-dl/issues/7899))
+- [blogger] fix video extraction ([#7892](https://github.com/mikf/gallery-dl/issues/7892))
+- [comick] handle chapters without chapter data ([#7972](https://github.com/mikf/gallery-dl/issues/7972))
+- [comick] handle volume-only chapters ([#8043](https://github.com/mikf/gallery-dl/issues/8043))
+- [comick] fix exception when filtering by translation group ([#8045](https://github.com/mikf/gallery-dl/issues/8045))
+- [deviantart:tiptap] fix `KeyError: 'attrs'` ([#7929](https://github.com/mikf/gallery-dl/issues/7929))
+- [everia] fix image extraction ([#7973](https://github.com/mikf/gallery-dl/issues/7973) [#7977](https://github.com/mikf/gallery-dl/issues/7977))
+- [facebook] fix `avatar` extraction for empty profiles ([#7962](https://github.com/mikf/gallery-dl/issues/7962))
+- [facebook] handle profiles without photos or `set_id` ([#7962](https://github.com/mikf/gallery-dl/issues/7962))
+- [fappic] rewrite thumbnail URLs ([#8013](https://github.com/mikf/gallery-dl/issues/8013))
+- [idolcomplex] update to new domain and interface ([#7559](https://github.com/mikf/gallery-dl/issues/7559) [#8009](https://github.com/mikf/gallery-dl/issues/8009))
+- [kemono][coomer] fix extraction ([#8028](https://github.com/mikf/gallery-dl/issues/8028) [#8031](https://github.com/mikf/gallery-dl/issues/8031))
+- [kemono] update `/creators` endpoint ([#8039](https://github.com/mikf/gallery-dl/issues/8039) [#8040](https://github.com/mikf/gallery-dl/issues/8040))
+- [kemono] don't set error status for posts without comments ([#7961](https://github.com/mikf/gallery-dl/issues/7961))
+- [pixiv] fix `IndexError` for unviewable works ([#7940](https://github.com/mikf/gallery-dl/issues/7940))
+- [pixiv] fix artworks downloads when using expired cookies ([#7987](https://github.com/mikf/gallery-dl/issues/7987))
+- [scrolller] fix NSFW subreddit pagination ([#7945](https://github.com/mikf/gallery-dl/issues/7945))
+- [twitter] fix potential `UnboundLocalError` when `videos` are disabled ([#7932](https://github.com/mikf/gallery-dl/issues/7932))
+- [vsco] disable TLS 1.2 cipher suites by default ([#7984](https://github.com/mikf/gallery-dl/issues/7984) [#7986](https://github.com/mikf/gallery-dl/issues/7986))
+- [wikimedia:wiki] fix `AttributeError: 'subcategories'` ([#7931](https://github.com/mikf/gallery-dl/issues/7931))
+#### Improvements
+- [aibooru] support `general.aibooru.online` & `aibooru.download`
+- [comick] add `lang` option ([#7938](https://github.com/mikf/gallery-dl/issues/7938))
+- [hentaifoundry] add `descriptions` option ([#7952](https://github.com/mikf/gallery-dl/issues/7952))
+- [facebook] raise `AuthRequired` for profiles requiring cookies ([#7962](https://github.com/mikf/gallery-dl/issues/7962))
+- [instagram] warn about lower quality image downloads ([#7921](https://github.com/mikf/gallery-dl/issues/7921))
+- [kemono] support `"endpoint": "posts+"` for full metadata ([#8028](https://github.com/mikf/gallery-dl/issues/8028))
+- [misskey] support `misskey.art` ([#7923](https://github.com/mikf/gallery-dl/issues/7923))
+- [motherless] detect `404`/`File not found` pages
+- [pixiv] detect suspended/deleted accounts ([#7990](https://github.com/mikf/gallery-dl/issues/7990))
+- [pixiv] improve API error messages
+- [pixiv] remove redundant cookies initialization code
+- [scrolller] limit `title` length in default filenames
+- [skeb] implement `include` option ([#6558](https://github.com/mikf/gallery-dl/issues/6558) [#7267](https://github.com/mikf/gallery-dl/issues/7267))
+- [vk] update default `archive_fmt` ([#8030](https://github.com/mikf/gallery-dl/issues/8030))
+#### Metadata
+- [cien] provide `author[id]` metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [dankefuerslesen] extract more metadata ([#7915](https://github.com/mikf/gallery-dl/issues/7915))
+- [dankefuerslesen:manga] fix metadata being overwritten
+- [facebook] ensure numeric `user_id` values ([#7953](https://github.com/mikf/gallery-dl/issues/7953))
+- [facebook:set] fix/improve `user_id` extraction ([#7848](https://github.com/mikf/gallery-dl/issues/7848))
+- [fappic] fix `filename` values
+#### Common
+- [common] implement `"user-agent": "@BROWSER"` ([#7947](https://github.com/mikf/gallery-dl/issues/7947))
+- [common] improve error message for non-Netscape cookie files ([#8014](https://github.com/mikf/gallery-dl/issues/8014))
+### Downloaders
+- [ytdl] don't overwrite existing `filename` data ([#7964](https://github.com/mikf/gallery-dl/issues/7964))
### Miscellaneous
-- [tests/postprocessor] fix `TypeError` when logging an error ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [docs/configuration] improve `client-id` & `api-key` instructions
+- [docs/formatting] update and improve
+- [job] apply `extension-map` to `SimulationJob` results ([#7954](https://github.com/mikf/gallery-dl/issues/7954))
+- [job] improve URL `scheme` extraction performance
+- [job] split collected DataJob results
+- [path] implement `path-convert` option ([#493](https://github.com/mikf/gallery-dl/issues/493) [#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [scripts] improve and extend `init`, `generate_test_result`, and `pyprint`
+- extend `-A`/`--abort` & `"skip": "abort"` functionality ([#7891](https://github.com/mikf/gallery-dl/issues/7891))
+- use more f-strings ([#7671](https://github.com/mikf/gallery-dl/issues/7671))
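Example: the extended skip/abort behavior listed above can be set either through the new "-A N[:TARGET]" command-line form or through the "skip" option in a configuration file. A minimal sketch, reusing the "abort:N:SC" form documented in the gallery-dl.conf.5 changes further below (the subcategory name "manga" is only illustrative):

    {
        "extractor": {
            "skip": "abort:3:manga"
        }
    }

The command-line equivalent would be "gallery-dl -A 3:manga URL", as shown in the updated completion and man page entries.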
diff --git a/PKG-INFO b/PKG-INFO
index 550241f..6787cc9 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.2
+Version: 1.30.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -37,6 +37,7 @@ Requires-Dist: yt-dlp; extra == "video"
Provides-Extra: extra
Requires-Dist: requests[socks]; extra == "extra"
Requires-Dist: yt-dlp[default]; extra == "extra"
+Requires-Dist: jinja2; extra == "extra"
Requires-Dist: pyyaml; extra == "extra"
Requires-Dist: toml; python_version < "3.11" and extra == "extra"
Requires-Dist: truststore; python_version >= "3.10" and extra == "extra"
@@ -138,9 +139,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 3ca61b2..1bb85f5 100644
--- a/README.rst
+++ b/README.rst
@@ -79,9 +79,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index f0d654e..07cfcd9 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -72,8 +72,8 @@ _arguments -s -S \
{-C,--cookies}'[File to load additional cookies from]':'<file>':_files \
--cookies-export'[Export session cookies to FILE]':'<file>':_files \
--cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with '\''/'\'', keyring name prefixed with '\''+'\'', profile prefixed with '\'':'\'', and container prefixed with '\''::'\'' ('\''none'\'' for no container (default), '\''all'\'' for all containers)]':'<browser[/domain][+keyring][:profile][::container]>' \
-{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
-{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
+{-A,--abort}'[Stop current extractor(s) after N consecutive file downloads were skipped. Specify a TARGET to set how many levels to ascend or to which subcategory to jump to. Examples: '\''-A 3'\'', '\''-A 3:2'\'', '\''-A 3:manga'\'']':'<n[:target]>' \
+{-T,--terminate}'[Stop current & parent extractors and proceed with the next input URL after N consecutive file downloads were skipped]':'<n>' \
--filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'<size>' \
--filesize-max'[Do not download files larger than SIZE (e.g. 500k or 2.5M)]':'<size>' \
--download-archive'[Record successfully downloaded files in FILE and skip downloading any file already in it]':'<file>':_files \
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 8eb427a..b7e4fe4 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -67,8 +67,8 @@ complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data'
complete -c gallery-dl -r -F -s 'C' -l 'cookies' -d 'File to load additional cookies from'
complete -c gallery-dl -r -F -l 'cookies-export' -d 'Export session cookies to FILE'
complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container (default), "all" for all containers)'
-complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped'
-complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped'
+complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor(s) after N consecutive file downloads were skipped. Specify a TARGET to set how many levels to ascend or to which subcategory to jump to. Examples: "-A 3", "-A 3:2", "-A 3:manga"'
+complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current & parent extractors and proceed with the next input URL after N consecutive file downloads were skipped'
complete -c gallery-dl -x -l 'filesize-min' -d 'Do not download files smaller than SIZE (e.g. 500k or 2.5M)'
complete -c gallery-dl -x -l 'filesize-max' -d 'Do not download files larger than SIZE (e.g. 500k or 2.5M)'
complete -c gallery-dl -r -F -l 'download-archive' -d 'Record successfully downloaded files in FILE and skip downloading any file already in it'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 4979279..39b88a4 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-07-27" "1.30.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-08-15" "1.30.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -218,11 +218,11 @@ Export session cookies to FILE
.B "\-\-cookies\-from\-browser" \f[I]BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]\f[]
Name of the browser to load cookies from, with optional domain prefixed with '/', keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container (default), 'all' for all containers)
.TP
-.B "\-A, \-\-abort" \f[I]N\f[]
-Stop current extractor run after N consecutive file downloads were skipped
+.B "\-A, \-\-abort" \f[I]N[:TARGET]\f[]
+Stop current extractor(s) after N consecutive file downloads were skipped. Specify a TARGET to set how many levels to ascend or to which subcategory to jump to. Examples: '-A 3', '-A 3:2', '-A 3:manga'
.TP
.B "\-T, \-\-terminate" \f[I]N\f[]
-Stop current and parent extractor runs after N consecutive file downloads were skipped
+Stop current & parent extractors and proceed with the next input URL after N consecutive file downloads were skipped
.TP
.B "\-\-filesize\-min" \f[I]SIZE\f[]
Do not download files smaller than SIZE (e.g. 500k or 2.5M)
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 12eea08..d33a147 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-07-27" "1.30.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-08-15" "1.30.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -330,6 +330,22 @@ depending on the local operating system
* \f[I]"windows"\f[]: \f[I]". "\f[]
+.SS extractor.*.path-convert
+.IP "Type:" 6
+\f[I]Conversion(s)\f[]
+
+.IP "Example:" 4
+.br
+* "g"
+.br
+* "Wl"
+
+.IP "Description:" 4
+\f[I]Conversion(s)\f[] to apply to each path segment after
+\f[I]path-restrict\f[]
+replacements.
+
+
.SS extractor.*.path-extended
.IP "Type:" 6
\f[I]bool\f[]
@@ -371,36 +387,59 @@ A JSON \f[I]object\f[] mapping filename extensions to their replacements.
.IP "Default:" 9
\f[I]true\f[]
+.IP "Example:" 4
+.br
+* "abort:5"
+.br
+* "abort:5:2"
+.br
+* "abort:5:manga"
+.br
+* "terminate:3"
+
.IP "Description:" 4
Controls the behavior when downloading files that have been
downloaded before, i.e. a file with the same filename already
exists or its ID is in a \f[I]download archive\f[].
-.br
-* \f[I]true\f[]: Skip downloads
-.br
-* \f[I]false\f[]: Overwrite already existing files
+\f[I]true\f[]
+Skip downloads
+\f[I]false\f[]
+Overwrite already existing files
+\f[I]"abort"\f[]
+Stop the current extractor
+\f[I]"abort:N"\f[]
+Skip downloads and
+stop the current extractor after \f[I]N\f[] consecutive skips
+\f[I]"abort:N:L"\f[]
+Skip downloads and
.br
-* \f[I]"abort"\f[]: Stop the current extractor run
+stop the current extractor after \f[I]N\f[] consecutive skips
+Ascend \f[I]L\f[] levels in the extractor hierarchy
.br
-* \f[I]"abort:N"\f[]: Skip downloads and stop the current extractor run
-after \f[I]N\f[] consecutive skips
-
+\f[I]"abort:N:SC"\f[]
+Skip downloads and
.br
-* \f[I]"terminate"\f[]: Stop the current extractor run, including parent extractors
+stop the current extractor after \f[I]N\f[] consecutive skips
+Ascend to an extractor with subcategory \f[I]SC\f[] in the extractor hierarchy
.br
-* \f[I]"terminate:N"\f[]: Skip downloads and stop the current extractor run,
-including parent extractors, after \f[I]N\f[] consecutive skips
-.br
-* \f[I]"exit"\f[]: Exit the program altogether
-.br
-* \f[I]"exit:N"\f[]: Skip downloads and exit the program
+\f[I]"terminate"\f[]
+Stop the current extractor, including parent extractors
+\f[I]"terminate:N"\f[]
+Skip downloads and
+stop the current extractor, including parent extractors,
after \f[I]N\f[] consecutive skips
-.br
-* \f[I]"enumerate"\f[]: Add an enumeration index to the beginning of the
+\f[I]"exit"\f[]
+Exit the program altogether
+\f[I]"exit:N"\f[]
+Skip downloads and
+exit the program after \f[I]N\f[] consecutive skips
+
+\f[I]"enumerate"\f[]
+Add an enumeration index to the beginning of the
filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.)
@@ -457,6 +496,7 @@ response before \f[I]retrying\f[] the request.
* \f[I]"0.5-1.5"\f[]
\f[I]ao3\f[],
\f[I]arcalive\f[],
+\f[I]booth\f[],
\f[I]civitai\f[],
\f[I][Danbooru]\f[],
\f[I][E621]\f[],
@@ -498,7 +538,6 @@ response before \f[I]retrying\f[] the request.
* \f[I]"3.0-6.0"\f[]
\f[I]bilibili\f[],
\f[I]exhentai\f[],
-\f[I]idolcomplex\f[],
\f[I][reactor]\f[],
\f[I]readcomiconline\f[]
.br
@@ -831,12 +870,23 @@ or a \f[I]list\f[] with IP and explicit port number as elements.
.br
* \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise
+.IP "Example:" 4
+.br
+* "curl/8.14.1"
+.br
+* "browser"
+.br
+* "@chrome"
+
.IP "Description:" 4
User-Agent header value used for HTTP requests.
Setting this value to \f[I]"browser"\f[] will try to automatically detect
and use the \f[I]User-Agent\f[] header of the system's default browser.
+Setting this value to \f[I]"@BROWSER"\f[], e.g. \f[I]"@chrome"\f[], will try to automatically detect
+and use the \f[I]User-Agent\f[] header of this installed browser.
+
.SS extractor.*.browser
.IP "Type:" 6
@@ -969,7 +1019,7 @@ to use these browser's default ciphers.
.IP "Default:" 9
.br
-* \f[I]false\f[]: \f[I]artstation\f[], \f[I]behance\f[]
+* \f[I]false\f[]: \f[I]artstation\f[], \f[I]behance\f[], \f[I]vsco\f[]
.br
* \f[I]true\f[]: otherwise
@@ -2062,6 +2112,24 @@ Possibly available formats are
* \f[I]tiny\f[] (144p)
+.SS extractor.booth.strategy
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"webpage"\f[]
+
+.IP "Description:" 4
+Selects how to handle and extract file URLs.
+
+\f[I]"webpage"\f[]
+Retrieve the full HTML page
+and extract file URLs from it
+\f[I]"fallback"\f[]
+Use \f[I]fallback\f[] URLs
+to guess each file's correct filename extension
+
+
.SS extractor.bunkr.endpoint
.IP "Type:" 6
\f[I]string\f[]
@@ -2175,6 +2243,8 @@ Possible values are
* \f[I]"user-images"\f[]
.br
* \f[I]"user-videos"\f[]
+.br
+* \f[I]"user-collections"\f[]
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
@@ -2293,6 +2363,26 @@ Use \f[I]+\f[] as first character to add the given options to the
\f[I]quality\f[] ones.
+.SS extractor.comick.lang
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+.br
+* "en"
+.br
+* "fr,it,pl"
+.br
+* ["fr", "it", "pl"]
+
+.IP "Description:" 4
+\f[I]ISO 639-1\f[] language codes
+to filter chapters by.
+
+
.SS extractor.cyberdrop.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -3092,9 +3182,13 @@ when processing a user profile.
Supported values are
.br
+* \f[I]info\f[]
+.br
* \f[I]avatar\f[]
.br
* \f[I]photos\f[]
+.br
+* \f[I]albums\f[]
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
@@ -3485,6 +3579,22 @@ to attempt to fetch the current value used by gofile.
Recursively download files from subfolders.
+.SS extractor.hentaifoundry.descriptions
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"text"\f[]
+
+.IP "Description:" 4
+Controls the format of \f[I]description\f[] metadata fields.
+
+.br
+* \f[I]"text"\f[]: Plain text with HTML tags removed
+.br
+* \f[I]"html"\f[]: Raw HTML content
+
+
.SS extractor.hentaifoundry.include
.IP "Type:" 6
.br
@@ -3937,29 +4047,11 @@ Extract a user's announcements as \f[I]announcements\f[] metadata.
.IP "Description:" 4
API endpoint to use for retrieving creator posts.
-\f[I]"legacy"\f[]
-Use the results from
-.br
-\f[I]/v1/{service}/user/{creator_id}/posts-legacy\f[]
-Provides less metadata, but is more reliable at returning all posts.
-.br
-Supports filtering results by \f[I]tag\f[] query parameter.
-.br
-\f[I]"legacy+"\f[]
-Use the results from
-.br
-\f[I]/v1/{service}/user/{creator_id}/posts-legacy\f[]
-to retrieve post IDs
-and one request to
-.br
-\f[I]/v1/{service}/user/{creator_id}/post/{post_id}\f[]
-to get a full set of metadata for each.
-\f[I]"posts"\f[]
-Use the results from
-.br
-\f[I]/v1/{service}/user/{creator_id}\f[]
-Provides more metadata, but might not return a creator's first/last posts.
-.br
+\f[I]"posts"\f[] \f[I] \f[I]"legacy"\f[]
+Provides only limited metadata.
+\f[I]"posts+"\f[] \f[] \f[I]"legacy+"\f[]
+Provides full metadata,
+but requires an additional API request for each post.
.SS extractor.kemono.favorites
@@ -5497,6 +5589,40 @@ Download animated images as \f[I].gif\f[] instead of \f[I].webp\f[]
Download article images.
+.SS extractor.skeb.include
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+.br
+* \f[I]["works", "sentrequests"]\f[]
+if \f[I]sent-requests\f[] are enabled
+.br
+* \f[I]["works"]\f[] otherwise
+
+.IP "Example:" 4
+.br
+* "works,sentrequests"
+.br
+* ["works", "sentrequests"]
+
+.IP "Description:" 4
+A (comma-separated) list of subcategories to include
+when processing a user profile.
+
+Possible values are
+
+.br
+* \f[I]"works"\f[]
+.br
+* \f[I]"sentrequests"\f[]
+
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
+
+
.SS extractor.skeb.sent-requests
.IP "Type:" 6
\f[I]bool\f[]
@@ -9589,9 +9715,9 @@ section of your account's preferences
.br
* select "installed app"
.br
-* set \f[I]http://localhost:6414/\f[] as "redirect uri"
+* set "redirect uri" to http://localhost:6414/
.br
-* solve the "I'm not a robot" reCAPTCHA if needed
+* solve the "I'm not a robot" challenge if needed
.br
* click "create app"
@@ -9620,11 +9746,21 @@ new \f[I]client-id\f[] (\f[I]gallery-dl oauth:reddit\f[])
.br
* login and \f[I]Apply for an API Key\f[]
.br
-* use a random name and description,
-set "Type" to "Application", "Platform" to "All",
-and "Use" to "Non-Commercial"
+* fill out the form:
+
+.br
+* choose a random name and description
+.br
+* set "Type" to "Application"
+.br
+* set "Platform" to "All"
+.br
+* set "Use" to "Non-Commercial"
+.br
+* tick the two checkboxes at the bottom
.br
-* fill out the two checkboxes at the bottom and click "Apply"
+* click "Apply"
+
.br
* copy \f[I]API Key\f[] and \f[I]API Secret\f[]
and put them in your configuration file
@@ -9642,11 +9778,19 @@ as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[]
.br
* click "Register application"
.br
-* fill out the form: use a random name and description, set
-https://example.org/ as "Application Website" and "Default
-callback URL"
+* fill out the form:
+
+.br
+* choose a random name and description
.br
-* solve Google's "I'm not a robot" challenge and click "Register"
+* set "Application Website" to https://example.org/
+.br
+* set "Default callback URL" to https://example.org/
+.br
+* solve the "I'm not a robot" challenge
+.br
+* click "Register"
+
.br
* click "Show secret key" (below "OAuth Consumer Key")
.br
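Example: taken together, the options added to the man page above map onto configuration entries along the following lines. This is only a sketch with illustrative values copied from the documented examples and defaults, not a recommended setup:

    {
        "extractor": {
            "user-agent"   : "@chrome",
            "path-convert" : "g",
            "comick"       : { "lang": "en" },
            "hentaifoundry": { "descriptions": "html" },
            "kemono"       : { "endpoint": "posts+" },
            "skeb"         : { "include": ["works", "sentrequests"] }
        }
    }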
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 6541030..97b5564 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -63,6 +63,7 @@
"path-replace" : "_",
"path-remove" : "\\u0000-\\u001f\\u007f",
"path-strip" : "auto",
+ "path-convert" : null,
"path-extended": true,
"metadata-extractor": null,
@@ -182,6 +183,12 @@
"metadata": false,
"videos" : true
},
+ "booth":
+ {
+ "sleep-request": "0.5-1.5",
+
+ "strategy": "webpage"
+ },
"bunkr":
{
"endpoint": "/api/_001_v2",
@@ -205,6 +212,10 @@
"quality" : "original=true",
"quality-videos": "quality=100"
},
+ "comick":
+ {
+ "lang": ""
+ },
"coomer":
{
"username": "",
@@ -360,6 +371,7 @@
},
"hentaifoundry":
{
+ "descriptions": "text",
"include": ["pictures"]
},
"hitomi":
@@ -370,8 +382,9 @@
{
"username": "",
"password": "",
- "referer" : false,
- "sleep-request": "3.0-6.0"
+
+ "refresh" : false,
+ "tags" : false
},
"imagechest":
{
@@ -632,8 +645,8 @@
"username": "",
"password": "",
- "refresh" : false,
- "tags" : false
+ "refresh" : false,
+ "tags" : false
},
"sankakucomplex":
{
@@ -663,6 +676,7 @@
"skeb":
{
"article" : false,
+ "include" : ["works"],
"sent-requests": false,
"thumbnails" : false,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 550241f..6787cc9 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.2
+Version: 1.30.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -37,6 +37,7 @@ Requires-Dist: yt-dlp; extra == "video"
Provides-Extra: extra
Requires-Dist: requests[socks]; extra == "extra"
Requires-Dist: yt-dlp[default]; extra == "extra"
+Requires-Dist: jinja2; extra == "extra"
Requires-Dist: pyyaml; extra == "extra"
Requires-Dist: toml; python_version < "3.11" and extra == "extra"
Requires-Dist: truststore; python_version >= "3.10" and extra == "extra"
@@ -138,9 +139,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 8ae28f6..d4427ab 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -71,6 +71,7 @@ gallery_dl/extractor/blogger.py
gallery_dl/extractor/bluesky.py
gallery_dl/extractor/booru.py
gallery_dl/extractor/boosty.py
+gallery_dl/extractor/booth.py
gallery_dl/extractor/bunkr.py
gallery_dl/extractor/catbox.py
gallery_dl/extractor/chevereto.py
@@ -260,6 +261,7 @@ gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
gallery_dl/extractor/wikifeet.py
gallery_dl/extractor/wikimedia.py
+gallery_dl/extractor/xasiat.py
gallery_dl/extractor/xfolio.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
diff --git a/gallery_dl.egg-info/requires.txt b/gallery_dl.egg-info/requires.txt
index 531a762..c4b9769 100644
--- a/gallery_dl.egg-info/requires.txt
+++ b/gallery_dl.egg-info/requires.txt
@@ -3,6 +3,7 @@ requests>=2.11.0
[extra]
requests[socks]
yt-dlp[default]
+jinja2
pyyaml
[extra:python_version < "3.11"]
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 9ab61e5..fdcb6d0 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -48,7 +48,7 @@ def main():
if filename == "/O":
filename = "{filename}.{extension}"
elif filename.startswith("\\f"):
- filename = "\f" + filename[2:]
+ filename = f"\f{filename[2:]}"
config.set((), "filename", filename)
if args.directory is not None:
config.set((), "base-directory", args.directory)
@@ -56,9 +56,9 @@ def main():
if args.postprocessors:
config.set((), "postprocessors", args.postprocessors)
if args.abort:
- config.set((), "skip", "abort:" + str(args.abort))
+ config.set((), "skip", f"abort:{args.abort}")
if args.terminate:
- config.set((), "skip", "terminate:" + str(args.terminate))
+ config.set((), "skip", f"terminate:{args.terminate}")
if args.cookies_from_browser:
browser, _, profile = args.cookies_from_browser.partition(":")
browser, _, keyring = browser.partition("+")
diff --git a/gallery_dl/archive.py b/gallery_dl/archive.py
index edecb10..3df5011 100644
--- a/gallery_dl/archive.py
+++ b/gallery_dl/archive.py
@@ -41,7 +41,7 @@ def connect(path, prefix, format,
def sanitize(name):
- return '"' + name.replace('"', "_") + '"'
+ return f'''"{name.replace('"', '_')}"'''
class DownloadArchive():
@@ -68,25 +68,25 @@ class DownloadArchive():
table = "archive" if table is None else sanitize(table)
self._stmt_select = (
- "SELECT 1 "
- "FROM " + table + " "
- "WHERE entry=? "
- "LIMIT 1")
+ f"SELECT 1 "
+ f"FROM {table} "
+ f"WHERE entry=? "
+ f"LIMIT 1")
self._stmt_insert = (
- "INSERT OR IGNORE INTO " + table + " "
- "(entry) VALUES (?)")
+ f"INSERT OR IGNORE INTO {table} "
+ f"(entry) VALUES (?)")
if pragma:
for stmt in pragma:
- cursor.execute("PRAGMA " + stmt)
+ cursor.execute(f"PRAGMA {stmt}")
try:
- cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
- "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
+ cursor.execute(f"CREATE TABLE IF NOT EXISTS {table} "
+ f"(entry TEXT PRIMARY KEY) WITHOUT ROWID")
except self._sqlite3.OperationalError:
# fallback for missing WITHOUT ROWID support (#553)
- cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
- "(entry TEXT PRIMARY KEY)")
+ cursor.execute(f"CREATE TABLE IF NOT EXISTS {table} "
+ f"(entry TEXT PRIMARY KEY)")
def add(self, kwdict):
"""Add item described by 'kwdict' to archive"""
@@ -156,18 +156,18 @@ class DownloadArchivePostgresql():
table = "archive" if table is None else sanitize(table)
self._stmt_select = (
- "SELECT true "
- "FROM " + table + " "
- "WHERE entry=%s "
- "LIMIT 1")
+ f"SELECT true "
+ f"FROM {table} "
+ f"WHERE entry=%s "
+ f"LIMIT 1")
self._stmt_insert = (
- "INSERT INTO " + table + " (entry) "
- "VALUES (%s) "
- "ON CONFLICT DO NOTHING")
+ f"INSERT INTO {table} (entry) "
+ f"VALUES (%s) "
+ f"ON CONFLICT DO NOTHING")
try:
- cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " "
- "(entry TEXT PRIMARY KEY)")
+ cursor.execute(f"CREATE TABLE IF NOT EXISTS {table} "
+ f"(entry TEXT PRIMARY KEY)")
con.commit()
except Exception as exc:
log.error("%s: %s when creating '%s' table: %s",
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 69a59ff..9659782 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -109,7 +109,11 @@ class YoutubeDLDownloader(DownloaderBase):
def _download_video(self, ytdl_instance, pathfmt, info_dict):
if "url" in info_dict:
- text.nameext_from_url(info_dict["url"], pathfmt.kwdict)
+ if "filename" in pathfmt.kwdict:
+ pathfmt.kwdict["extension"] = \
+ text.ext_from_url(info_dict["url"])
+ else:
+ text.nameext_from_url(info_dict["url"], pathfmt.kwdict)
formats = info_dict.get("requested_formats")
if formats and not compatible_formats(formats):
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index 5a52581..6adda0d 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -100,12 +100,17 @@ class AuthorizationError(ExtractionError):
class AuthRequired(AuthorizationError):
default = "Account credentials required"
- def __init__(self, required=None, message=None):
- if required and not message:
- if isinstance(required, str):
- message = f"{required} required"
+ def __init__(self, auth=None, resource="resource", message=None):
+ if auth:
+ if not isinstance(auth, str):
+ auth = " or ".join(auth)
+ if " " not in resource:
+ resource = "this " + resource
+ if message is None:
+ message = (f"{auth} needed to access {resource}")
else:
- message = f"{' or '.join(required)} required"
+ message = (f"{auth} needed to access {resource} "
+ f"('{message}')")
AuthorizationError.__init__(self, message)
@@ -160,6 +165,22 @@ class ControlException(GalleryDLException):
class StopExtraction(ControlException):
"""Stop data extraction"""
+ def __init__(self, target=None):
+ ControlException.__init__(self)
+
+ if target is None:
+ self.target = None
+ self.depth = 1
+ elif isinstance(target, int):
+ self.target = None
+ self.depth = target
+ elif target.isdecimal():
+ self.target = None
+ self.depth = int(target)
+ else:
+ self.target = target
+ self.depth = 128
+
class AbortExtraction(ExtractionError, ControlException):
"""Abort data extraction due to an error"""
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 688f0a0..70e79fe 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -35,6 +35,7 @@ modules = [
"blogger",
"bluesky",
"boosty",
+ "booth",
"bunkr",
"catbox",
"chevereto",
@@ -210,6 +211,7 @@ modules = [
"wikiart",
"wikifeet",
"wikimedia",
+ "xasiat",
"xfolio",
"xhamster",
"xvideos",
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 796d9d1..af43446 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -31,6 +31,11 @@ class BloggerExtractor(BaseExtractor):
self.blog = self.root.rpartition("/")[2]
self.videos = self.config("videos", True)
+ if self.videos:
+ self.findall_video = util.re(
+ r"""src=["'](https?://www\.blogger\.com"""
+ r"""/video\.g\?token=[^"']+)""").findall
+
def items(self):
blog = self.api.blog_by_url("http://" + self.blog)
blog["pages"] = blog["pages"]["totalItems"]
@@ -43,8 +48,6 @@ class BloggerExtractor(BaseExtractor):
r'blogger\.googleusercontent\.com/img|'
r'lh\d+(?:-\w+)?\.googleusercontent\.com|'
r'\d+\.bp\.blogspot\.com)/[^"]+)').findall
- findall_video = util.re(
- r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall
metadata = self.metadata()
for post in self.posts(blog):
@@ -54,16 +57,10 @@ class BloggerExtractor(BaseExtractor):
for idx, url in enumerate(files):
files[idx] = original(url)
- if self.videos and 'id="BLOG_video-' in content:
- page = self.request(post["url"]).text
- for url in findall_video(page):
- page = self.request(url).text
- video_config = util.json_loads(text.extr(
- page, 'var VIDEO_CONFIG =', '\n'))
- files.append(max(
- video_config["streams"],
- key=lambda x: x["format_id"],
- )["play_url"])
+ if self.videos and (
+ 'id="BLOG_video-' in content or
+ 'class="BLOG_video_' in content):
+ self._extract_videos(files, post)
post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"]
@@ -87,6 +84,27 @@ class BloggerExtractor(BaseExtractor):
def metadata(self):
"""Return additional metadata"""
+ def _extract_videos(self, files, post):
+ url = f"https://{self.blog}/feeds/posts/default/{post['id']}"
+ params = {
+ "alt" : "json",
+ "v" : "2",
+ "dynamicviews" : "1",
+ "rewriteforssl": "true",
+ }
+
+ data = self.request_json(url, params=params)
+ html = data["entry"]["content"]["$t"]
+
+ for url in self.findall_video(html):
+ page = self.request(url).text
+ video_config = util.json_loads(text.extr(
+ page, 'var VIDEO_CONFIG =', '\n'))
+ files.append(max(
+ video_config["streams"],
+ key=lambda x: x["format_id"],
+ )["play_url"])
+
BASE_PATTERN = BloggerExtractor.update({
"blogspot": {
diff --git a/gallery_dl/extractor/booth.py b/gallery_dl/extractor/booth.py
new file mode 100644
index 0000000..0fcb1cb
--- /dev/null
+++ b/gallery_dl/extractor/booth.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://booth.pm/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+
+class BoothExtractor(Extractor):
+ """Base class for booth extractors"""
+ category = "booth"
+ root = "https://booth.pm"
+ directory_fmt = ("{category}", "{shop[name]}", "{id} {name}")
+ filename_fmt = "{num:>02} {filename}.{extension}"
+ archive_fmt = "{id}_{filename}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ self.cookies.set("adult", "t", domain=".booth.pm")
+
+ def items(self):
+ for item in self.shop_items():
+ item["_extractor"] = BoothItemExtractor
+ yield Message.Queue, item["shop_item_url"], item
+
+ def _pagination(self, url):
+ while True:
+ page = self.request(url).text
+
+ for item in text.extract_iter(page, ' data-item="', '"'):
+ yield util.json_loads(text.unescape(item))
+
+ next = text.extr(page, 'rel="next" class="nav-item" href="', '"')
+ if not next:
+ break
+ url = self.root + next
+
+
+class BoothItemExtractor(BoothExtractor):
+ subcategory = "item"
+ pattern = r"(?:https?://)?(?:[\w-]+\.)?booth\.pm/(?:\w\w/)?items/(\d+)"
+ example = "https://booth.pm/items/12345"
+
+ def items(self):
+ url = f"{self.root}/ja/items/{self.groups[0]}"
+ headers = {
+ "Accept": "application/json",
+ "Content-Type": "application/json",
+ "X-CSRF-Token": None,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-origin",
+ "Priority": "u=4",
+ }
+
+ if self.config("strategy") == "fallback":
+ page = None
+ item = self.request_json(url + ".json", headers=headers)
+ else:
+ page = self.request(url).text
+ headers["X-CSRF-Token"] = text.extr(
+ page, 'name="csrf-token" content="', '"')
+ item = self.request_json(
+ url + ".json", headers=headers, interval=False)
+
+ item["booth_category"] = item.pop("category", None)
+ item["date"] = text.parse_datetime(
+ item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ item["tags"] = [t["name"] for t in item["tags"]]
+
+ shop = item["shop"]
+ shop["id"] = text.parse_int(shop["thumbnail_url"].rsplit("/", 3)[1])
+
+ if files := self._extract_files(item, page):
+ item["count"] = len(files)
+ shop["uuid"] = files[0]["url"].split("/", 4)[3]
+ else:
+ item["count"] = 0
+ shop["uuid"] = util.NONE
+
+ yield Message.Directory, item
+ for num, file in enumerate(files, 1):
+ url = file["url"]
+ file["num"] = num
+ text.nameext_from_url(url, file)
+ yield Message.Url, url, {**item, **file}
+
+ def _extract_files(self, item, page):
+ if page is None:
+ files = []
+ for image in item.pop("images"):
+ url = image["original"].replace("_base_resized", "")
+ files.append({
+ "url" : url,
+ "_fallback": _fallback(url),
+ })
+ return files
+
+ del item["images"]
+ return [{"url": url}
+ for url in text.extract_iter(page, 'data-origin="', '"')]
+
+
+class BoothShopExtractor(BoothExtractor):
+ subcategory = "shop"
+ pattern = r"(?:https?://)?([\w-]+\.)booth\.pm/(?:\w\w/)?(?:items)?"
+ example = "https://SHOP.booth.pm/"
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match[0])
+ BoothExtractor.__init__(self, match)
+
+ def shop_items(self):
+ return self._pagination(f"{self.root}/items")
+
+
+def _fallback(url):
+ base = url[:-3]
+ yield base + "jpeg"
+ yield base + "png"
+ yield base + "webp"
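Example: the new booth extractor's "strategy" option (documented in the gallery-dl.conf.5 changes above) selects between retrieving the full HTML page ("webpage", the default) and relying on fallback URLs to guess each file's filename extension ("fallback"). A configuration sketch for the non-default mode:

    {
        "extractor": {
            "booth": {
                "strategy": "fallback"
            }
        }
    }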
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
index 7dfe6b6..45e5dab 100644
--- a/gallery_dl/extractor/cien.py
+++ b/gallery_dl/extractor/cien.py
@@ -52,17 +52,19 @@ class CienArticleExtractor(CienExtractor):
example = "https://ci-en.net/creator/123/article/12345"
def items(self):
- url = f"{self.root}/creator/{self.groups[0]}/article/{self.groups[1]}"
+ author_id, post_id = self.groups
+ url = f"{self.root}/creator/{author_id}/article/{post_id}"
page = self.request(url, notfound="article").text
files = self._extract_files(page)
post = self._extract_jsonld(page)[0]
post["post_url"] = url
- post["post_id"] = text.parse_int(self.groups[1])
+ post["post_id"] = text.parse_int(post_id)
post["count"] = len(files)
post["date"] = text.parse_datetime(post["datePublished"])
try:
+ post["author"]["id"] = text.parse_int(author_id)
del post["publisher"]
del post["sameAs"]
except Exception:
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index dc5b777..fe3b7ed 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -200,7 +200,7 @@ class CivitaiExtractor(Extractor):
if "Authorization" not in self.api.headers and \
not self.cookies.get(
"__Secure-civitai-token", domain=".civitai.com"):
- raise exception.AuthRequired(("'api-key'", "cookies"))
+ raise exception.AuthRequired(("api-key", "authenticated cookies"))
def _parse_query(self, value):
return text.parse_query_list(
@@ -377,6 +377,28 @@ class CivitaiImageExtractor(CivitaiExtractor):
return self.api.image(self.groups[0])
+class CivitaiCollectionExtractor(CivitaiExtractor):
+ subcategory = "collection"
+ directory_fmt = ("{category}", "{user_collection[username]}",
+ "collections", "{collection[id]}{collection[name]:? //}")
+ pattern = BASE_PATTERN + r"/collections/(\d+)"
+ example = "https://civitai.com/collections/12345"
+
+ def images(self):
+ cid = int(self.groups[0])
+ self.kwdict["collection"] = col = self.api.collection(cid)
+ self.kwdict["user_collection"] = col.pop("user", None)
+
+ params = {
+ "collectionId" : cid,
+ "period" : "AllTime",
+ "sort" : "Newest",
+ "browsingLevel" : self.api.nsfw,
+ "include" : ("cosmetics",),
+ }
+ return self.api.images(params, defaults=False)
+
+
class CivitaiPostExtractor(CivitaiExtractor):
subcategory = "post"
directory_fmt = ("{category}", "{username|user[username]}", "posts",
@@ -461,6 +483,7 @@ class CivitaiUserExtractor(Dispatch, CivitaiExtractor):
(CivitaiUserPostsExtractor , base + "posts"),
(CivitaiUserImagesExtractor, base + "images"),
(CivitaiUserVideosExtractor, base + "videos"),
+ (CivitaiUserCollectionsExtractor, base + "collections"),
), ("user-images", "user-videos"))
@@ -529,6 +552,22 @@ class CivitaiUserVideosExtractor(CivitaiExtractor):
images = CivitaiUserImagesExtractor.images
+class CivitaiUserCollectionsExtractor(CivitaiExtractor):
+ subcategory = "user-collections"
+ pattern = USER_PATTERN + r"/collections/?(?:\?([^#]+))?"
+ example = "https://civitai.com/user/USER/collections"
+
+ def items(self):
+ user, query = self.groups
+ params = self._parse_query(query)
+ params["userId"] = self.api.user(text.unquote(user))[0]["id"]
+
+ base = f"{self.root}/collections/"
+ for collection in self.api.collections(params):
+ collection["_extractor"] = CivitaiCollectionExtractor
+ yield Message.Queue, f"{base}{collection['id']}", collection
+
+
class CivitaiGeneratedExtractor(CivitaiExtractor):
"""Extractor for your generated files feed"""
subcategory = "generated"
@@ -635,7 +674,7 @@ class CivitaiTrpcAPI():
self.root = extractor.root + "/api/trpc/"
self.headers = {
"content-type" : "application/json",
- "x-client-version": "5.0.920",
+ "x-client-version": "5.0.954",
"x-client-date" : "",
"x-client" : "web",
"x-fingerprint" : "undefined",
@@ -758,6 +797,23 @@ class CivitaiTrpcAPI():
params = self._type_params(params)
return self._pagination(endpoint, params, meta)
+ def collection(self, collection_id):
+ endpoint = "collection.getById"
+ params = {"id": int(collection_id)}
+ return self._call(endpoint, params)["collection"]
+
+ def collections(self, params, defaults=True):
+ endpoint = "collection.getInfinite"
+
+ if defaults:
+ params = self._merge_params(params, {
+ "browsingLevel": self.nsfw,
+ "sort" : "Newest",
+ })
+
+ params = self._type_params(params)
+ return self._pagination(endpoint, params)
+
def user(self, username):
endpoint = "user.getCreator"
params = {"username": username}
@@ -783,9 +839,8 @@ class CivitaiTrpcAPI():
params = {"input": util.json_dumps(input)}
headers["x-client-date"] = str(int(time.time() * 1000))
- response = self.extractor.request(url, params=params, headers=headers)
-
- return response.json()["result"]["data"]["json"]
+ return self.extractor.request_json(
+ url, params=params, headers=headers)["result"]["data"]["json"]
def _pagination(self, endpoint, params, meta=None):
if "cursor" not in params:
diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py
index 7ef4607..6c54156 100644
--- a/gallery_dl/extractor/comick.py
+++ b/gallery_dl/extractor/comick.py
@@ -20,83 +20,27 @@ class ComickBase():
category = "comick"
root = "https://comick.io"
- @memcache(keyarg=1)
- def _manga_info(self, slug):
- url = f"{self.root}/comic/{slug}"
- page = self.request(url).text
- data = self._extract_nextdata(page)
- props = data["props"]["pageProps"]
- comic = props["comic"]
-
- genre = []
- theme = []
- format = ""
- for item in comic["md_comic_md_genres"]:
- item = item["md_genres"]
- group = item["group"]
- if group == "Genre":
- genre.append(item["name"])
- elif group == "Theme":
- theme.append(item["name"])
- else:
- format = item["name"]
-
- if mu := comic["mu_comics"]:
- tags = [c["mu_categories"]["title"]
- for c in mu["mu_comic_categories"]]
- publisher = [p["mu_publishers"]["title"]
- for p in mu["mu_comic_publishers"]]
- else:
- tags = publisher = ()
-
- return {
- "manga": comic["title"],
- "manga_id": comic["id"],
- "manga_hid": comic["hid"],
- "manga_slug": slug,
- "manga_titles": [t["title"] for t in comic["md_titles"]],
- "artist": [a["name"] for a in props["artists"]],
- "author": [a["name"] for a in props["authors"]],
- "genre" : genre,
- "theme" : theme,
- "format": format,
- "tags" : tags,
- "publisher": publisher,
- "published": text.parse_int(comic["year"]),
- "description": comic["desc"],
- "demographic": props["demographic"],
- "origin": comic["iso639_1"],
- "mature": props["matureContent"],
- "rating": comic["content_rating"],
- "rank" : comic["follow_rank"],
- "score" : text.parse_float(comic["bayesian_rating"]),
- "status": "Complete" if comic["status"] == 2 else "Ongoing",
- "links" : comic["links"],
- "_build_id": data["buildId"],
- }
-
- def _chapter_info(self, manga, chstr):
- slug = manga['manga_slug']
- url = (f"{self.root}/_next/data/{manga['_build_id']}"
- f"/comic/{slug}/{chstr}.json")
- params = {"slug": slug, "chapter": chstr}
- return self.request_json(url, params=params)["pageProps"]
-
class ComickChapterExtractor(ComickBase, ChapterExtractor):
"""Extractor for comick.io manga chapters"""
archive_fmt = "{chapter_hid}_{page}"
- pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)"
+ pattern = (BASE_PATTERN + r"/comic/([\w-]+)"
+ r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)")
example = "https://comick.io/comic/MANGA/ID-chapter-123-en"
def metadata(self, page):
slug, chstr = self.groups
- manga = self._manga_info(slug)
- props = self._chapter_info(manga, chstr)
+ manga = _manga_info(self, slug)
+ props = _chapter_info(self, manga, chstr)
ch = props["chapter"]
self._images = ch["md_images"]
- chapter, sep, minor = ch["chap"].partition(".")
+
+ if chapter := ch["chap"]:
+ chapter, sep, minor = chapter.partition(".")
+ else:
+ chapter = 0
+ sep = minor = ""
return {
**manga,
@@ -133,19 +77,32 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
example = "https://comick.io/comic/MANGA"
def items(self):
- slug = self.groups[0]
- manga = self._manga_info(slug)
+ manga = _manga_info(self, self.groups[0])
+ slug = manga["manga_slug"]
+ _manga_info.update(slug, manga)
for ch in self.chapters(manga):
- url = (f"{self.root}/comic/{slug}"
- f"/{ch['hid']}-chapter-{ch['chap']}-{ch['lang']}")
-
ch.update(manga)
- chapter, sep, minor = ch["chap"].partition(".")
- ch["chapter"] = text.parse_int(chapter)
- ch["chapter_minor"] = sep + minor
ch["_extractor"] = ComickChapterExtractor
+ if chapter := ch["chap"]:
+ url = (f"{self.root}/comic/{slug}"
+ f"/{ch['hid']}-chapter-{chapter}-{ch['lang']}")
+ chapter, sep, minor = chapter.partition(".")
+ ch["volume"] = text.parse_int(ch["vol"])
+ ch["chapter"] = text.parse_int(chapter)
+ ch["chapter_minor"] = sep + minor
+ elif volume := ch["vol"]:
+ url = (f"{self.root}/comic/{slug}"
+ f"/{ch['hid']}-volume-{volume}-{ch['lang']}")
+ ch["volume"] = text.parse_int(volume)
+ ch["chapter"] = 0
+ ch["chapter_minor"] = ""
+ else:
+ url = f"{self.root}/comic/{slug}/{ch['hid']}"
+ ch["volume"] = ch["chapter"] = 0
+ ch["chapter_minor"] = ""
+
yield Message.Queue, url, ch
def chapters(self, manga):
@@ -160,8 +117,15 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
"Sec-Fetch-Site": "same-site",
}
- query = text.parse_query(query)
- params = {"lang": query.get("lang") or None}
+ query = text.parse_query_list(query, ("lang",))
+
+ if (lang := query.get("lang")) or (lang := self.config("lang")):
+ if not isinstance(lang, str):
+ lang = ",".join(lang)
+ else:
+ lang = None
+
+ params = {"lang": lang}
params["page"] = page = text.parse_int(query.get("page"), 1)
if date_order := query.get("date-order"):
@@ -172,7 +136,7 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
params["chap-order"] = \
"0" if self.config("chapter-reverse", False) else "1"
- group = query.get("group", None)
+ group = query.get("group")
if group == "0":
group = None
@@ -190,9 +154,73 @@ class ComickMangaExtractor(ComickBase, MangaExtractor):
yield from data["chapters"]
else:
for ch in data["chapters"]:
- if group in ch["group_name"]:
+ if (groups := ch["group_name"]) and group in groups:
yield ch
if data["total"] <= limit * page:
return
params["page"] = page = page + 1
+
+
+@memcache(keyarg=1)
+def _manga_info(self, slug):
+ url = f"{self.root}/comic/{slug}"
+ page = self.request(url).text
+ data = self._extract_nextdata(page)
+ props = data["props"]["pageProps"]
+ comic = props["comic"]
+
+ genre = []
+ theme = []
+ format = ""
+ for item in comic["md_comic_md_genres"]:
+ item = item["md_genres"]
+ group = item["group"]
+ if group == "Genre":
+ genre.append(item["name"])
+ elif group == "Theme":
+ theme.append(item["name"])
+ else:
+ format = item["name"]
+
+ if mu := comic["mu_comics"]:
+ tags = [c["mu_categories"]["title"]
+ for c in mu["mu_comic_categories"]]
+ publisher = [p["mu_publishers"]["title"]
+ for p in mu["mu_comic_publishers"]]
+ else:
+ tags = publisher = ()
+
+ return {
+ "manga": comic["title"],
+ "manga_id": comic["id"],
+ "manga_hid": comic["hid"],
+ "manga_slug": comic["slug"],
+ "manga_titles": [t["title"] for t in comic["md_titles"]],
+ "artist": [a["name"] for a in props["artists"]],
+ "author": [a["name"] for a in props["authors"]],
+ "genre" : genre,
+ "theme" : theme,
+ "format": format,
+ "tags" : tags,
+ "publisher": publisher,
+ "published": text.parse_int(comic["year"]),
+ "description": comic["desc"],
+ "demographic": props["demographic"],
+ "origin": comic["iso639_1"],
+ "mature": props["matureContent"],
+ "rating": comic["content_rating"],
+ "rank" : comic["follow_rank"],
+ "score" : text.parse_float(comic["bayesian_rating"]),
+ "status": "Complete" if comic["status"] == 2 else "Ongoing",
+ "links" : comic["links"],
+ "_build_id": data["buildId"],
+ }
+
+
+def _chapter_info(self, manga, chstr):
+ slug = manga['manga_slug']
+ url = (f"{self.root}/_next/data/{manga['_build_id']}"
+ f"/comic/{slug}/{chstr}.json")
+ params = {"slug": slug, "chapter": chstr}
+ return self.request_json(url, params=params)["pageProps"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d46152b..1ee54de 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -142,9 +142,9 @@ class Extractor():
return values
- def request(self, url, method="GET", session=None,
- retries=None, retry_codes=None, encoding=None,
- fatal=True, notfound=None, **kwargs):
+ def request(self, url, method="GET", session=None, fatal=True,
+ retries=None, retry_codes=None, interval=True,
+ encoding=None, notfound=None, **kwargs):
if session is None:
session = self.session
if retries is None:
@@ -170,7 +170,7 @@ class Extractor():
response = challenge = None
tries = 1
- if self._interval:
+ if self._interval and interval:
seconds = (self._interval() -
(time.time() - Extractor.request_timestamp))
if seconds > 0.0:
@@ -464,7 +464,9 @@ class Extractor():
if custom_ua is None or custom_ua == "auto":
pass
elif custom_ua == "browser":
- headers["User-Agent"] = _browser_useragent()
+ headers["User-Agent"] = _browser_useragent(None)
+ elif custom_ua[0] == "@":
+ headers["User-Agent"] = _browser_useragent(custom_ua[1:])
elif self.useragent is Extractor.useragent and not self.browser or \
custom_ua is not config.get(("extractor",), "user-agent"):
headers["User-Agent"] = custom_ua
@@ -539,6 +541,10 @@ class Extractor():
try:
with open(path) as fp:
cookies = util.cookiestxt_load(fp)
+ except ValueError as exc:
+ self.log.warning("cookies: Invalid Netscape cookies.txt file "
+ "'%s' (%s: %s)",
+ cookies_source, exc.__class__.__name__, exc)
except Exception as exc:
self.log.warning("cookies: Failed to load '%s' (%s: %s)",
cookies_source, exc.__class__.__name__, exc)
@@ -1042,19 +1048,31 @@ def _build_requests_adapter(
return adapter
-@cache.cache(maxage=86400)
-def _browser_useragent():
+@cache.cache(maxage=86400, keyarg=0)
+def _browser_useragent(browser):
"""Get User-Agent header from default browser"""
import webbrowser
- import socket
+ try:
+ open = webbrowser.get(browser).open
+ except webbrowser.Error:
+ if not browser:
+ raise
+ import shutil
+ if not (browser := shutil.which(browser)):
+ raise
+
+ def open(url):
+ util.Popen((browser, url),
+ start_new_session=False if util.WINDOWS else True)
+ import socket
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("127.0.0.1", 0))
server.listen(1)
host, port = server.getsockname()
- webbrowser.open(f"http://{host}:{port}/user-agent")
+ open(f"http://{host}:{port}/user-agent")
client = server.accept()[0]
server.close()
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ff071c5..019410c 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -236,7 +236,7 @@ BASE_PATTERN = DanbooruExtractor.update({
},
"aibooru": {
"root": None,
- "pattern": r"(?:safe\.)?aibooru\.online",
+ "pattern": r"(?:safe\.|general\.)?aibooru\.(?:online|download)",
},
"booruvar": {
"root": "https://booru.borvar.art",
diff --git a/gallery_dl/extractor/dankefuerslesen.py b/gallery_dl/extractor/dankefuerslesen.py
index a2b0f42..1c4b7d8 100644
--- a/gallery_dl/extractor/dankefuerslesen.py
+++ b/gallery_dl/extractor/dankefuerslesen.py
@@ -59,6 +59,9 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor):
return {
"manga" : manga["title"],
"manga_slug": manga["slug"],
+ "author" : manga["author"],
+ "artist" : manga["artist"],
+ "description": manga["description"],
"title" : data["title"],
"volume" : text.parse_int(data["volume"]),
"chapter" : text.parse_int(chapter),
@@ -114,7 +117,6 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor):
data["chapter"] = text.parse_int(ch)
data["chapter_minor"] = ""
- manga.update(data)
- results.append((f"{base}{ch}/1/", manga))
+ results.append((f"{base}{ch}/1/", {**manga, **data}))
return results
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 66e2a1e..d900f4c 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -430,13 +430,15 @@ class DeviantartExtractor(Extractor):
if children := content.get("content"):
html.append('<p style="')
- attrs = content["attrs"]
- if attrs.get("textAlign"):
- html.append("text-align:")
- html.append(attrs["textAlign"])
- html.append(";")
- self._tiptap_process_indentation(html, attrs)
- html.append('">')
+ if attrs := content.get("attrs"):
+ if align := attrs.get("textAlign"):
+ html.append("text-align:")
+ html.append(align)
+ html.append(";")
+ self._tiptap_process_indentation(html, attrs)
+ html.append('">')
+ else:
+ html.append('margin-inline-start:0px">')
for block in children:
self._tiptap_process_content(html, block)
diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py
index 787786e..91672bb 100644
--- a/gallery_dl/extractor/everia.py
+++ b/gallery_dl/extractor/everia.py
@@ -52,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor):
url = self.root + self.groups[0] + "/"
page = self.request(url).text
content = text.extr(page, 'itemprop="text">', "<h3")
- urls = util.re(r'img.*?src="([^"]+)').findall(content)
+ urls = util.re(r'img.*?lazy-src="([^"]+)').findall(content)
data = {
"title": text.unescape(
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index 069ed99..f9ed1ab 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -7,7 +7,7 @@
"""Extractors for https://www.facebook.com/"""
from .common import Extractor, Message, Dispatch
-from .. import text, exception
+from .. import text, util, exception
from ..cache import memcache
BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com"
@@ -61,6 +61,7 @@ class FacebookExtractor(Extractor):
"user_id": text.extr(
set_page, '"owner":{"__typename":"User","id":"', '"'
),
+ "user_pfbid": "",
"title": self.decode_all(text.extr(
set_page, '"title":{"text":"', '"'
)),
@@ -74,6 +75,15 @@ class FacebookExtractor(Extractor):
)
}
+ if directory["user_id"].startswith("pfbid"):
+ directory["user_pfbid"] = directory["user_id"]
+ directory["user_id"] = (
+ text.extr(
+ set_page, '"actors":[{"__typename":"User","id":"', '"') or
+ text.extr(
+ set_page, '"userID":"', '"') or
+ directory["set_id"].split(".")[1])
+
return directory
def parse_photo_page(self, photo_page):
@@ -92,6 +102,7 @@ class FacebookExtractor(Extractor):
"user_id": text.extr(
photo_page, '"owner":{"__typename":"User","id":"', '"'
),
+ "user_pfbid": "",
"caption": self.decode_all(text.extr(
photo_page,
'"message":{"delight_ranges"',
@@ -115,6 +126,11 @@ class FacebookExtractor(Extractor):
)
}
+ if photo["user_id"].startswith("pfbid"):
+ photo["user_pfbid"] = photo["user_id"]
+ photo["user_id"] = text.extr(
+ photo_page, r'\"content_owner_id_new\":\"', r'\"')
+
text.nameext_from_url(photo["url"], photo)
photo["followups_ids"] = []
@@ -296,21 +312,33 @@ class FacebookExtractor(Extractor):
i += 1
@memcache(keyarg=1)
- def _extract_profile_photos_page(self, profile):
- profile_photos_url = f"{self.root}/{profile}/photos_by"
+ def _extract_profile(self, profile, set_id=False):
+ if set_id:
+ url = f"{self.root}/{profile}/photos_by"
+ else:
+ url = f"{self.root}/{profile}"
+ return self._extract_profile_page(url)
+ def _extract_profile_page(self, url):
for _ in range(self.fallback_retries + 1):
- profile_photos_page = self.request(profile_photos_url).text
- if set_id := self._extract_profile_set_id(profile_photos_page):
- break
- self.log.debug("Got empty profile photos page, retrying...")
- else:
- raise exception.AbortExtraction("Failed to extract profile data")
+ page = self.request(url).text
- avatar_page_url = text.extr(
- profile_photos_page, ',"profilePhoto":{"url":"', '"')
+ if page.find('>Page Not Found</title>', 0, 3000) > 0:
+ break
+ if ('"props":{"title":"This content isn\'t available right now"' in
+ page):
+ raise exception.AuthRequired(
+ "authenticated cookies", "profile",
+ "This content isn't available right now")
+
+ set_id = self._extract_profile_set_id(page)
+ user = self._extract_profile_user(page)
+ if set_id or user:
+ user["set_id"] = set_id
+ return user
- return set_id, avatar_page_url.replace("\\/", "/")
+ self.log.debug("Got empty profile photos page, retrying...")
+ return {}
def _extract_profile_set_id(self, profile_photos_page):
set_ids_raw = text.extr(
@@ -325,6 +353,28 @@ class FacebookExtractor(Extractor):
return set_id
+ def _extract_profile_user(self, page):
+ data = text.extr(page, '","user":{"', '},"viewer":{')
+
+ user = None
+ try:
+ user = util.json_loads(f'{{"{data}}}')
+ if user["id"].startswith("pfbid"):
+ user["user_pfbid"] = user["id"]
+ user["id"] = text.extr(page, '"userID":"', '"')
+ user["username"] = (text.extr(page, '"userVanity":"', '"') or
+ text.extr(page, '"vanity":"', '"'))
+ user["profile_tabs"] = [
+ edge["node"]
+ for edge in (user["profile_tabs"]["profile_user"]
+ ["timeline_nav_app_sections"]["edges"])
+ ]
+ except Exception:
+ if user is None:
+ self.log.debug("Failed to extract user data: %s", data)
+ user = {}
+ return user
+
class FacebookSetExtractor(FacebookExtractor):
"""Base class for Facebook Set extractors"""
@@ -418,6 +468,51 @@ class FacebookVideoExtractor(FacebookExtractor):
yield Message.Url, audio["url"], audio
+class FacebookInfoExtractor(FacebookExtractor):
+ """Extractor for Facebook Profile data"""
+ subcategory = "info"
+ directory_fmt = ("{category}", "{username}")
+ pattern = USER_PATTERN + r"/info"
+ example = "https://www.facebook.com/USERNAME/info"
+
+ def items(self):
+ user = self._extract_profile(self.groups[0])
+ return iter(((Message.Directory, user),))
+
+
+class FacebookAlbumsExtractor(FacebookExtractor):
+ """Extractor for Facebook Profile albums"""
+ subcategory = "albums"
+ pattern = USER_PATTERN + r"/photos_albums(?:/([^/?#]+))?"
+ example = "https://www.facebook.com/USERNAME/photos_albums"
+
+ def items(self):
+ profile, name = self.groups
+ url = f"{self.root}/{profile}/photos_albums"
+ page = self.request(url).text
+
+ pos = page.find(
+ '"TimelineAppCollectionAlbumsRenderer","collection":{"id":"')
+ if pos < 0:
+ return
+ if name is not None:
+ name = name.lower()
+
+ items = text.extract(page, '},"pageItems":', '}}},', pos)[0]
+ edges = util.json_loads(items + "}}")["edges"]
+
+ # TODO: use /graphql API endpoint
+ for edge in edges:
+ node = edge["node"]
+ album = node["node"]
+ album["title"] = title = node["title"]["text"]
+ if name is not None and name != title.lower():
+ continue
+ album["_extractor"] = FacebookSetExtractor
+ album["thumbnail"] = (img := node["image"]) and img["uri"]
+ yield Message.Queue, album["url"], album
+
+
class FacebookPhotosExtractor(FacebookExtractor):
"""Extractor for Facebook Profile Photos"""
subcategory = "photos"
@@ -425,7 +520,10 @@ class FacebookPhotosExtractor(FacebookExtractor):
example = "https://www.facebook.com/USERNAME/photos"
def items(self):
- set_id = self._extract_profile_photos_page(self.groups[0])[0]
+        set_id = self._extract_profile(self.groups[0], True).get("set_id")
+ if not set_id:
+ return iter(())
+
set_url = f"{self.root}/media/set/?set={set_id}"
set_page = self.request(set_url).text
set_data = self.parse_set_page(set_page)
@@ -439,7 +537,8 @@ class FacebookAvatarExtractor(FacebookExtractor):
example = "https://www.facebook.com/USERNAME/avatar"
def items(self):
- avatar_page_url = self._extract_profile_photos_page(self.groups[0])[1]
+ user = self._extract_profile(self.groups[0])
+ avatar_page_url = user["profilePhoto"]["url"]
avatar_page = self.photo_page_request_wrapper(avatar_page_url).text
avatar = self.parse_photo_page(avatar_page)
@@ -462,6 +561,8 @@ class FacebookUserExtractor(Dispatch, FacebookExtractor):
def items(self):
base = f"{self.root}/{self.groups[0]}/"
return self._dispatch_extractors((
+ (FacebookInfoExtractor , base + "info"),
(FacebookAvatarExtractor, base + "avatar"),
(FacebookPhotosExtractor, base + "photos"),
+ (FacebookAlbumsExtractor, base + "photos_albums"),
), ("photos",))
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index e529940..91bcd38 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor):
self.start_post = 0
self.start_page = 1
+ def _init(self):
+ if self.config("descriptions") == "html":
+ self._process_description = self._process_description_html
+
def items(self):
self._init_site_filters()
data = self.metadata()
@@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor):
"artist" : text.unescape(extr('/profile">', '<')),
"_body" : extr(
'<div class="boxbody"', '<div class="boxfooter"'),
- "description": text.unescape(text.remove_html(extr(
- '>Description</div>', '</section>')
- .replace("\r\n", "\n"), "", "")),
+ "description": self._process_description(extr(
+ "<div class='picDescript'>", '</section>')
+ .replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")],
"date" : text.parse_datetime(extr("datetime='", "'")),
@@ -106,6 +110,14 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
+ def _process_description(self, description):
+ return text.unescape(text.remove_html(description, "", ""))
+
+ def _process_description_html(self, description):
+ pos1 = description.rfind('</div') # picDescript
+ pos2 = description.rfind('</div', None, pos1) # boxBody
+ return str.strip(description[0:pos2])
+
def _parse_story(self, html):
"""Collect url and metadata for a story"""
extr = text.extract_from(html)
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index 075e1f6..26fd595 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -6,266 +6,39 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://idol.sankakucomplex.com/"""
+"""Extractors for https://www.idolcomplex.com/"""
-from .sankaku import SankakuExtractor
-from .common import Message
-from ..cache import cache
-from .. import text, util, exception
-import collections
-import re
+from . import sankaku
-BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?"
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
+ r"idol(?:\.sankaku)?complex\.com(?:/[a-z]{2})?")
-class IdolcomplexExtractor(SankakuExtractor):
+class IdolcomplexBase():
"""Base class for idolcomplex extractors"""
category = "idolcomplex"
- root = "https://idol.sankakucomplex.com"
- cookies_domain = "idol.sankakucomplex.com"
- cookies_names = ("_idolcomplex_session",)
- referer = False
- request_interval = (3.0, 6.0)
-
- def __init__(self, match):
- SankakuExtractor.__init__(self, match)
- self.logged_in = True
- self.start_page = 1
- self.start_post = 0
+ root = "https://www.idolcomplex.com"
+ cookies_domain = ".idolcomplex.com"
def _init(self):
- self.find_pids = re.compile(
- r" href=[\"#]/\w\w/posts/(\w+)"
- ).findall
- self.find_tags = re.compile(
- r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
- ).findall
-
- def items(self):
- self.login()
- data = self.metadata()
-
- for post_id in util.advance(self.post_ids(), self.start_post):
- post = self._extract_post(post_id)
- url = post["file_url"]
- post.update(data)
- text.nameext_from_url(url, post)
- yield Message.Directory, post
- yield Message.Url, url, post
-
- def skip(self, num):
- self.start_post += num
- return num
-
- def post_ids(self):
- """Return an iterable containing all relevant post ids"""
-
- def login(self):
- if self.cookies_check(self.cookies_names):
- return
-
- username, password = self._get_auth_info()
- if username:
- return self.cookies_update(self._login_impl(username, password))
-
- self.logged_in = False
-
- @cache(maxage=90*86400, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
-
- url = self.root + "/users/login"
- page = self.request(url).text
-
- headers = {
- "Referer": url,
- }
- url = self.root + (text.extr(page, '<form action="', '"') or
- "/en/user/authenticate")
- data = {
- "authenticity_token": text.unescape(text.extr(
- page, 'name="authenticity_token" value="', '"')),
- "url" : "",
- "user[name]" : username,
- "user[password]": password,
- "commit" : "Login",
- }
- self.sleep(10, "login")
- response = self.request(url, method="POST", headers=headers, data=data)
-
- if not response.history or response.url.endswith(
- ("/users/login", "/user/home")):
- raise exception.AuthenticationError()
- return {c.name: c.value for c in response.history[0].cookies}
-
- def _extract_post(self, post_id):
- url = self.root + "/posts/" + post_id
- page = self.request(url, retries=10).text
- extr = text.extract_from(page)
-
- vavg = extr('id="rating"', "</ul>")
- vcnt = extr('>Votes</strong>:', "<")
- pid = extr(">Post ID:", "<")
- created = extr(' title="', '"')
-
- if file_url := extr('>Original:', 'id='):
- file_url = extr(' href="', '"')
- width = extr(">", "x")
- height = extr("", " ")
- else:
- width = extr('<object width=', ' ')
- height = extr('height=', '>')
- file_url = extr('<embed src="', '"')
-
- rating = extr(">Rating:", "<br")
-
- data = {
- "id" : pid.strip(),
- "md5" : file_url.rpartition("/")[2].partition(".")[0],
- "vote_average": (1.0 * vavg.count('class="star-full"') +
- 0.5 * vavg.count('class="star-half"')),
- "vote_count" : text.parse_int(vcnt),
- "created_at" : created,
- "date" : text.parse_datetime(
- created, "%Y-%m-%d %H:%M:%S.%f"),
- "rating" : text.remove_html(rating).lower(),
- "file_url" : "https:" + text.unescape(file_url),
- "width" : text.parse_int(width),
- "height" : text.parse_int(height),
- }
-
- tags = collections.defaultdict(list)
- tags_list = []
- tags_html = text.extr(page, '<ul id="tag-sidebar"', '</ul>')
- for tag_type, tag_name in self.find_tags(tags_html or ""):
- tags[tag_type].append(text.unquote(tag_name))
- for key, value in tags.items():
- data["tags_" + key] = " ".join(value)
- tags_list += value
- data["tags"] = " ".join(tags_list)
-
- return data
+ self.api = sankaku.SankakuAPI(self)
+ self.api.ROOT = "https://i.sankakuapi.com"
+ self.api.headers["Origin"] = self.root
-class IdolcomplexTagExtractor(IdolcomplexExtractor):
- """Extractor for images from idol.sankakucomplex.com by search-tags"""
- subcategory = "tag"
- directory_fmt = ("{category}", "{search_tags}")
- archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
- example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
- per_page = 20
+class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor):
+ """Extractor for idolcomplex tag searches"""
+ pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)"
+ example = "https://www.idolcomplex.com/en/posts?tags=TAGS"
- def __init__(self, match):
- IdolcomplexExtractor.__init__(self, match)
- query = text.parse_query(match[1])
- self.tags = text.unquote(query.get("tags", "").replace("+", " "))
- self.start_page = text.parse_int(query.get("page"), 1)
- self.next = text.parse_int(query.get("next"), 0)
- def skip(self, num):
- if self.next:
- self.start_post += num
- else:
- pages, posts = divmod(num, self.per_page)
- self.start_page += pages
- self.start_post += posts
- return num
-
- def metadata(self):
- if not self.next:
- max_page = 50 if self.logged_in else 25
- if self.start_page > max_page:
- self.log.info("Traversing from page %d to page %d",
- max_page, self.start_page)
- self.start_post += self.per_page * (self.start_page - max_page)
- self.start_page = max_page
-
- tags = self.tags.split()
- if not self.logged_in and len(tags) > 4:
- raise exception.AbortExtraction(
- "Non-members can only search up to 4 tags at once")
- return {"search_tags": " ".join(tags)}
-
- def post_ids(self):
- url = self.root + "/en/posts"
-
- params = {"auto_page": "t"}
- if self.next:
- params["next"] = self.next
- else:
- params["page"] = self.start_page
- params["tags"] = self.tags
-
- while True:
- response = self.request(url, params=params, retries=10)
- if response.history and "/posts/premium" in response.url:
- self.log.warning("HTTP redirect to %s", response.url)
- page = response.text
-
- yield from text.extract_iter(page, '"id":"', '"')
-
- next_page_url = text.extr(page, 'next-page-url="', '"')
- if not next_page_url:
- return
-
- url, _, next_params = text.unquote(
- text.unescape(text.unescape(next_page_url))).partition("?")
- next_params = text.parse_query(next_params)
-
- if "next" in next_params:
- # stop if the same "next" value occurs twice in a row (#265)
- if "next" in params and params["next"] == next_params["next"]:
- return
- next_params["page"] = "2"
-
- if url[0] == "/":
- url = self.root + url
- params = next_params
-
-
-class IdolcomplexPoolExtractor(IdolcomplexExtractor):
- """Extractor for image-pools from idol.sankakucomplex.com"""
- subcategory = "pool"
- directory_fmt = ("{category}", "pool", "{pool}")
- archive_fmt = "p_{pool}_{id}"
+class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor):
+ """Extractor for idolcomplex pools"""
pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
- example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
- per_page = 24
-
- def skip(self, num):
- pages, posts = divmod(num, self.per_page)
- self.start_page += pages
- self.start_post += posts
- return num
-
- def metadata(self):
- return {"pool": self.groups[0]}
-
- def post_ids(self):
- if not self.logged_in:
- self.log.warning("Login required")
-
- url = self.root + "/pools/show/" + self.groups[0]
- params = {"page": self.start_page}
-
- while True:
- page = self.request(url, params=params, retries=10).text
- pos = page.find('id="pool-show"') + 1
- post_ids = self.find_pids(page, pos)
-
- yield from post_ids
- if len(post_ids) < self.per_page:
- return
- params["page"] += 1
-
+ example = "https://www.idolcomplex.com/en/pools/0123456789abcdef"
-class IdolcomplexPostExtractor(IdolcomplexExtractor):
- """Extractor for single images from idol.sankakucomplex.com"""
- subcategory = "post"
- archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)"
- example = "https://idol.sankakucomplex.com/posts/0123456789abcdef"
- def post_ids(self):
- return (self.groups[0],)
+class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor):
+ """Extractor for individual idolcomplex posts"""
+ pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
+ example = "https://www.idolcomplex.com/en/posts/0123456789abcdef"
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 0e5ce7e..fccc466 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -372,14 +372,78 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
class FappicImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from fappic.com"""
category = "fappic"
- pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
- example = "https://fappic.com/abc123/NAME.EXT"
+ pattern = (r"(?:https?://)?(?:www\.|img\d+\.)?fappic\.com"
+ r"/(?:i/\d+/())?(\w{10,})(?:/|\.)\w+")
+ example = "https://fappic.com/abcde12345/NAME.EXT"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+
+ thumb, token = self.groups
+ if thumb is not None and token.endswith("_t"):
+ self.token = token = token[:-2]
+ else:
+ self.token = token
+ self.page_url = f"https://fappic.com/{token}/pic.jpg"
def get_info(self, page):
url , pos = text.extract(page, '<a href="#"><img src="', '"')
filename, pos = text.extract(page, 'alt="', '"', pos)
+ return url, text.re(r"^Porn[ -]Pic(?:s|ture)[ -]").sub("", filename)
+
- if filename.startswith("Porn-Picture-"):
- filename = filename[13:]
+class PicstateImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from picstate.com"""
+ category = "picstate"
+ pattern = r"(?:https?://)?((?:www\.)?picstate\.com/view/full/([^/?#]+))"
+ example = "https://picstate.com/view/full/123"
+ def get_info(self, page):
+ pos = page.index(' id="image_container"')
+ url , pos = text.extract(page, '<img src="', '"', pos)
+ filename, pos = text.extract(page, 'alt="', '"', pos)
return url, filename
+
+
+class ImgdriveImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from imgdrive.net"""
+ category = "imgdrive"
+ pattern = (r"(?:https?://)?(?:www\.)?(img(drive|taxi|wallet)\.(?:com|net)"
+ r"/img-(\w+)\.html)")
+ example = "https://imgdrive.net/img-0123456789abc.html"
+
+ def __init__(self, match):
+ path, category, self.token = match.groups()
+ self.page_url = f"https://{path}"
+ self.category = f"img{category}"
+ Extractor.__init__(self, match)
+
+ def get_info(self, page):
+ title, pos = text.extract(
+ page, 'property="og:title" content="', '"')
+ image, pos = text.extract(
+ page, 'property="og:image" content="', '"', pos)
+ return image.replace("/small/", "/big/"), title.rsplit(" | ", 2)[0]
+
+
+class SilverpicImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from silverpic.com"""
+ category = "silverpic"
+ pattern = (r"(?:https?://)?((?:www\.)?silverpic\.com"
+ r"/([a-z0-9]{10,})/[\S]+\.html)")
+ example = "https://silverpic.com/a1b2c3d4f5g6/NAME.EXT.html"
+
+ def get_info(self, page):
+ url, pos = text.extract(page, '<img src="/img/', '"')
+ alt, pos = text.extract(page, 'alt="', '"', pos)
+ return f"https://silverpic.com/img/{url}", alt
+
+ def metadata(self, page):
+ pos = page.find('<img src="/img/')
+ width = text.extract(page, 'width="', '"', pos)[0]
+ height = text.extract(page, 'height="', '"', pos)[0]
+
+ return {
+ "width" : text.parse_int(width),
+ "height": text.parse_int(height),
+ }
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 6213e9a..b5450d5 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -223,7 +223,8 @@ class InstagramExtractor(Extractor):
for num, item in enumerate(items, 1):
try:
- image = item["image_versions2"]["candidates"][0]
+ candidates = item["image_versions2"]["candidates"]
+ image = candidates[0]
except Exception:
self.log.warning("Missing media in post %s",
data["post_shortcode"])
@@ -239,6 +240,22 @@ class InstagramExtractor(Extractor):
video = None
media = image
+ if len(candidates) <= 3 and not post.get("__gdl_gen"):
+ self.log.warning(
+ "%s: Image candidate list possibly incomplete "
+ "(%s items). Consider refreshing your cookies.",
+ data["post_shortcode"], len(candidates))
+ elif image["width"] < item.get("original_width", 0) or \
+ image["height"] < item.get("original_height", 0):
+ self.log.warning(
+ "%s: Available image resolutions lower than the "
+ "original (%sx%s < %sx%s). "
+ "Consider refreshing your cookies.",
+ data["post_shortcode"],
+ image["width"], image["height"],
+ item.get("original_width", 0),
+ item.get("original_height", 0))
+
media = {
"num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or
@@ -694,6 +711,7 @@ class InstagramAvatarExtractor(InstagramExtractor):
"caption" : None,
"like_count": 0,
"image_versions2": {"candidates": (avatar,)},
+ "__gdl_gen" : True,
},)
diff --git a/gallery_dl/extractor/iwara.py b/gallery_dl/extractor/iwara.py
index 934b301..179909b 100644
--- a/gallery_dl/extractor/iwara.py
+++ b/gallery_dl/extractor/iwara.py
@@ -341,7 +341,8 @@ class IwaraAPI():
def favorites(self, type):
if not self.username:
- raise exception.AuthRequired("'username' & 'password'")
+ raise exception.AuthRequired(
+ "username & password", "your favorites")
endpoint = f"/favorites/{type}s"
return self._pagination(endpoint)
diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py
index 1e88891..46139bc 100644
--- a/gallery_dl/extractor/kemono.py
+++ b/gallery_dl/extractor/kemono.py
@@ -102,19 +102,15 @@ class KemonoExtractor(Extractor):
post["username"] = creator["name"]
if comments:
- try:
- post["comments"] = self.api.creator_post_comments(
- service, creator_id, post["id"])
- except exception.HttpError:
+ post["comments"] = cmts = self.api.creator_post_comments(
+ service, creator_id, post["id"])
+ if not isinstance(cmts, list):
+ self.log.debug("%s/%s: %s", creator_id, post["id"], cmts)
post["comments"] = ()
if dms is not None:
if dms is True:
dms = self.api.creator_dms(
post["service"], post["user"])
- try:
- dms = dms["props"]["dms"]
- except Exception:
- dms = ()
post["dms"] = dms
if announcements is not None:
if announcements is True:
@@ -245,16 +241,15 @@ class KemonoExtractor(Extractor):
def _revisions_post(self, post):
post["revision_id"] = 0
- try:
- revs = self.api.creator_post_revisions(
- post["service"], post["user"], post["id"])
- except exception.HttpError:
+ revs = self.api.creator_post_revisions(
+ post["service"], post["user"], post["id"])
+ if not revs:
post["revision_hash"] = self._revision_hash(post)
post["revision_index"] = 1
post["revision_count"] = 1
return (post,)
- revs.insert(0, post)
+ revs.insert(0, post)
for rev in revs:
rev["revision_hash"] = self._revision_hash(rev)
@@ -325,25 +320,14 @@ class KemonoUserExtractor(KemonoExtractor):
def posts(self):
_, _, service, creator_id, query = self.groups
params = text.parse_query(query)
- tag = params.get("tag")
- endpoint = self.config("endpoint")
- if endpoint == "legacy+":
- endpoint = self._posts_legacy_plus
- elif endpoint == "legacy" or tag:
- endpoint = self.api.creator_posts_legacy
+ if self.config("endpoint") in ("posts+", "legacy+"):
+ endpoint = self.api.creator_posts_expand
else:
endpoint = self.api.creator_posts
return endpoint(service, creator_id,
- params.get("o"), params.get("q"), tag)
-
- def _posts_legacy_plus(self, service, creator_id,
- offset=0, query=None, tags=None):
- for post in self.api.creator_posts_legacy(
- service, creator_id, offset, query, tags):
- yield self.api.creator_post(
- service, creator_id, post["id"])["post"]
+ params.get("o"), params.get("q"), params.get("tag"))
class KemonoPostsExtractor(KemonoExtractor):
@@ -589,20 +573,22 @@ class KemonoAPI():
return self._call(endpoint)
def creators(self):
- endpoint = "/creators.txt"
- return self._call(endpoint)
+ endpoint = "/creators"
+ headers = {"Accept": "text/css"}
+ return self._call(endpoint, headers=headers)
def creator_posts(self, service, creator_id,
offset=0, query=None, tags=None):
- endpoint = f"/{service}/user/{creator_id}"
- params = {"q": query, "tag": tags, "o": offset}
+ endpoint = f"/{service}/user/{creator_id}/posts"
+ params = {"o": offset, "tag": tags, "q": query}
return self._pagination(endpoint, params, 50)
- def creator_posts_legacy(self, service, creator_id,
+ def creator_posts_expand(self, service, creator_id,
offset=0, query=None, tags=None):
- endpoint = f"/{service}/user/{creator_id}/posts-legacy"
- params = {"o": offset, "tag": tags, "q": query}
- return self._pagination(endpoint, params, 50, "results")
+ for post in self.creator_posts(
+ service, creator_id, offset, query, tags):
+ yield self.creator_post(
+ service, creator_id, post["id"])["post"]
def creator_announcements(self, service, creator_id):
endpoint = f"/{service}/user/{creator_id}/announcements"
@@ -622,11 +608,11 @@ class KemonoAPI():
def creator_post_comments(self, service, creator_id, post_id):
endpoint = f"/{service}/user/{creator_id}/post/{post_id}/comments"
- return self._call(endpoint)
+ return self._call(endpoint, fatal=False)
def creator_post_revisions(self, service, creator_id, post_id):
endpoint = f"/{service}/user/{creator_id}/post/{post_id}/revisions"
- return self._call(endpoint)
+ return self._call(endpoint, fatal=False)
def creator_profile(self, service, creator_id):
endpoint = f"/{service}/user/{creator_id}/profile"
@@ -657,19 +643,19 @@ class KemonoAPI():
params = {"type": type}
return self._call(endpoint, params)
- def _call(self, endpoint, params=None):
- url = self.root + endpoint
- response = self.extractor.request(url, params=params)
- return response.json()
+ def _call(self, endpoint, params=None, headers=None, fatal=True):
+ return self.extractor.request_json(
+ f"{self.root}{endpoint}", params=params, headers=headers,
+ encoding="utf-8", fatal=fatal)
- def _pagination(self, endpoint, params, batch=50, key=False):
+ def _pagination(self, endpoint, params, batch=50, key=None):
offset = text.parse_int(params.get("o"))
params["o"] = offset - offset % batch
while True:
data = self._call(endpoint, params)
- if key:
+ if key is not None:
data = data.get(key)
if not data:
return
diff --git a/gallery_dl/extractor/madokami.py b/gallery_dl/extractor/madokami.py
index e87dbba..1db5126 100644
--- a/gallery_dl/extractor/madokami.py
+++ b/gallery_dl/extractor/madokami.py
@@ -31,7 +31,7 @@ class MadokamiMangaExtractor(MadokamiExtractor):
def items(self):
username, password = self._get_auth_info()
if not username:
- raise exception.AuthRequired("'username' & 'password'")
+ raise exception.AuthRequired("username & password")
self.session.auth = util.HTTPBasicAuth(username, password)
url = f"{self.root}/Manga/{self.groups[0]}"
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
index f579a2d..5ff601a 100644
--- a/gallery_dl/extractor/misskey.py
+++ b/gallery_dl/extractor/misskey.py
@@ -90,6 +90,10 @@ BASE_PATTERN = MisskeyExtractor.update({
"root": "https://misskey.design",
"pattern": r"misskey\.design",
},
+ "misskey.art": {
+ "root": "https://misskey.art",
+ "pattern": r"misskey\.art",
+ },
"lesbian.energy": {
"root": "https://lesbian.energy",
"pattern": r"lesbian\.energy",
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py
index c81a4d1..48137ce 100644
--- a/gallery_dl/extractor/motherless.py
+++ b/gallery_dl/extractor/motherless.py
@@ -9,7 +9,7 @@
"""Extractors for https://motherless.com/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text, util, exception
from ..cache import memcache
from datetime import timedelta
@@ -23,6 +23,17 @@ class MotherlessExtractor(Extractor):
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
+ def request(self, url, **kwargs):
+ response = Extractor.request(self, url, **kwargs)
+
+ content = response.content
+ if (b'<div class="error-page' in content or
+ b">The page you're looking for cannot be found.<" in content):
+ raise exception.NotFoundError("page")
+
+ self.request = Extractor.request.__get__(self)
+ return response
+
def _extract_media(self, path):
url = f"{self.root}/{path}"
page = self.request(url).text
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index cb0e93e..d34130d 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -308,7 +308,12 @@ class PixivExtractor(Extractor):
square1200 = body["userIllusts"][body["id"]]["url"]
except Exception:
return
+
parts = square1200.rpartition("_p0")[0].split("/")
+ if len(parts) < 6:
+ return self.log.warning(
+ "%s: %s", body["id"], square1200.rpartition("/")[2])
+
del parts[3:5]
parts[3] = "img-original"
base = "/".join(parts)
@@ -424,14 +429,11 @@ class PixivArtworksExtractor(PixivExtractor):
self.user_id = u1 or u2
self.tag = t1 or t2
- if self.sanity_workaround:
- self.cookies_domain = domain = ".pixiv.net"
- self._init_cookies()
- if self._warn_phpsessid:
- PixivArtworksExtractor._warn_phpsessid = False
- if not self.cookies.get("PHPSESSID", domain=domain):
- self.log.warning("No 'PHPSESSID' cookie set. Can detect on"
- "ly non R-18 'limit_sanity_level' works.")
+ if self.sanity_workaround and self._warn_phpsessid:
+ PixivArtworksExtractor._warn_phpsessid = False
+ if not self.cookies.get("PHPSESSID", domain=self.cookies_domain):
+ self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
+ "non R-18 'limit_sanity_level' works.")
def metadata(self):
if self.config("metadata"):
@@ -441,19 +443,16 @@ class PixivArtworksExtractor(PixivExtractor):
def works(self):
works = self.api.user_illusts(self.user_id)
- if self.sanity_workaround:
- body = self._request_ajax(
- f"/user/{self.user_id}/profile/all")
- if not body:
- return ()
+ if self.sanity_workaround and (body := self._request_ajax(
+ f"/user/{self.user_id}/profile/all")):
try:
ajax_ids = list(map(int, body["illusts"]))
ajax_ids.extend(map(int, body["manga"]))
ajax_ids.sort()
except Exception as exc:
+ self.log.debug("", exc_info=exc)
self.log.warning("u%s: Failed to collect artwork IDs "
- "using AJAX API (%s: %s)",
- self.user_id, exc.__class__.__name__, exc)
+ "using AJAX API", self.user_id)
else:
works = self._extend_sanity(works, ajax_ids)
@@ -1262,7 +1261,7 @@ class PixivAppAPI():
def user_illusts(self, user_id):
params = {"user_id": user_id}
- return self._pagination("/v1/user/illusts", params)
+ return self._pagination("/v1/user/illusts", params, user_data="user")
def user_novels(self, user_id):
params = {"user_id": user_id}
@@ -1297,22 +1296,29 @@ class PixivAppAPI():
self.extractor.wait(seconds=300)
continue
- raise exception.AbortExtraction(f"API request failed: {error}")
+ msg = (f"'{msg}'" if (msg := error.get("user_message")) else
+ f"'{msg}'" if (msg := error.get("message")) else
+ error)
+ raise exception.AbortExtraction(f"API request failed: {msg}")
def _pagination(self, endpoint, params,
- key_items="illusts", key_data=None):
- while True:
- data = self._call(endpoint, params)
+ key_items="illusts", key_data=None, user_data=None):
+ data = self._call(endpoint, params)
- if key_data:
- self.data = data.get(key_data)
- key_data = None
+ if key_data is not None:
+ self.data = data.get(key_data)
+ if user_data is not None:
+ if not data[user_data].get("id"):
+ raise exception.NotFoundError("user")
+
+ while True:
yield from data[key_items]
if not data["next_url"]:
return
query = data["next_url"].rpartition("?")[2]
params = text.parse_query(query)
+ data = self._call(endpoint, params)
@cache(maxage=36500*86400, keyarg=0)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 1c93cbf..5caad4b 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -152,12 +152,8 @@ class SankakuPoolExtractor(SankakuExtractor):
pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)"
example = "https://sankaku.app/books/12345"
- def __init__(self, match):
- SankakuExtractor.__init__(self, match)
- self.pool_id = match[1]
-
def metadata(self):
- pool = self.api.pools(self.pool_id)
+ pool = self.api.pools(self.groups[0])
pool["tags"] = [tag["name"] for tag in pool["tags"]]
pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]]
@@ -178,12 +174,8 @@ class SankakuPostExtractor(SankakuExtractor):
pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)"
example = "https://sankaku.app/post/show/12345"
- def __init__(self, match):
- SankakuExtractor.__init__(self, match)
- self.post_id = match[1]
-
def posts(self):
- return self.api.posts(self.post_id)
+ return self.api.posts(self.groups[0])
class SankakuBooksExtractor(SankakuExtractor):
@@ -207,12 +199,14 @@ class SankakuBooksExtractor(SankakuExtractor):
class SankakuAPI():
"""Interface for the sankaku.app API"""
+ ROOT = "https://sankakuapi.com"
+ VERSION = None
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
"Accept" : "application/vnd.sankaku.api+json;v=2",
- "Api-Version": None,
+ "Api-Version": self.VERSION,
"Origin" : extractor.root,
}
@@ -281,7 +275,7 @@ class SankakuAPI():
_authenticate_impl(self.extractor, self.username, self.password)
def _call(self, endpoint, params=None):
- url = "https://sankakuapi.com" + endpoint
+ url = self.ROOT + endpoint
for _ in range(5):
self.authenticate()
response = self.extractor.request(
@@ -307,6 +301,10 @@ class SankakuAPI():
("unauthorized", "invalid-token", "invalid_token")):
_authenticate_impl.invalidate(self.username)
continue
+ try:
+ code = f"'{code.rpartition('__')[2].replace('-', ' ')}'"
+ except Exception:
+ pass
raise exception.AbortExtraction(code)
return data
@@ -357,12 +355,12 @@ class SankakuAPI():
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
- url = "https://sankakuapi.com/auth/token"
- headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
+ api = extr.api
+ url = api.ROOT + "/auth/token"
data = {"login": username, "password": password}
response = extr.request(
- url, method="POST", headers=headers, json=data, fatal=False)
+ url, method="POST", headers=api.headers, json=data, fatal=False)
data = response.json()
if response.status_code >= 400 or not data.get("success"):
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
index 40f047a..ff191db 100644
--- a/gallery_dl/extractor/scrolller.py
+++ b/gallery_dl/extractor/scrolller.py
@@ -20,7 +20,7 @@ class ScrolllerExtractor(Extractor):
category = "scrolller"
root = "https://scrolller.com"
directory_fmt = ("{category}", "{subredditTitle}")
- filename_fmt = "{id}{num:?_//>03}{title:? //}.{extension}"
+ filename_fmt = "{id}{num:?_//>03}{title:? //[:230]}.{extension}"
archive_fmt = "{id}_{num}"
request_interval = (0.5, 1.5)
@@ -115,7 +115,7 @@ class ScrolllerExtractor(Extractor):
)["data"]
def _pagination(self, opname, variables, data=None):
- if data is None:
+ if data is None or not data.get("items"):
data = self._request_graphql(opname, variables)
while True:
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 1caafd1..3c7205a 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -6,9 +6,11 @@
"""Extractors for https://skeb.jp/"""
-from .common import Extractor, Message
+from .common import Extractor, Message, Dispatch
from .. import text
-import itertools
+
+BASE_PATTERN = r"(?:https?://)?skeb\.jp"
+USER_PATTERN = BASE_PATTERN + r"/@([^/?#]+)"
class SkebExtractor(Extractor):
@@ -19,10 +21,6 @@ class SkebExtractor(Extractor):
archive_fmt = "{post_num}_{_file_id}_{content_category}"
root = "https://skeb.jp"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user_name = match[1]
-
def _init(self):
self.thumbnails = self.config("thumbnails", False)
self.article = self.config("article", False)
@@ -65,7 +63,7 @@ class SkebExtractor(Extractor):
url = file["file_url"]
yield Message.Url, url, text.nameext_from_url(url, post)
- def _items_users(self):
+ def items_users(self):
base = self.root + "/@"
for user in self.users():
user["_extractor"] = SkebUserExtractor
@@ -196,44 +194,63 @@ class SkebExtractor(Extractor):
class SkebPostExtractor(SkebExtractor):
"""Extractor for a single skeb post"""
subcategory = "post"
- pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
+ pattern = USER_PATTERN + r"/works/(\d+)"
example = "https://skeb.jp/@USER/works/123"
- def __init__(self, match):
- SkebExtractor.__init__(self, match)
- self.post_num = match[2]
+ def posts(self):
+ return (self.groups,)
+
+
+class SkebWorksExtractor(SkebExtractor):
+ """Extractor for a skeb user's works"""
+ subcategory = "works"
+ pattern = USER_PATTERN + r"/works"
+ example = "https://skeb.jp/@USER/works"
def posts(self):
- return ((self.user_name, self.post_num),)
+ url = f"{self.root}/api/users/{self.groups[0]}/works"
+ params = {"role": "creator", "sort": "date"}
+ return self._pagination(url, params)
-class SkebUserExtractor(SkebExtractor):
- """Extractor for all posts from a skeb user"""
- subcategory = "user"
- pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$"
- example = "https://skeb.jp/@USER"
+class SkebSentrequestsExtractor(SkebExtractor):
+ """Extractor for a skeb user's sent requests"""
+ subcategory = "sentrequests"
+ pattern = USER_PATTERN + r"/sent[ _-]?requests"
+ example = "https://skeb.jp/@USER/sentrequests"
def posts(self):
- url = f"{self.root}/api/users/{self.user_name}/works"
+ url = f"{self.root}/api/users/{self.groups[0]}/works"
+ params = {"role": "client", "sort": "date"}
+ return self._pagination(url, params)
- params = {"role": "creator", "sort": "date"}
- posts = self._pagination(url, params)
+class SkebUserExtractor(Dispatch, SkebExtractor):
+ """Extractor for a skeb user profile"""
+ pattern = USER_PATTERN + r"/?$"
+ example = "https://skeb.jp/@USER"
+
+ def items(self):
if self.config("sent-requests", False):
- params = {"role": "client", "sort": "date"}
- posts = itertools.chain(posts, self._pagination(url, params))
+ default = ("works", "sentrequests")
+ else:
+ default = ("works",)
- return posts
+ base = f"{self.root}/@{self.groups[0]}/"
+ return self._dispatch_extractors((
+ (SkebWorksExtractor , base + "works"),
+ (SkebSentrequestsExtractor, base + "sentrequests"),
+ ), default)
class SkebSearchExtractor(SkebExtractor):
"""Extractor for skeb search results"""
subcategory = "search"
- pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)"
+ pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
example = "https://skeb.jp/search?q=QUERY"
def metadata(self):
- return {"search_tags": text.unquote(self.user_name)}
+ return {"search_tags": text.unquote(self.groups[0])}
def posts(self):
url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries"
@@ -258,7 +275,7 @@ class SkebSearchExtractor(SkebExtractor):
request = {
"indexName": "Request",
- "query": text.unquote(self.user_name),
+ "query": text.unquote(self.groups[0]),
"params": pams + str(page),
}
data = {"requests": (request,)}
@@ -281,13 +298,13 @@ class SkebSearchExtractor(SkebExtractor):
class SkebFollowingExtractor(SkebExtractor):
"""Extractor for all creators followed by a skeb user"""
subcategory = "following"
- pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
+ pattern = USER_PATTERN + r"/following_creators"
example = "https://skeb.jp/@USER/following_creators"
- items = SkebExtractor._items_users
+ items = SkebExtractor.items_users
def users(self):
- endpoint = f"/users/{self.user_name}/following_creators"
+ endpoint = f"/users/{self.groups[0]}/following_creators"
params = {"sort": "date"}
return self._pagination_users(endpoint, params)
@@ -295,12 +312,11 @@ class SkebFollowingExtractor(SkebExtractor):
class SkebFollowingUsersExtractor(SkebExtractor):
"""Extractor for your followed users"""
subcategory = "following-users"
- pattern = r"(?:https?://)?skeb\.jp/following_users()"
+ pattern = BASE_PATTERN + r"/following_users"
example = "https://skeb.jp/following_users"
- items = SkebExtractor._items_users
+ items = SkebExtractor.items_users
def users(self):
endpoint = "/following_users"
- params = {}
- return self._pagination_users(endpoint, params)
+ return self._pagination_users(endpoint, {})
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index d9f1ea2..46507c4 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -171,6 +171,11 @@ class TumblrExtractor(Extractor):
post["count"] = len(posts)
yield msg, url, post
+ def items_blogs(self):
+ for blog in self.blogs():
+ blog["_extractor"] = TumblrUserExtractor
+ yield Message.Queue, blog["url"], blog
+
def posts(self):
"""Return an iterable containing all relevant posts"""
@@ -345,6 +350,30 @@ class TumblrLikesExtractor(TumblrExtractor):
return self.api.likes(self.blog)
+class TumblrFollowingExtractor(TumblrExtractor):
+ """Extractor for a Tumblr user's followed blogs"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/following"
+ example = "https://www.tumblr.com/BLOG/following"
+
+ items = TumblrExtractor.items_blogs
+
+ def blogs(self):
+ return self.api.following(self.blog)
+
+
+class TumblrFollowersExtractor(TumblrExtractor):
+ """Extractor for a Tumblr user's followers"""
+ subcategory = "followers"
+ pattern = BASE_PATTERN + r"/followers"
+ example = "https://www.tumblr.com/BLOG/followers"
+
+ items = TumblrExtractor.items_blogs
+
+ def blogs(self):
+ return self.api.followers(self.blog)
+
+
class TumblrSearchExtractor(TumblrExtractor):
"""Extractor for a Tumblr search"""
subcategory = "search"
@@ -420,6 +449,14 @@ class TumblrAPI(oauth.OAuth1API):
yield from posts
params["before"] = posts[-1]["liked_timestamp"]
+ def following(self, blog):
+ endpoint = f"/v2/blog/{blog}/following"
+ return self._pagination_blogs(endpoint)
+
+ def followers(self, blog):
+ endpoint = f"/v2/blog/{blog}/followers"
+ return self._pagination_blogs(endpoint)
+
def search(self, query, params, mode="top", post_type=None):
"""Retrieve search results"""
endpoint = "/v2/timeline/search"
@@ -556,3 +593,21 @@ class TumblrAPI(oauth.OAuth1API):
params["before"] = None
if params["offset"] >= data["total_posts"]:
return
+
+ def _pagination_blogs(self, endpoint, params=None):
+ if params is None:
+ params = {}
+ if self.api_key:
+ params["api_key"] = self.api_key
+ params["limit"] = 20
+ params["offset"] = text.parse_int(params.get("offset"), 0)
+
+ while True:
+ data = self._call(endpoint, params)
+
+ blogs = data["blogs"]
+ yield from blogs
+
+ params["offset"] = params["offset"] + params["limit"]
+ if params["offset"] >= data["total_blogs"]:
+ return
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 7252d05..4303524 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -217,6 +217,8 @@ class TwitterExtractor(Extractor):
"duration": video_info.get(
"duration_millis", 0) / 1000,
}
+ else:
+ continue
elif "media_url_https" in media:
url = media["media_url_https"]
if url[-4] == ".":
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 0f323e1..75a0137 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -19,7 +19,7 @@ class VkExtractor(Extractor):
category = "vk"
directory_fmt = ("{category}", "{user[name]|user[id]}")
filename_fmt = "{id}.{extension}"
- archive_fmt = "{id}"
+ archive_fmt = "{user[id]}_{id}"
root = "https://vk.com"
request_interval = (0.5, 1.5)
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 42839a8..df09fce 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -22,6 +22,7 @@ class VscoExtractor(Extractor):
directory_fmt = ("{category}", "{user}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
+ tls12 = False
def __init__(self, match):
Extractor.__init__(self, match)
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index e927bc1..00266bd 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -32,6 +32,7 @@ class WikimediaExtractor(BaseExtractor):
f"{self.root.partition('.')[0].rpartition('/')[2]}")
self.per_page = self.config("limit", 50)
+ self.subcategories = False
if useragent := self.config_instance("useragent"):
self.useragent = useragent
@@ -217,8 +218,8 @@ class WikimediaArticleExtractor(WikimediaExtractor):
self.subcategory = prefix
if prefix == "category":
- self.subcategories = \
- True if self.config("subcategories", True) else False
+ if self.config("subcategories", True):
+ self.subcategories = True
self.params = {
"generator": "categorymembers",
"gcmtitle" : path,
@@ -226,12 +227,10 @@ class WikimediaArticleExtractor(WikimediaExtractor):
"gcmlimit" : self.per_page,
}
elif prefix == "file":
- self.subcategories = False
self.params = {
"titles" : path,
}
else:
- self.subcategories = False
self.params = {
"generator": "images",
"gimlimit" : self.per_page,
diff --git a/gallery_dl/extractor/xasiat.py b/gallery_dl/extractor/xasiat.py
new file mode 100644
index 0000000..6aa3168
--- /dev/null
+++ b/gallery_dl/extractor/xasiat.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.xasiat.com"""
+
+from .common import Extractor, Message
+from .. import text, util
+import time
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?xasiat\.com((?:/fr|/ja)?/albums"
+
+
+class XasiatExtractor(Extractor):
+ category = "xasiat"
+ directory_fmt = ("{category}", "{title}")
+ archive_fmt = "{album_url}_{num}"
+ root = "https://www.xasiat.com"
+
+ def items(self):
+ data = {"_extractor": XasiatAlbumExtractor}
+ for url in self.posts():
+ yield Message.Queue, url, data
+
+ def posts(self):
+ return self._pagination(self.groups[0])
+
+ def _pagination(self, path, pnum=1):
+ url = f"{self.root}{path}/"
+ find_posts = util.re(r'class="item ">\s*<a href="([^"]+)').findall
+
+ while True:
+ params = {
+ "mode": "async",
+ "function": "get_block",
+ "block_id": "list_albums_common_albums_list",
+ "sort_by": "post_date",
+ "from": pnum,
+ "_": int(time.time() * 1000)
+ }
+
+ page = self.request(url, params=params).text
+ yield from find_posts(page)
+
+ if "<span>Next</span>" in page:
+ return
+
+ pnum += 1
+
+
+class XasiatAlbumExtractor(XasiatExtractor):
+ subcategory = "album"
+ pattern = BASE_PATTERN + r"/(\d+)/[^/?#]+)"
+ example = "https://www.xasiat.com/albums/12345/TITLE/"
+
+ def items(self):
+ path, album_id = self.groups
+ url = f"{self.root}{path}/"
+ response = self.request(url)
+ extr = text.extract_from(response.text)
+
+ title = extr("<h1>", "<")
+ info = extr('class="info-content"', "</div>")
+ images = extr('class="images"', "</div>")
+
+ urls = list(text.extract_iter(images, 'href="', '"'))
+
+ data = {
+ "title": text.unescape(title),
+ "model": util.re(
+ r'top_models1"></i>\s*(.+)\s*</span').findall(info),
+ "tags": util.re(
+ r'tags/[^"]+\">\s*(.+)\s*</a').findall(info),
+ "album_category": util.re(
+ r'categories/[^"]+\">\s*(.+)\s*</a').findall(info)[0],
+ "album_url": response.url,
+ "album_id": text.parse_int(album_id),
+ "count": len(urls),
+ }
+
+ yield Message.Directory, data
+ for data["num"], url in enumerate(urls, 1):
+ yield Message.Url, url, text.nameext_from_url(url[:-1], data)
+
+
+class XasiatTagExtractor(XasiatExtractor):
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"/tags/[^/?#]+)"
+ example = "https://www.xasiat.com/albums/tags/TAG/"
+
+
+class XasiatCategoryExtractor(XasiatExtractor):
+ subcategory = "category"
+ pattern = BASE_PATTERN + r"/categories/[^/?#]+)"
+ example = "https://www.xasiat.com/albums/categories/CATEGORY/"
+
+
+class XasiatModelExtractor(XasiatExtractor):
+ subcategory = "model"
+ pattern = BASE_PATTERN + r"/models/[^/?#]+)"
+ example = "https://www.xasiat.com/albums/models/MODEL/"
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 3176eb4..9d98e68 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -151,7 +151,10 @@ class Job():
try:
for msg in extractor:
self.dispatch(msg)
- except exception.StopExtraction:
+ except exception.StopExtraction as exc:
+ if exc.depth > 1 and exc.target != extractor.__class__.subcategory:
+ exc.depth -= 1
+ raise
pass
except exception.AbortExtraction as exc:
log.error(exc.message)
@@ -509,12 +512,11 @@ class DownloadJob(Job):
if not self._skipftr or self._skipftr(pathfmt.kwdict):
self._skipcnt += 1
if self._skipcnt >= self._skipmax:
- raise self._skipexc()
+ raise self._skipexc
def download(self, url):
"""Download 'url'"""
- scheme = url.partition(":")[0]
- if downloader := self.get_downloader(scheme):
+ if downloader := self.get_downloader(url[:url.find(":")]):
try:
return downloader.download(url, self.pathfmt)
except OSError as exc:
@@ -604,7 +606,8 @@ class DownloadJob(Job):
elif isinstance(skip, str):
skip, _, smax = skip.partition(":")
if skip == "abort":
- self._skipexc = exception.StopExtraction
+ smax, _, sarg = smax.partition(":")
+ self._skipexc = exception.StopExtraction(sarg or None)
elif skip == "terminate":
self._skipexc = exception.TerminateExtraction
elif skip == "exit":
@@ -731,8 +734,8 @@ class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""
def handle_url(self, url, kwdict):
- if not kwdict["extension"]:
- kwdict["extension"] = "jpg"
+ ext = kwdict["extension"] or "jpg"
+ kwdict["extension"] = self.pathfmt.extension_map(ext, ext)
if self.sleep:
self.extractor.sleep(self.sleep(), "download")
if self.archive and self._archive_write_skip:
@@ -850,7 +853,7 @@ class UrlJob(Job):
stdout_write(url + "\n")
if "_fallback" in kwdict:
for url in kwdict["_fallback"]:
- stdout_write("| " + url + "\n")
+ stdout_write(f"| {url}\n")
def handle_queue(self, url, kwdict):
if cls := kwdict.get("_extractor"):
@@ -909,6 +912,10 @@ class DataJob(Job):
Job.__init__(self, url, parent)
self.file = file
self.data = []
+ self.data_urls = []
+ self.data_post = []
+ self.data_meta = []
+ self.exception = None
self.ascii = config.get(("output",), "ascii", ensure_ascii)
self.resolve = 128 if resolve is True else (resolve or self.resolve)
@@ -934,6 +941,7 @@ class DataJob(Job):
except exception.StopExtraction:
pass
except Exception as exc:
+ self.exception = exc
self.data.append((-1, {
"error" : exc.__class__.__name__,
"message": str(exc),
@@ -957,13 +965,21 @@ class DataJob(Job):
return 0
def handle_url(self, url, kwdict):
- self.data.append((Message.Url, url, self.filter(kwdict)))
+ kwdict = self.filter(kwdict)
+ self.data_urls.append(url)
+ self.data_meta.append(kwdict)
+ self.data.append((Message.Url, url, kwdict))
def handle_directory(self, kwdict):
- self.data.append((Message.Directory, self.filter(kwdict)))
+ kwdict = self.filter(kwdict)
+ self.data_post.append(kwdict)
+ self.data.append((Message.Directory, kwdict))
def handle_queue(self, url, kwdict):
- self.data.append((Message.Queue, url, self.filter(kwdict)))
+ kwdict = self.filter(kwdict)
+ self.data_urls.append(url)
+ self.data_meta.append(kwdict)
+ self.data.append((Message.Queue, url, kwdict))
def handle_queue_resolve(self, url, kwdict):
if cls := kwdict.get("_extractor"):
@@ -972,8 +988,14 @@ class DataJob(Job):
extr = extractor.find(url)
if not extr:
- return self.data.append((Message.Queue, url, self.filter(kwdict)))
+ kwdict = self.filter(kwdict)
+ self.data_urls.append(url)
+ self.data_meta.append(kwdict)
+ return self.data.append((Message.Queue, url, kwdict))
job = self.__class__(extr, self, None, self.ascii, self.resolve-1)
job.data = self.data
+ job.data_urls = self.data_urls
+ job.data_post = self.data_post
+ job.data_meta = self.data_meta
job.run()
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 963f957..fd664e6 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -664,14 +664,18 @@ def build_parser():
selection = parser.add_argument_group("Selection Options")
selection.add_argument(
"-A", "--abort",
- dest="abort", metavar="N", type=int,
- help=("Stop current extractor run "
- "after N consecutive file downloads were skipped"),
+ dest="abort", metavar="N[:TARGET]",
+ help=("Stop current extractor(s) "
+ "after N consecutive file downloads were skipped. "
+ "Specify a TARGET to set how many levels to ascend or "
+              "which subcategory to jump to. "
+ "Examples: '-A 3', '-A 3:2', '-A 3:manga'"),
)
selection.add_argument(
"-T", "--terminate",
- dest="terminate", metavar="N", type=int,
- help=("Stop current and parent extractor runs "
+ dest="terminate", metavar="N",
+ help=("Stop current & parent extractors "
+ "and proceed with the next input URL "
"after N consecutive file downloads were skipped"),
)
selection.add_argument(
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index e4937f4..519a8f4 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -165,9 +165,9 @@ class Formatter(logging.Formatter):
if record.exc_info and not record.exc_text:
record.exc_text = self.formatException(record.exc_info)
if record.exc_text:
- msg = msg + "\n" + record.exc_text
+ msg = f"{msg}\n{record.exc_text}"
if record.stack_info:
- msg = msg + "\n" + record.stack_info
+ msg = f"{msg}\n{record.stack_info}"
return msg
@@ -317,18 +317,7 @@ def configure_standard_streams():
elif not options.get("errors"):
options["errors"] = "replace"
- try:
- stream.reconfigure(**options)
- except AttributeError:
- # no 'reconfigure' support
- oget = options.get
- setattr(sys, name, stream.__class__(
- stream.buffer,
- encoding=oget("encoding", stream.encoding),
- errors=oget("errors", "replace"),
- newline=oget("newline", stream.newlines),
- line_buffering=oget("line_buffering", stream.line_buffering),
- ))
+ stream.reconfigure(**options)
# --------------------------------------------------------------------
@@ -383,10 +372,10 @@ class NullOutput():
class PipeOutput(NullOutput):
def skip(self, path):
- stdout_write(CHAR_SKIP + path + "\n")
+ stdout_write(f"{CHAR_SKIP}{path}\n")
def success(self, path):
- stdout_write(path + "\n")
+ stdout_write(f"{path}\n")
class TerminalOutput():
@@ -401,13 +390,13 @@ class TerminalOutput():
self.shorten = util.identity
def start(self, path):
- stdout_write_flush(self.shorten(" " + path))
+ stdout_write_flush(self.shorten(f" {path}"))
def skip(self, path):
- stdout_write(self.shorten(CHAR_SKIP + path) + "\n")
+ stdout_write(f"{self.shorten(CHAR_SKIP + path)}\n")
def success(self, path):
- stdout_write("\r" + self.shorten(CHAR_SUCCESS + path) + "\n")
+ stdout_write(f"\r{self.shorten(CHAR_SUCCESS + path)}\n")
def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
bdl = util.format_value(bytes_downloaded)
@@ -435,10 +424,10 @@ class ColorOutput(TerminalOutput):
stdout_write_flush(self.shorten(path))
def skip(self, path):
- stdout_write(self.color_skip + self.shorten(path) + "\033[0m\n")
+ stdout_write(f"{self.color_skip}{self.shorten(path)}\x1b[0m\n")
def success(self, path):
- stdout_write(self.color_success + self.shorten(path) + "\033[0m\n")
+ stdout_write(f"{self.color_success}{self.shorten(path)}\x1b[0m\n")
class CustomOutput():
@@ -514,7 +503,7 @@ def shorten_string(txt, limit, sep="…"):
if len(txt) <= limit:
return txt
limit -= len(sep)
- return txt[:limit // 2] + sep + txt[-((limit+1) // 2):]
+ return f"{txt[:limit // 2]}{sep}{txt[-((limit+1) // 2):]}"
def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
@@ -529,7 +518,7 @@ def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
limit -= len(sep)
if text_width == len(txt):
# all characters have a width of 1
- return txt[:limit // 2] + sep + txt[-((limit+1) // 2):]
+ return f"{txt[:limit // 2]}{sep}{txt[-((limit+1) // 2):]}"
# wide characters
left = 0
@@ -548,4 +537,4 @@ def shorten_string_eaw(txt, limit, sep="โ€ฆ", cache=EAWCache()):
break
right -= 1
- return txt[:left] + sep + txt[right+1:]
+ return f"{txt[:left]}{sep}{txt[right+1:]}"
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 795564d..eecbd6c 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -90,6 +90,7 @@ class PathFormat():
restrict = config("path-restrict", "auto")
replace = config("path-replace", "_")
+ conv = config("path-convert")
if restrict == "auto":
restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
elif restrict == "unix":
@@ -100,10 +101,10 @@ class PathFormat():
restrict = "^0-9A-Za-z_."
elif restrict == "ascii+":
restrict = "^0-9@-[\\]-{ #-)+-.;=!}~"
- self.clean_segment = self._build_cleanfunc(restrict, replace)
+ self.clean_segment = _build_cleanfunc(restrict, replace, conv)
remove = config("path-remove", "\x00-\x1f\x7f")
- self.clean_path = self._build_cleanfunc(remove, "")
+ self.clean_path = _build_cleanfunc(remove, "")
strip = config("path-strip", "auto")
if strip == "auto":
@@ -122,7 +123,7 @@ class PathFormat():
basedir = config("base-directory")
sep = os.sep
if basedir is None:
- basedir = "." + sep + "gallery-dl" + sep
+ basedir = f".{sep}gallery-dl{sep}"
elif basedir:
basedir = util.expand_path(basedir)
altsep = os.altsep
@@ -133,37 +134,6 @@ class PathFormat():
basedir = self.clean_path(basedir)
self.basedirectory = basedir
- def _build_cleanfunc(self, chars, repl):
- if not chars:
- return util.identity
- elif isinstance(chars, dict):
- if 0 not in chars:
- chars = self._process_repl_dict(chars)
- chars[0] = None
-
- def func(x, table=str.maketrans(chars)):
- return x.translate(table)
- elif len(chars) == 1:
- def func(x, c=chars, r=repl):
- return x.replace(c, r)
- else:
- return functools.partial(util.re(f"[{chars}]").sub, repl)
- return func
-
- def _process_repl_dict(self, chars):
- # can't modify 'chars' while *directly* iterating over its keys
- for char in [c for c in chars if len(c) > 1]:
- if len(char) == 3 and char[1] == "-":
- citer = range(ord(char[0]), ord(char[2])+1)
- else:
- citer = char
-
- repl = chars.pop(char)
- for c in citer:
- chars[c] = repl
-
- return chars
-
def open(self, mode="wb"):
"""Open file and return a corresponding file object"""
try:
@@ -382,3 +352,51 @@ class PathFormat():
break
self.set_mtime()
+
+
+def _build_convertfunc(func, conv):
+ if len(conv) <= 1:
+ conv = formatter._CONVERSIONS[conv]
+ return lambda x: conv(func(x))
+
+ def convert_many(x):
+ x = func(x)
+ for conv in convs:
+ x = conv(x)
+ return x
+ convs = [formatter._CONVERSIONS[c] for c in conv]
+ return convert_many
+
+
+def _build_cleanfunc(chars, repl, conv=None):
+ if not chars:
+ func = util.identity
+ elif isinstance(chars, dict):
+ if 0 not in chars:
+ chars = _process_repl_dict(chars)
+ chars[0] = None
+
+ def func(x):
+ return x.translate(table)
+ table = str.maketrans(chars)
+ elif len(chars) == 1:
+ def func(x):
+ return x.replace(chars, repl)
+ else:
+ func = functools.partial(util.re(f"[{chars}]").sub, repl)
+ return _build_convertfunc(func, conv) if conv else func
+
+
+def _process_repl_dict(chars):
+ # can't modify 'chars' while *directly* iterating over its keys
+ for char in [c for c in chars if len(c) > 1]:
+ if len(char) == 3 and char[1] == "-":
+ citer = range(ord(char[0]), ord(char[2])+1)
+ else:
+ citer = char
+
+ repl = chars.pop(char)
+ for c in citer:
+ chars[c] = repl
+
+ return chars
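
The path.py hunks above move the clean-function builders to module level and add a 'path-convert' option that chains formatter conversions (looked up in formatter._CONVERSIONS) onto the cleaned path segment. A self-contained sketch of the dict/range handling done by _process_repl_dict(), copied from the code above with a made-up replacement table:

    def _process_repl_dict(chars):
        # expand multi-character keys ("a-c" ranges, "!?" groups) into
        # one translation-table entry per character / code point
        for char in [c for c in chars if len(c) > 1]:
            if len(char) == 3 and char[1] == "-":
                citer = range(ord(char[0]), ord(char[2])+1)
            else:
                citer = char
            repl = chars.pop(char)
            for c in citer:
                chars[c] = repl
        return chars

    table = _process_repl_dict({"a-c": "_", "!?": ""})
    table[0] = None  # _build_cleanfunc() also drops NUL bytes
    print("back!up?.jpg".translate(str.maketrans(table)))  # 'b__kup.jpg'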
diff --git a/gallery_dl/transaction_id.py b/gallery_dl/transaction_id.py
index 915b7b3..f8769d9 100644
--- a/gallery_dl/transaction_id.py
+++ b/gallery_dl/transaction_id.py
@@ -65,8 +65,8 @@ class ClientTransaction():
@cache(maxage=36500*86400, keyarg=1)
def _extract_indices(self, ondemand_s, extractor):
- url = ("https://abs.twimg.com/responsive-web/client-web"
- "/ondemand.s." + ondemand_s + "a.js")
+ url = (f"https://abs.twimg.com/responsive-web/client-web"
+ f"/ondemand.s.{ondemand_s}a.js")
page = extractor.request(url).text
pattern = util.re_compile(r"\(\w\[(\d\d?)\],\s*16\)")
return [int(i) for i in pattern.findall(page)]
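
The only change in this hunk is converting the request URL to an f-string; for context, the regex below it extracts the small integer indices from the fetched script. A toy example with a made-up page snippet standing in for the real ondemand.s file:

    import re

    # made-up stand-in for the fetched ondemand.s script
    page = "a=parseInt(x[5], 16);b=parseInt(x[12],16);c=parseInt(x[3], 16);"
    pattern = re.compile(r"\(\w\[(\d\d?)\],\s*16\)")
    print([int(i) for i in pattern.findall(page)])  # [5, 12, 3]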
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 4027ac6..45ffc9c 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -512,15 +512,15 @@ def cookiestxt_store(fp, cookies):
value = cookie.value
domain = cookie.domain
- fp.write("\t".join((
- domain,
- "TRUE" if domain and domain[0] == "." else "FALSE",
- cookie.path,
- "TRUE" if cookie.secure else "FALSE",
- "0" if cookie.expires is None else str(cookie.expires),
- name,
- value + "\n",
- )))
+ fp.write(
+ f"{domain}\t"
+ f"{'TRUE' if domain and domain[0] == '.' else 'FALSE'}\t"
+ f"{cookie.path}\t"
+ f"{'TRUE' if cookie.secure else 'FALSE'}\t"
+ f"{'0' if cookie.expires is None else str(cookie.expires)}\t"
+ f"{name}\t"
+ f"{value}\n"
+ )
def code_to_language(code, default=None):
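
The cookiestxt_store() rewrite above only switches to a single f-string write; each line is still the standard Netscape cookies.txt layout (domain, subdomain flag, path, secure flag, expiry, name, value, tab-separated). A small sketch with a stand-in cookie object, not http.cookiejar.Cookie, just enough attributes to show the format:

    import io, types

    cookie = types.SimpleNamespace(
        domain=".example.org", path="/", secure=True, expires=None)
    name, value = "session", "abc123"

    buf = io.StringIO()
    buf.write(
        f"{cookie.domain}\t"
        f"{'TRUE' if cookie.domain and cookie.domain[0] == '.' else 'FALSE'}\t"
        f"{cookie.path}\t"
        f"{'TRUE' if cookie.secure else 'FALSE'}\t"
        f"{'0' if cookie.expires is None else str(cookie.expires)}\t"
        f"{name}\t"
        f"{value}\n"
    )
    print(repr(buf.getvalue()))
    # '.example.org\tTRUE\t/\tTRUE\t0\tsession\tabc123\n'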
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index af7e3c6..a6474de 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.30.2"
+__version__ = "1.30.3"
__variant__ = None
diff --git a/scripts/run_tests.py b/scripts/run_tests.py
index d1fd1f1..ebf06fa 100755
--- a/scripts/run_tests.py
+++ b/scripts/run_tests.py
@@ -34,13 +34,13 @@ suite = unittest.TestSuite()
for test in TESTS:
try:
module = __import__(test)
- except ImportError:
- print("unable to import", test)
+ except Exception as exc:
+ sys.stderr.write(f"Failed to import {test}: {exc}\n")
else:
tests = unittest.defaultTestLoader.loadTestsFromModule(module)
suite.addTests(tests)
if __name__ == "__main__":
result = unittest.TextTestRunner(verbosity=2).run(suite)
- if result.errors or result.failures:
+ if not result.wasSuccessful():
sys.exit(1)
diff --git a/setup.py b/setup.py
index c52d1d7..a0bccef 100644
--- a/setup.py
+++ b/setup.py
@@ -111,6 +111,7 @@ def build_setuptools():
"extra": [
"requests[socks]",
"yt-dlp[default]",
+ "jinja2",
"pyyaml",
"toml; python_version < '3.11'",
"truststore; python_version >= '3.10'",
diff --git a/test/test_config.py b/test/test_config.py
index 5c94b1b..064d8e7 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -229,7 +229,7 @@ class TestConfigFiles(unittest.TestCase):
with open(path) as fp:
return util.json_loads(fp.read())
except FileNotFoundError:
- raise unittest.SkipTest(path + " not available")
+ raise unittest.SkipTest(f"{path} not available")
if __name__ == "__main__":
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 5900473..9721d10 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -91,7 +91,7 @@ class TestCookiedict(unittest.TestCase):
self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
def test_domain(self):
- for category in ["exhentai", "idolcomplex", "nijie", "horne"]:
+ for category in ["exhentai", "nijie", "horne"]:
extr = _get_extractor(category)
cookies = extr.cookies
for key in self.cdict:
@@ -108,7 +108,6 @@ class TestCookieLogin(unittest.TestCase):
def test_cookie_login(self):
extr_cookies = {
"exhentai" : ("ipb_member_id", "ipb_pass_hash"),
- "idolcomplex": ("login", "pass_hash"),
"nijie" : ("nijie_tok",),
"horne" : ("horne_tok",),
}
@@ -159,7 +158,7 @@ class TestCookieUtils(unittest.TestCase):
extr.cookies.set("cd_a", "1", domain=extr.cookies_domain)
self.assertTrue(extr.cookies_check(("cd_a",)))
- extr.cookies.set("wd_a", "1", domain="www" + extr.cookies_domain)
+ extr.cookies.set("wd_a", "1", domain=f"www{extr.cookies_domain}")
self.assertFalse(extr.cookies_check(("wd_a",)))
self.assertEqual(len(extr.cookies), 3)
@@ -184,7 +183,7 @@ class TestCookieUtils(unittest.TestCase):
extr.cookies.set("cd_a", "1", domain=extr.cookies_domain)
self.assertTrue(extr.cookies_check(("cd_a",), subdomains=True))
- extr.cookies.set("wd_a", "1", domain="www" + extr.cookies_domain)
+ extr.cookies.set("wd_a", "1", domain=f"www{extr.cookies_domain}")
self.assertTrue(extr.cookies_check(("wd_a",), subdomains=True))
extr.cookies.set("cd_b", "2", domain=extr.cookies_domain)
@@ -244,7 +243,6 @@ def _get_extractor(category):
URLS = {
"exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/",
- "idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
"nijie" : "https://nijie.info/view.php?id=1",
"horne" : "https://horne.red/view.php?id=1",
"test" : "generic:https://example.org/",
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 3e5bf84..ecd8b85 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -183,7 +183,7 @@ class TestDownloaderBase(unittest.TestCase):
@classmethod
def _prepare_destination(cls, content=None, part=True, extension=None):
- name = "file-{}".format(cls.fnum)
+ name = f"file-{cls.fnum}"
cls.fnum += 1
kwdict = {
@@ -199,7 +199,7 @@ class TestDownloaderBase(unittest.TestCase):
pathfmt.build_path()
if content:
- mode = "w" + ("b" if isinstance(content, bytes) else "")
+ mode = "wb" if isinstance(content, bytes) else "w"
with pathfmt.open(mode) as fp:
fp.write(content)
@@ -211,10 +211,10 @@ class TestDownloaderBase(unittest.TestCase):
success = self.downloader.download(url, pathfmt)
# test successful download
- self.assertTrue(success, "downloading '{}' failed".format(url))
+ self.assertTrue(success, f"downloading '{url}' failed")
# test content
- mode = "r" + ("b" if isinstance(output, bytes) else "")
+ mode = "rb" if isinstance(output, bytes) else "r"
with pathfmt.open(mode) as fp:
content = fp.read()
self.assertEqual(content, output)
@@ -245,16 +245,16 @@ class TestHTTPDownloader(TestDownloaderBase):
server = http.server.HTTPServer((host, port), HttpRequestHandler)
except OSError as exc:
raise unittest.SkipTest(
- "cannot spawn local HTTP server ({})".format(exc))
+ f"cannot spawn local HTTP server ({exc})")
host, port = server.server_address
- cls.address = "http://{}:{}".format(host, port)
+ cls.address = f"http://{host}:{port}"
threading.Thread(target=server.serve_forever, daemon=True).start()
def _run_test(self, ext, input, output,
extension, expected_extension=None):
TestDownloaderBase._run_test(
- self, self.address + "/" + ext, input, output,
+ self, f"{self.address}/{ext}", input, output,
extension, expected_extension)
def tearDown(self):
@@ -281,7 +281,7 @@ class TestHTTPDownloader(TestDownloaderBase):
self._run_test("gif", None, DATA["gif"], "jpg", "gif")
def test_http_filesize_min(self):
- url = self.address + "/gif"
+ url = f"{self.address}/gif"
pathfmt = self._prepare_destination(None, extension=None)
self.downloader.minsize = 100
with self.assertLogs(self.downloader.log, "WARNING"):
@@ -290,7 +290,7 @@ class TestHTTPDownloader(TestDownloaderBase):
self.assertEqual(pathfmt.temppath, "")
def test_http_filesize_max(self):
- url = self.address + "/jpg"
+ url = f"{self.address}/jpg"
pathfmt = self._prepare_destination(None, extension=None)
self.downloader.maxsize = 100
with self.assertLogs(self.downloader.log, "WARNING"):
@@ -334,8 +334,8 @@ class HttpRequestHandler(http.server.BaseHTTPRequestHandler):
match = re.match(r"bytes=(\d+)-", self.headers["Range"])
start = int(match[1])
- headers["Content-Range"] = "bytes {}-{}/{}".format(
- start, len(output)-1, len(output))
+ headers["Content-Range"] = \
+ f"bytes {start}-{len(output) - 1}/{len(output)}"
output = output[start:]
else:
status = 200
@@ -408,7 +408,7 @@ for ext, content in SAMPLES:
DATA[ext] = content
for idx, (_, content) in enumerate(SAMPLES):
- DATA["S{:>02}".format(idx)] = content
+ DATA[f"S{idx:>02}"] = content
# reverse mime types mapping
@@ -421,8 +421,8 @@ MIME_TYPES = {
def generate_tests():
def generate_test(idx, ext, content):
def test(self):
- self._run_test("S{:>02}".format(idx), None, content, "bin", ext)
- test.__name__ = "test_http_ext_{:>02}_{}".format(idx, ext)
+ self._run_test(f"S{idx:>02}", None, content, "bin", ext)
+ test.__name__ = f"test_http_ext_{idx:>02}_{ext}"
return test
for idx, (ext, content) in enumerate(SAMPLES):
diff --git a/test/test_extractor.py b/test/test_extractor.py
index bf4aa07..f8b8f09 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -110,7 +110,7 @@ class TestExtractorModule(unittest.TestCase):
except AssertionError:
pass
else:
- self.fail(result["#url"] + ": Test did not fail")
+ self.fail(f"{result['#url']}: Test did not fail")
else:
self.assertCategories(result)
@@ -167,8 +167,7 @@ class TestExtractorModule(unittest.TestCase):
extr.finalize()
except ImportError as exc:
if exc.name in ("youtube_dl", "yt_dlp"):
- raise unittest.SkipTest("cannot import module '{}'".format(
- exc.name))
+ raise unittest.SkipTest(f"cannot import module '{exc.name}'")
raise
def test_docstrings(self):
@@ -179,7 +178,7 @@ class TestExtractorModule(unittest.TestCase):
self.assertNotEqual(
extr1.__doc__,
extr2.__doc__,
- "{} <-> {}".format(extr1, extr2),
+ f"{extr1} <-> {extr2}",
)
def test_names(self):
@@ -191,12 +190,10 @@ class TestExtractorModule(unittest.TestCase):
for extr in extractor.extractors():
if extr.category not in ("", "oauth", "ytdl"):
- expected = "{}{}Extractor".format(
- capitalize(extr.category),
- capitalize(extr.subcategory),
- )
+ expected = (f"{capitalize(extr.category)}"
+ f"{capitalize(extr.subcategory)}Extractor")
if expected[0].isdigit():
- expected = "_" + expected
+ expected = f"_{expected}"
self.assertEqual(expected, extr.__name__)
@@ -225,7 +222,7 @@ class TestExtractorWait(unittest.TestCase):
calls = sleep.mock_calls
self.assertEqual(len(calls), 1)
- self.assertAlmostEqual(calls[0][1][0], 6.0, places=1)
+ self.assertAlmostEqual(calls[0][1][0], 6.0, places=0)
calls = log.info.mock_calls
self.assertEqual(len(calls), 1)
@@ -266,7 +263,7 @@ class TextExtractorOAuth(unittest.TestCase):
def test_oauth1(self):
for category in ("flickr", "smugmug", "tumblr"):
- extr = extractor.find("oauth:" + category)
+ extr = extractor.find(f"oauth:{category}")
with patch.object(extr, "_oauth1_authorization_flow") as m:
for msg in extr:
@@ -275,7 +272,7 @@ class TextExtractorOAuth(unittest.TestCase):
def test_oauth2(self):
for category in ("deviantart", "reddit"):
- extr = extractor.find("oauth:" + category)
+ extr = extractor.find(f"oauth:{category}")
with patch.object(extr, "_oauth2_authorization_code_grant") as m:
for msg in extr:
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 3305983..f3ed9dd 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -73,8 +73,8 @@ class TestFormatter(unittest.TestCase):
self._run_test("{u!H}", "'< / >'")
self._run_test("{n!H}", "")
self._run_test("{a!s}", self.kwdict["a"])
- self._run_test("{a!r}", "'" + self.kwdict["a"] + "'")
- self._run_test("{a!a}", "'" + self.kwdict["a"] + "'")
+ self._run_test("{a!r}", f"'{self.kwdict['a']}'")
+ self._run_test("{a!a}", f"'{self.kwdict['a']}'")
self._run_test("{b!a}", "'\\xe4\\xf6\\xfc'")
self._run_test("{a!S}", self.kwdict["a"])
self._run_test("{l!S}", "a, b, c")
@@ -139,7 +139,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{missing}" , replacement, default)
self._run_test("{missing.attr}", replacement, default)
self._run_test("{missing[key]}", replacement, default)
- self._run_test("{missing:?a//}", "a" + default, default)
+ self._run_test("{missing:?a//}", f"a{default}", default)
def test_fmt_func(self):
self._run_test("{t}" , self.kwdict["t"] , None, int)
@@ -444,11 +444,11 @@ class TestFormatter(unittest.TestCase):
with open(path1, "w") as fp:
fp.write("{a}")
- fmt1 = formatter.parse("\fT " + path1)
+ fmt1 = formatter.parse(f"\fT {path1}")
with open(path2, "w") as fp:
fp.write("{a!u:Rh/C/}\nFooBar")
- fmt2 = formatter.parse("\fT " + path2)
+ fmt2 = formatter.parse(f"\fT {path2}")
self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"])
self.assertEqual(fmt2.format_map(self.kwdict), "HELLO WORLD\nFooBar")
@@ -458,15 +458,18 @@ class TestFormatter(unittest.TestCase):
def test_expression(self):
self._run_test("\fE a", self.kwdict["a"])
- self._run_test("\fE name * 2 + ' ' + a", "{}{} {}".format(
- self.kwdict["name"], self.kwdict["name"], self.kwdict["a"]))
+ self._run_test(
+ "\fE name * 2 + ' ' + a",
+ f"{self.kwdict['name']}{self.kwdict['name']} {self.kwdict['a']}")
def test_fstring(self):
self._run_test("\fF {a}", self.kwdict["a"])
- self._run_test("\fF {name}{name} {a}", "{}{} {}".format(
- self.kwdict["name"], self.kwdict["name"], self.kwdict["a"]))
- self._run_test("\fF foo-'\"{a.upper()}\"'-bar",
- """foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+ self._run_test(
+ "\fF {name}{name} {a}",
+ f"{self.kwdict['name']}{self.kwdict['name']} {self.kwdict['a']}")
+ self._run_test(
+ "\fF foo-'\"{a.upper()}\"'-bar",
+ f"""foo-'"{self.kwdict['a'].upper()}"'-bar""")
def test_template_fstring(self):
with tempfile.TemporaryDirectory() as tmpdirname:
@@ -475,15 +478,15 @@ class TestFormatter(unittest.TestCase):
with open(path1, "w") as fp:
fp.write("{a}")
- fmt1 = formatter.parse("\fTF " + path1)
+ fmt1 = formatter.parse(f"\fTF {path1}")
with open(path2, "w") as fp:
fp.write("foo-'\"{a.upper()}\"'-bar")
- fmt2 = formatter.parse("\fTF " + path2)
+ fmt2 = formatter.parse(f"\fTF {path2}")
self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"])
self.assertEqual(fmt2.format_map(self.kwdict),
- """foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+ f"""foo-'"{self.kwdict['a'].upper()}"'-bar""")
with self.assertRaises(OSError):
formatter.parse("\fTF /")
@@ -493,10 +496,12 @@ class TestFormatter(unittest.TestCase):
formatter.JinjaFormatter.env = None
self._run_test("\fJ {{a}}", self.kwdict["a"])
- self._run_test("\fJ {{name}}{{name}} {{a}}", "{}{} {}".format(
- self.kwdict["name"], self.kwdict["name"], self.kwdict["a"]))
- self._run_test("\fJ foo-'\"{{a | upper}}\"'-bar",
- """foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+ self._run_test(
+ "\fJ {{name}}{{name}} {{a}}",
+ f"{self.kwdict['name']}{self.kwdict['name']} {self.kwdict['a']}")
+ self._run_test(
+ "\fJ foo-'\"{{a | upper}}\"'-bar",
+ f"""foo-'"{self.kwdict['a'].upper()}"'-bar""")
@unittest.skipIf(jinja2 is None, "no jinja2")
def test_template_jinja(self):
@@ -508,15 +513,15 @@ class TestFormatter(unittest.TestCase):
with open(path1, "w") as fp:
fp.write("{{a}}")
- fmt1 = formatter.parse("\fTJ " + path1)
+ fmt1 = formatter.parse(f"\fTJ {path1}")
with open(path2, "w") as fp:
fp.write("foo-'\"{{a | upper}}\"'-bar")
- fmt2 = formatter.parse("\fTJ " + path2)
+ fmt2 = formatter.parse(f"\fTJ {path2}")
self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"])
self.assertEqual(fmt2.format_map(self.kwdict),
- """foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+ f"""foo-'"{self.kwdict['a'].upper()}"'-bar""")
with self.assertRaises(OSError):
formatter.parse("\fTJ /")
@@ -562,7 +567,7 @@ Present Time is ((( dt | dt_fmt("%H:%M:%S") )))
Hello ((( s | sanitize_whitespace ))).
I hope there is enough "(((S|sanitize_whitespace)))" for you.
""")
- fmt = formatter.parse("\fTJ " + path_template)
+ fmt = formatter.parse(f"\fTJ {path_template}")
self.assertEqual(fmt.format_map(self.kwdict), """\
Present Day is January 01, 2010
@@ -607,8 +612,8 @@ def noarg():
finally:
sys.path.pop(0)
- fmt3 = formatter.parse("\fM " + path + ":gentext")
- fmt4 = formatter.parse("\fM " + path + ":lengths")
+ fmt3 = formatter.parse(f"\fM {path}:gentext")
+ fmt4 = formatter.parse(f"\fM {path}:lengths")
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
self.assertEqual(fmt2.format_map(self.kwdict), "168")
diff --git a/test/test_job.py b/test/test_job.py
index 3aa28e8..0a533ea 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -299,7 +299,7 @@ class TestDataJob(TestJob):
for i in range(1, 4):
self.assertEqual(
tjob.data[i][2]["_fallback"],
- ("https://example.org/alt/{}.jpg".format(i),),
+ (f"https://example.org/alt/{i}.jpg",),
)
def test_sleep(self):
@@ -382,13 +382,13 @@ class TestExtractor(Extractor):
}
for i in range(1, 4):
- url = "{}/{}.jpg".format(root, i)
+ url = f"{root}/{i}.jpg"
yield Message.Url, url, text.nameext_from_url(url, {
"num" : i,
"tags": ["foo", "bar", "ใƒ†ใ‚นใƒˆ"],
"user": user,
"author": user,
- "_fallback": ("{}/alt/{}.jpg".format(root, i),),
+ "_fallback": (f"{root}/alt/{i}.jpg",),
})
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 2e39cc7..07bd348 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -52,7 +52,7 @@ class TestPostprocessorModule(unittest.TestCase):
def test_find(self):
for name in (postprocessor.modules):
cls = postprocessor.find(name)
- self.assertEqual(cls.__name__, name.capitalize() + "PP")
+ self.assertEqual(cls.__name__, f"{name.capitalize()}PP")
self.assertIs(cls.__base__, PostProcessor)
self.assertEqual(postprocessor.find("foo"), None)
@@ -129,15 +129,15 @@ class ClassifyTest(BasePostprocessorTest):
self._trigger(("prepare",))
self.pathfmt.build_path()
path = os.path.join(self.dir.name, "test", "Pictures")
- self.assertEqual(self.pathfmt.path, path + "/file.jpg")
- self.assertEqual(self.pathfmt.realpath, path + "/file.jpg")
+ self.assertEqual(self.pathfmt.path, f"{path}/file.jpg")
+ self.assertEqual(self.pathfmt.realpath, f"{path}/file.jpg")
self.pathfmt.set_extension("mp4")
self._trigger(("prepare",))
self.pathfmt.build_path()
path = os.path.join(self.dir.name, "test", "Video")
- self.assertEqual(self.pathfmt.path, path + "/file.mp4")
- self.assertEqual(self.pathfmt.realpath, path + "/file.mp4")
+ self.assertEqual(self.pathfmt.path, f"{path}/file.mp4")
+ self.assertEqual(self.pathfmt.realpath, f"{path}/file.mp4")
def test_classify_noop(self):
pp = self._create()
@@ -169,8 +169,8 @@ class ClassifyTest(BasePostprocessorTest):
self._trigger(("prepare",))
self.pathfmt.build_path()
path = os.path.join(self.dir.name, "test", "foo", "bar")
- self.assertEqual(self.pathfmt.path, path + "/file.foo")
- self.assertEqual(self.pathfmt.realpath, path + "/file.foo")
+ self.assertEqual(self.pathfmt.path, f"{path}/file.foo")
+ self.assertEqual(self.pathfmt.realpath, f"{path}/file.foo")
class DirectoryTest(BasePostprocessorTest):
@@ -179,16 +179,16 @@ class DirectoryTest(BasePostprocessorTest):
self._create()
path = os.path.join(self.dir.name, "test")
- self.assertEqual(self.pathfmt.realdirectory, path + "/")
- self.assertEqual(self.pathfmt.realpath, path + "/file.ext")
+ self.assertEqual(self.pathfmt.realdirectory, f"{path}/")
+ self.assertEqual(self.pathfmt.realpath, f"{path}/file.ext")
self.pathfmt.kwdict["category"] = "custom"
self._trigger()
path = os.path.join(self.dir.name, "custom")
- self.assertEqual(self.pathfmt.realdirectory, path + "/")
+ self.assertEqual(self.pathfmt.realdirectory, f"{path}/")
self.pathfmt.build_path()
- self.assertEqual(self.pathfmt.realpath, path + "/file.ext")
+ self.assertEqual(self.pathfmt.realpath, f"{path}/file.ext")
class ExecTest(BasePostprocessorTest):
@@ -205,10 +205,12 @@ class ExecTest(BasePostprocessorTest):
self._trigger(("after",))
p.assert_called_once_with(
- "echo {0} {0} {1} {2} && rm {0};".format(
- self.pathfmt.realpath,
- self.pathfmt.realdirectory,
- self.pathfmt.filename),
+ (f"echo "
+ f"{self.pathfmt.realpath} "
+ f"{self.pathfmt.realpath} "
+ f"{self.pathfmt.realdirectory} "
+ f"{self.pathfmt.filename} "
+ f"&& rm {self.pathfmt.realpath};"),
shell=True,
creationflags=0,
start_new_session=False,
@@ -254,10 +256,12 @@ class ExecTest(BasePostprocessorTest):
self.assertEqual(p.call_args_list, [
call(
- "echo {0} {0} {1} {2} && rm {0};".format(
- self.pathfmt.realpath,
- self.pathfmt.realdirectory,
- self.pathfmt.filename),
+ (f"echo "
+ f"{self.pathfmt.realpath} "
+ f"{self.pathfmt.realpath} "
+ f"{self.pathfmt.realdirectory} "
+ f"{self.pathfmt.filename} "
+ f"&& rm {self.pathfmt.realpath};"),
shell=True,
creationflags=0,
start_new_session=False,
@@ -287,8 +291,9 @@ class ExecTest(BasePostprocessorTest):
with self.assertLogs() as log:
self._trigger(("after",))
- msg = ("WARNING:postprocessor.exec:'echo {}' returned with "
- "non-zero exit status (123)".format(self.pathfmt.realpath))
+ msg = (f"WARNING:postprocessor.exec:"
+ f"'echo {self.pathfmt.realpath}' "
+ f"returned with non-zero exit status (123)")
self.assertEqual(log.output[0], msg)
def test_async(self):
@@ -426,7 +431,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realpath + ".JSON"
+ path = f"{self.pathfmt.realpath}.JSON"
m.assert_called_once_with(path, "w", encoding="utf-8")
self.assertEqual(self._output(m), """{
@@ -460,7 +465,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realpath + ".JSON"
+ path = f"{self.pathfmt.realpath}.JSON"
m.assert_called_once_with(path, "a", encoding="UTF-8")
self.assertEqual(self._output(m), """{\
"_private" : "foo \\u30d0\\u30fc",\
@@ -481,7 +486,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realpath + ".txt"
+ path = f"{self.pathfmt.realpath}.txt"
m.assert_called_once_with(path, "w", encoding="utf-8")
self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
@@ -561,7 +566,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "file.json"
+ path = f"{self.pathfmt.realdirectory}file.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_extfmt_2(self):
@@ -573,7 +578,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "file.2.EXT-data:tESt"
+ path = f"{self.pathfmt.realdirectory}file.2.EXT-data:tESt"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_directory(self):
@@ -584,7 +589,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "metadata/file.ext.json"
+ path = f"{self.pathfmt.realdirectory}metadata/file.ext.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_directory_2(self):
@@ -596,7 +601,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "metadata/file.json"
+ path = f"{self.pathfmt.realdirectory}metadata/file.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_directory_format(self):
@@ -608,7 +613,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "../json/12500/file.ext.json"
+ path = f"{self.pathfmt.realdirectory}../json/12500/file.ext.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_directory_empty(self):
@@ -619,7 +624,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "./file.ext.json"
+ path = f"{self.pathfmt.realdirectory}./file.ext.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_basedirectory(self):
@@ -628,7 +633,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.basedirectory + "file.ext.json"
+ path = f"{self.pathfmt.basedirectory}file.ext.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_basedirectory_custom(self):
@@ -652,7 +657,7 @@ class MetadataTest(BasePostprocessorTest):
with patch("builtins.open", mock_open()) as m:
self._trigger()
- path = self.pathfmt.realdirectory + "test_file__meta_.data"
+ path = f"{self.pathfmt.realdirectory}test_file__meta_.data"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_meta_path(self):
@@ -663,7 +668,7 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
self.assertEqual(self.pathfmt.kwdict["_meta_path"],
- self.pathfmt.realpath + ".json")
+ f"{self.pathfmt.realpath}.json")
def test_metadata_stdout(self):
self._create({"filename": "-", "indent": None, "sort": True})
@@ -752,7 +757,7 @@ class MetadataTest(BasePostprocessorTest):
self.assertTrue(m.called)
self.assertGreater(len(self._output(m)), 0)
- path = self.pathfmt.realdirectory + "file.ext.json"
+ path = f"{self.pathfmt.realdirectory}file.ext.json"
m.assert_called_once_with(path, "w", encoding="utf-8")
def test_metadata_option_skip_false(self):
@@ -856,7 +861,7 @@ class PythonTest(BasePostprocessorTest):
path = os.path.join(self.dir.name, "module.py")
self._write_module(path)
- self._create({"function": path + ":calc"}, {"_value": 12})
+ self._create({"function": f"{path}:calc"}, {"_value": 12})
self.assertNotIn("_result", self.pathfmt.kwdict)
self._trigger()
@@ -913,7 +918,7 @@ class RenameTest(BasePostprocessorTest):
def test_rename_skip(self):
self._create({"from": "{id}.{extension}"}, {"id": 12345})
path = self._prepare("12345.ext")
- with open(path + "file.ext", "w"):
+ with open(f"{path}file.ext", "w"):
pass
with self.assertLogs("postprocessor.rename", level="WARNING") as cm:
@@ -932,7 +937,7 @@ class ZipTest(BasePostprocessorTest):
self.assertEqual(pp.path, self.pathfmt.realdirectory[:-1])
self.assertEqual(pp.delete, True)
self.assertEqual(pp.args, (
- pp.path + ".zip", "a", zipfile.ZIP_STORED, True,
+ f"{pp.path}.zip", "a", zipfile.ZIP_STORED, True,
))
self.assertTrue(pp.args[0].endswith("/test.zip"))
@@ -942,7 +947,7 @@ class ZipTest(BasePostprocessorTest):
self.assertEqual(pp.path, self.pathfmt.realdirectory[:-1])
self.assertEqual(pp.delete, True)
self.assertEqual(pp.args, (
- pp.path + ".zip", "a", zipfile.ZIP_STORED, True,
+ f"{pp.path}.zip", "a", zipfile.ZIP_STORED, True,
))
self.assertTrue(pp.args[0].endswith("/test.zip"))
@@ -954,7 +959,7 @@ class ZipTest(BasePostprocessorTest):
})
self.assertEqual(pp.delete, False)
self.assertEqual(pp.args, (
- pp.path + ".cbz", "a", zipfile.ZIP_DEFLATED, True,
+ f"{pp.path}.cbz", "a", zipfile.ZIP_DEFLATED, True,
))
self.assertTrue(pp.args[0].endswith("/test.cbz"))
@@ -968,7 +973,7 @@ class ZipTest(BasePostprocessorTest):
# write dummy file with 3 different names
for i in range(3):
- name = "file{}.ext".format(i)
+ name = f"file{i}.ext"
self.pathfmt.temppath = file.name
self.pathfmt.filename = name
@@ -1015,8 +1020,8 @@ class ZipTest(BasePostprocessorTest):
# write 3 files
for i in range(3):
- self.pathfmt.temppath = self.pathfmt.realdirectory + "file.ext"
- self.pathfmt.filename = "file{}.ext".format(i)
+ self.pathfmt.temppath = f"{self.pathfmt.realdirectory}file.ext"
+ self.pathfmt.filename = f"file{i}.ext"
self._trigger()
# write the last file a second time (should be skipped)
diff --git a/test/test_results.py b/test/test_results.py
index 4b1c4c1..05b98bf 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -145,7 +145,8 @@ class TestExtractorResults(unittest.TestCase):
config.set((), key, None)
if auth and not any(extr.config(key) for key in AUTH_KEYS):
- return self._skipped.append((result["#url"], "no auth"))
+ self._skipped.append((result["#url"], "no auth"))
+ self.skipTest("no auth")
if "#options" in result:
for key, value in result["#options"].items():
@@ -155,11 +156,16 @@ class TestExtractorResults(unittest.TestCase):
config.set((), "image-range" , result["#range"])
config.set((), "chapter-range", result["#range"])
- tjob = ResultJob(extr, content=("#sha1_content" in result))
+ tjob = ResultJob(extr,
+ content=("#sha1_content" in result),
+ format=(result.get("#metadata") != "post"))
if "#exception" in result:
- with self.assertRaises(result["#exception"], msg="#exception"):
+ with self.assertRaises(result["#exception"], msg="#exception"), \
+ self.assertLogs() as log_info:
tjob.run()
+ if "#log" in result:
+ self.assertLogEqual(result["#log"], log_info.output)
return
try:
@@ -228,7 +234,7 @@ class TestExtractorResults(unittest.TestCase):
if isinstance(count, str):
self.assertRegex(
count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$", msg="#count")
- expr = "{} {}".format(len_urls, count)
+ expr = f"{len_urls} {count}"
self.assertTrue(eval(expr), msg=expr)
elif isinstance(count, range):
self.assertRange(len_urls, count, msg="#count")
@@ -257,7 +263,11 @@ class TestExtractorResults(unittest.TestCase):
metadata = {k: v for k, v in result.items() if k[0] != "#"}
if metadata:
- for kwdict in tjob.kwdict_list:
+ if result.get("#metadata") == "post":
+ kwdicts = tjob.kwdict_post
+ else:
+ kwdicts = tjob.kwdict_list
+ for kwdict in kwdicts:
self._test_kwdict(kwdict, metadata)
def _test_kwdict(self, kwdict, tests, parent=None):
@@ -274,7 +284,7 @@ class TestExtractorResults(unittest.TestCase):
else:
subtest = False
- path = "{}.{}".format(parent, key) if parent else key
+ path = f"{parent}.{key}" if parent else key
if key.startswith("!"):
self.assertNotIn(key[1:], kwdict, msg=path)
@@ -286,7 +296,7 @@ class TestExtractorResults(unittest.TestCase):
if subtest:
self.assertNotIsInstance(value, str, msg=path)
for idx, item in enumerate(value):
- subpath = "{}[{}]".format(path, idx)
+ subpath = f"{path}[{idx}]"
self._test_kwdict_value(item, test, subpath)
else:
self._test_kwdict_value(value, test, path)
@@ -308,12 +318,18 @@ class TestExtractorResults(unittest.TestCase):
for idx, item in enumerate(test):
if isinstance(item, dict):
subtest = True
- subpath = "{}[{}]".format(path, idx)
- self._test_kwdict(value[idx], item, subpath)
+ subpath = f"{path}[{idx}]"
+ try:
+ obj = value[idx]
+ except Exception as exc:
+ self.fail(f"'{exc.__class__.__name__}: {exc}' "
+ f"when accessing {subpath}")
+ self._test_kwdict(obj, item, subpath)
if not subtest:
self.assertEqual(test, value, msg=path)
elif isinstance(test, str):
if test.startswith("re:"):
+ self.assertIsInstance(value, str, msg=path)
self.assertRegex(value, test[3:], msg=path)
elif test.startswith("dt:"):
self.assertIsInstance(value, datetime.datetime, msg=path)
@@ -324,8 +340,29 @@ class TestExtractorResults(unittest.TestCase):
cls, _, length = test[4:].rpartition(":")
if cls:
self.assertEqual(
- cls, type(value).__name__, msg=path + "/type")
- self.assertEqual(int(length), len(value), msg=path)
+ cls, type(value).__name__, msg=f"{path}/type")
+ try:
+ len_value = len(value)
+ except Exception:
+ len_value = 0
+ for _ in value:
+ len_value += 1
+ self.assertEqual(int(length), len_value, msg=path)
+ elif test.startswith("iso:"):
+ iso = test[4:]
+ if iso in ("dt", "datetime", "8601"):
+ msg = f"{path} / ISO 8601"
+ try:
+ dt = datetime.datetime.fromisoformat(value)
+ except Exception as exc:
+ self.fail(f"Invalid datetime '{value}': {exc} {msg}")
+ self.assertIsInstance(dt, datetime.datetime, msg=msg)
+ elif iso in ("lang", "639", "639-1"):
+ msg = f"{path} / ISO 639-1"
+ self.assertIsInstance(value, str, msg=msg)
+ self.assertRegex(value, r"^[a-z]{2}(-\w+)?$", msg=msg)
+ else:
+ self.fail(f"Unsupported ISO test '{test}'")
else:
self.assertEqual(test, value, msg=path)
else:
@@ -335,7 +372,7 @@ class TestExtractorResults(unittest.TestCase):
class ResultJob(job.DownloadJob):
"""Generate test-results for extractor runs"""
- def __init__(self, url, parent=None, content=False):
+ def __init__(self, url, parent=None, content=False, format=True):
job.DownloadJob.__init__(self, url, parent)
self.queue = False
self.content = content
@@ -343,6 +380,7 @@ class ResultJob(job.DownloadJob):
self.url_list = []
self.url_hash = hashlib.sha1()
self.kwdict_list = []
+ self.kwdict_post = []
self.kwdict_hash = hashlib.sha1()
self.archive_list = []
self.archive_hash = hashlib.sha1()
@@ -353,12 +391,17 @@ class ResultJob(job.DownloadJob):
else:
self._update_content = lambda url, kwdict: None
- self.format_directory = TestFormatter(
- "".join(self.extractor.directory_fmt)).format_map
- self.format_filename = TestFormatter(
- self.extractor.filename_fmt).format_map
- self.format_archive = TestFormatter(
- self.extractor.archive_fmt).format_map
+ if format:
+ self.format_directory = TestFormatter(
+ "".join(self.extractor.directory_fmt)).format_map
+ self.format_filename = TestFormatter(
+ self.extractor.filename_fmt).format_map
+ self.format_archive = TestFormatter(
+ self.extractor.archive_fmt).format_map
+ else:
+ self.format_directory = \
+ self.format_filename = \
+ self.format_archive = lambda kwdict: ""
def run(self):
self._init()
@@ -391,6 +434,8 @@ class ResultJob(job.DownloadJob):
def _update_kwdict(self, kwdict, to_list=True):
if to_list:
self.kwdict_list.append(kwdict.copy())
+ else:
+ self.kwdict_post.append(kwdict.copy())
kwdict = util.filter_dict(kwdict)
self.kwdict_hash.update(
json.dumps(kwdict, sort_keys=True, default=str).encode())
@@ -489,8 +534,7 @@ def load_test_config():
except FileNotFoundError:
pass
except Exception as exc:
- sys.exit("Error when loading {}: {}: {}".format(
- path, exc.__class__.__name__, exc))
+ sys.exit(f"Error when loading {path}: {exc.__class__.__name__}: {exc}")
def result_categories(result):
@@ -553,12 +597,12 @@ def generate_tests():
enum = collections.defaultdict(int)
for result in tests:
base, cat, sub = result_categories(result)
- name = "{}_{}".format(cat, sub)
+ name = f"{cat}_{sub}"
enum[name] += 1
method = _generate_method(result)
method.__doc__ = result["#url"]
- method.__name__ = "test_{}_{}".format(name, enum[name])
+ method.__name__ = f"test_{name}_{enum[name]}"
setattr(TestExtractorResults, method.__name__, method)
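
The test_results.py changes above add '#metadata': "post" (compare expected fields against post-level kwdicts instead of per-file kwdicts), an '#exception' + '#log' combination, and new 'iso:' value checks for ISO 8601 datetimes and ISO 639-1 language codes. A hypothetical result entry (URL and field names invented) using the new checks:

    RESULT = {
        "#url"     : "https://example.org/post/123",   # hypothetical test URL
        "#metadata": "post",       # test against post-level metadata only
        "date"     : "iso:8601",   # any string datetime.fromisoformat() accepts
        "lang"     : "iso:639-1",  # e.g. "en", "ja" or "pt-BR"
    }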
diff --git a/test/test_util.py b/test/test_util.py
index 00e8c4b..4a76769 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -385,7 +385,7 @@ class TestCompileExpression(unittest.TestCase):
self.assertEqual(expr(value), result)
with tempfile.TemporaryDirectory() as path:
- file = path + "/module_sha1.py"
+ file = f"{path}/module_sha1.py"
with open(file, "w") as fp:
fp.write("""
import hashlib
@@ -638,7 +638,7 @@ class TestOther(unittest.TestCase):
self.assertIs(module, datetime)
with tempfile.TemporaryDirectory() as path:
- file = path + "/module_test.py"
+ file = f"{path}/module_test.py"
with open(file, "w") as fp:
fp.write("""
import datetime
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index ecc6d2f..88933e4 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -23,8 +23,8 @@ class Test_CommandlineArguments(unittest.TestCase):
try:
cls.module = __import__(cls.module_name)
except (ImportError, SyntaxError):
- raise unittest.SkipTest("cannot import module '{}'".format(
- cls.module_name))
+ raise unittest.SkipTest(
+ f"cannot import module '{cls.module_name}'")
cls.default = ytdl.parse_command_line(cls.module, [])
cls.ytdlp = hasattr(cls.module, "cookies")