author    Unit 193 <unit193@unit193.net>  2023-01-30 04:41:03 -0500
committer Unit 193 <unit193@unit193.net>  2023-01-30 04:41:03 -0500
commit    8223d9260629b15967d2832f818b5b423c05bea5 (patch)
tree      8e674751080251350b889c7e1a86c8871fa46dfc
parent    770cc6fa7e8df11c358d87531d19d7a85bbd428d (diff)
parent    919f8ba16a7b82ba1099bd25b2c61c7881a05aa2 (diff)
Update upstream source from tag 'upstream/1.24.5'
Update to upstream version '1.24.5' with Debian dir aea31cd33d70a744865423ac09710534f3dd148f
-rw-r--r--  CHANGELOG.md                          30
-rw-r--r--  PKG-INFO                              25
-rw-r--r--  README.rst                             7
-rw-r--r--  data/completion/_gallery-dl            3
-rw-r--r--  data/completion/gallery-dl             2
-rw-r--r--  data/completion/gallery-dl.fish        1
-rw-r--r--  data/man/gallery-dl.1                  5
-rw-r--r--  data/man/gallery-dl.conf.5           150
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          25
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        3
-rw-r--r--  gallery_dl/__init__.py                 4
-rw-r--r--  gallery_dl/downloader/http.py         15
-rw-r--r--  gallery_dl/extractor/__init__.py       3
-rw-r--r--  gallery_dl/extractor/booru.py          6
-rw-r--r--  gallery_dl/extractor/bunkr.py          3
-rw-r--r--  gallery_dl/extractor/common.py        23
-rw-r--r--  gallery_dl/extractor/danbooru.py      25
-rw-r--r--  gallery_dl/extractor/deviantart.py   185
-rw-r--r--  gallery_dl/extractor/fantia.py        17
-rw-r--r--  gallery_dl/extractor/generic.py        4
-rw-r--r--  gallery_dl/extractor/hiperdex.py      27
-rw-r--r--  gallery_dl/extractor/hotleak.py       13
-rw-r--r--  gallery_dl/extractor/instagram.py     47
-rw-r--r--  gallery_dl/extractor/kemonoparty.py   23
-rw-r--r--  gallery_dl/extractor/lexica.py       104
-rw-r--r--  gallery_dl/extractor/mastodon.py      19
-rw-r--r--  gallery_dl/extractor/nudecollect.py  142
-rw-r--r--  gallery_dl/extractor/oauth.py        163
-rw-r--r--  gallery_dl/extractor/philomena.py     11
-rw-r--r--  gallery_dl/extractor/sankaku.py       10
-rw-r--r--  gallery_dl/extractor/twitter.py      127
-rw-r--r--  gallery_dl/extractor/wikifeet.py     118
-rw-r--r--  gallery_dl/job.py                      9
-rw-r--r--  gallery_dl/option.py                  29
-rw-r--r--  gallery_dl/util.py                     9
-rw-r--r--  gallery_dl/version.py                  4
-rw-r--r--  setup.py                               3
-rw-r--r--  test/test_downloader.py                3
38 files changed, 1102 insertions, 295 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f9555ad..3beecbb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,35 @@
# Changelog
+## 1.24.5 - 2023-01-28
+### Additions
+- [booru] add `url` option
+- [danbooru] extend `metadata` option ([#3505](https://github.com/mikf/gallery-dl/issues/3505))
+- [deviantart] add extractor for status updates ([#3539](https://github.com/mikf/gallery-dl/issues/3539), [#3541](https://github.com/mikf/gallery-dl/issues/3541))
+- [deviantart] add support for `/deviation/` and `fav.me` URLs ([#3558](https://github.com/mikf/gallery-dl/issues/3558), [#3560](https://github.com/mikf/gallery-dl/issues/3560))
+- [kemonoparty] extract `hash` metadata for discord files ([#3531](https://github.com/mikf/gallery-dl/issues/3531))
+- [lexica] add `search` extractor ([#3567](https://github.com/mikf/gallery-dl/issues/3567))
+- [mastodon] add `num` and `count` metadata fields ([#3517](https://github.com/mikf/gallery-dl/issues/3517))
+- [nudecollect] add `image` and `album` extractors ([#2430](https://github.com/mikf/gallery-dl/issues/2430), [#2818](https://github.com/mikf/gallery-dl/issues/2818), [#3575](https://github.com/mikf/gallery-dl/issues/3575))
+- [wikifeet] add `gallery` extractor ([#519](https://github.com/mikf/gallery-dl/issues/519), [#3537](https://github.com/mikf/gallery-dl/issues/3537))
+- [downloader:http] add signature checks for `.blend`, `.obj`, and `.clip` files ([#3535](https://github.com/mikf/gallery-dl/issues/3535))
+- add `extractor.retry-codes` option
+- add `-O/--postprocessor-option` command-line option ([#3565](https://github.com/mikf/gallery-dl/issues/3565))
+- improve `write-pages` output
+### Fixes
+- [bunkr] fix downloading `.mkv` and `.ts` files ([#3571](https://github.com/mikf/gallery-dl/issues/3571))
+- [fantia] send `X-CSRF-Token` headers ([#3576](https://github.com/mikf/gallery-dl/issues/3576))
+- [generic] fix regex for non-src image URLs ([#3555](https://github.com/mikf/gallery-dl/issues/3555))
+- [hiperdex] update domain ([#3572](https://github.com/mikf/gallery-dl/issues/3572))
+- [hotleak] fix video URLs ([#3516](https://github.com/mikf/gallery-dl/issues/3516), [#3525](https://github.com/mikf/gallery-dl/issues/3525), [#3563](https://github.com/mikf/gallery-dl/issues/3563), [#3581](https://github.com/mikf/gallery-dl/issues/3581))
+- [instagram] always show `cursor` value after errors ([#3440](https://github.com/mikf/gallery-dl/issues/3440))
+- [instagram] update API domain, headers, and csrf token handling
+- [oauth] show `client-id`/`api-key` values ([#3518](https://github.com/mikf/gallery-dl/issues/3518))
+- [philomena] match URLs with www subdomain
+- [sankaku] update URL pattern ([#3523](https://github.com/mikf/gallery-dl/issues/3523))
+- [twitter] refresh guest tokens ([#3445](https://github.com/mikf/gallery-dl/issues/3445), [#3458](https://github.com/mikf/gallery-dl/issues/3458))
+- [twitter] fix search pagination ([#3536](https://github.com/mikf/gallery-dl/issues/3536), [#3534](https://github.com/mikf/gallery-dl/issues/3534), [#3549](https://github.com/mikf/gallery-dl/issues/3549))
+- [twitter] use `"browser": "firefox"` by default ([#3522](https://github.com/mikf/gallery-dl/issues/3522))
+
## 1.24.4 - 2023-01-11
### Additions
- [downloader:http] add `validate` option
diff --git a/PKG-INFO b/PKG-INFO
index 22c1c63..9165899 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.24.4
+Version: 1.24.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -42,10 +42,10 @@ gallery-dl
*gallery-dl* is a command-line program
to download image galleries and collections
from several image hosting sites
-(see `Supported Sites <docs/supportedsites.md>`__).
+(see `Supported Sites <https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md>`__).
It is a cross-platform tool
-with many `configuration options <docs/configuration.rst>`__
-and powerful `filenaming capabilities <docs/formatting.md>`__.
+with many `configuration options <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst>`__
+and powerful `filenaming capabilities <https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md>`__.
|pypi| |build|
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.bin>`__
Nightly Builds
@@ -168,7 +168,7 @@ from:
gallery-dl [OPTIONS]... URLS...
-Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+Use :code:`gallery-dl --help` or see `<https://github.com/mikf/gallery-dl/blob/master/docs/options.md>`__
for a full list of all command-line options.
@@ -222,13 +222,13 @@ Documentation
-------------
A list of all available configuration options and their descriptions
-can be found in `<docs/configuration.rst>`__.
+can be found in `<https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst>`__.
| For a default configuration file with available options set to their
- default values, see `<docs/gallery-dl.conf>`__.
+ default values, see `<https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf>`__.
| For a commented example with more involved settings and option usage,
- see `<docs/gallery-dl-example.conf>`__.
+ see `<https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf>`__.
Locations
@@ -318,12 +318,11 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.
This can be done via the
-`cookies <docs/configuration.rst#extractorcookies>`__
+`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `Get cookies.txt <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/>`__ for Chrome,
- `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
+ | (e.g. `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
diff --git a/README.rst b/README.rst
index 5028f9b..ed4afa5 100644
--- a/README.rst
+++ b/README.rst
@@ -69,9 +69,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.bin>`__
Nightly Builds
@@ -285,8 +285,7 @@ This can be done via the
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `Get cookies.txt <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/>`__ for Chrome,
- `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
+ | (e.g. `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 1125b36..06e8556 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -70,6 +70,7 @@ _arguments -C -S \
--mtime-from-date'[Set file modification times according to "date" metadata]' \
--exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' \
-{-P,--postprocessor}'[Activate the specified post processor]':'<name>' && rc=0
+{-P,--postprocessor}'[Activate the specified post processor]':'<name>' \
+{-O,--postprocessor-option}'[Additional "<key>=<value>" post processor options]':'<opt>' && rc=0
return rc
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index f57306e..203c87d 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 986d9df..e2a7e6d 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -65,3 +65,4 @@ complete -c gallery-dl -l 'mtime-from-date' -d 'Set file modification times acco
complete -c gallery-dl -x -l 'exec' -d 'Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"'
complete -c gallery-dl -x -l 'exec-after' -d 'Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"'
complete -c gallery-dl -x -s 'P' -l 'postprocessor' -d 'Activate the specified post processor'
+complete -c gallery-dl -x -s 'O' -l 'postprocessor-option' -d 'Additional "<key>=<value>" post processor options'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 00723f3..024ddb3 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-01-11" "1.24.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-01-28" "1.24.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -208,6 +208,9 @@ Execute CMD after all files were downloaded successfully. Example: --exec-after
.TP
.B "\-P, \-\-postprocessor" \f[I]NAME\f[]
Activate the specified post processor
+.TP
+.B "\-O, \-\-postprocessor\-option" \f[I]OPT\f[]
+Additional '<key>=<value>' post processor options
.SH EXAMPLES
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index e5742b7..6b11835 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-01-11" "1.24.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-01-28" "1.24.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -574,8 +574,19 @@ update its contents with cookies received during data extraction.
.br
* \f[I]object\f[] (scheme -> proxy)
-.IP "Default:" 9
-\f[I]null\f[]
+.IP "Example:" 4
+.. code:: json
+
+"http://10.10.1.10:3128"
+
+.. code:: json
+
+{
+"http" : "http://10.10.1.10:3128",
+"https": "http://10.10.1.10:1080",
+"http://10.20.1.128": "http://10.10.1.10:5323"
+}
+
.IP "Description:" 4
Proxy (or proxies) to be used for remote connections.
@@ -590,16 +601,6 @@ It is also possible to set a proxy for a specific host by using
\f[I]scheme://host\f[] as key.
See \f[I]Requests' proxy documentation\f[] for more details.
-Example:
-
-.. code:: json
-
-{
-"http" : "http://10.10.1.10:3128",
-"https": "http://10.10.1.10:1080",
-"http://10.20.1.128": "http://10.10.1.10:5323"
-}
-
Note: If a proxy URL does not include a scheme,
\f[I]http://\f[] is assumed.
@@ -669,6 +670,48 @@ Note: \f[I]requests\f[] and \f[I]urllib3\f[] only support HTTP/1.1, while a real
browser would use HTTP/2.
+.SS extractor.*.headers
+.IP "Type:" 6
+\f[I]object\f[] (name -> value)
+
+.IP "Default:" 9
+.. code:: json
+
+{
+"User-Agent" : "<extractor.*.user-agent>",
+"Accept" : "*/*",
+"Accept-Language": "en-US,en;q=0.5",
+"Accept-Encoding": "gzip, deflate"
+}
+
+
+.IP "Description:" 4
+Additional \f[I]HTTP headers\f[]
+to be sent with each HTTP request.
+
+To disable sending a header, set its value to \f[I]null\f[].
+
+
+.SS extractor.*.ciphers
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+.. code:: json
+
+["ECDHE-ECDSA-AES128-GCM-SHA256",
+"ECDHE-RSA-AES128-GCM-SHA256",
+"ECDHE-ECDSA-CHACHA20-POLY1305",
+"ECDHE-RSA-CHACHA20-POLY1305"]
+
+
+.IP "Description:" 4
+List of TLS/SSL cipher suites in
+\f[I]OpenSSL cipher list format\f[]
+to be passed to
+\f[I]ssl.SSLContext.set_ciphers()\f[]
+
+
.SS extractor.*.keywords
.IP "Type:" 6
\f[I]object\f[] (name -> value)
@@ -897,6 +940,25 @@ will run all three post processors - \f[I]mtime\f[], \f[I]zip\f[], \f[I]exec\f[]
for each downloaded \f[I]pixiv\f[] file.
+.SS extractor.*.postprocessor-options
+.IP "Type:" 6
+\f[I]object\f[] (name -> value)
+
+.IP "Example:" 4
+.. code:: json
+
+{
+"archive": null,
+"keep-files": true
+}
+
+
+.IP "Description:" 4
+Additional \f[I]Postprocessor Options\f[] that get added to each individual
+\f[I]post processor object\f[]
+before initializing it and evaluating filters.
+
+
.SS extractor.*.retries
.IP "Type:" 6
\f[I]integer\f[]
@@ -909,6 +971,26 @@ Maximum number of times a failed HTTP request is retried before
giving up, or \f[I]-1\f[] for infinite retries.
+.SS extractor.*.retry-codes
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]integers\f[]
+
+.IP "Example:" 4
+[404, 429, 430]
+
+.IP "Description:" 4
+Additional \f[I]HTTP response status codes\f[]
+to retry an HTTP request on.
+
+\f[I]2xx\f[] codes (success responses) and
+\f[I]3xx\f[] codes (redirection messages)
+will never be retried and always count as success,
+regardless of this option.
+
+\f[I]5xx\f[] codes (server error responses) will always be retried,
+regardless of this option.
+
+
.SS extractor.*.timeout
.IP "Type:" 6
\f[I]float\f[]
@@ -1208,15 +1290,30 @@ follow the \f[I]source\f[] and download from there if possible.
.SS extractor.danbooru.metadata
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]false\f[]
+.IP "Example:" 4
+.br
+* replacements,comments,ai_tags
+.br
+* ["replacements", "comments", "ai_tags"]
+
.IP "Description:" 4
Extract additional metadata
(notes, artist commentary, parent, children, uploader)
+It is possible to specify a custom list of metadata includes.
+See \f[I]available_includes\f[]
+for possible field names. \f[I]aibooru\f[] also supports \f[I]ai_metadata\f[].
+
Note: This requires 1 additional HTTP request per post.
@@ -1405,7 +1502,7 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], \f[I]"favorite"\f[].
+\f[I]"gallery"\f[], \f[I]"scraps"\f[], \f[I]"journal"\f[], \f[I]"favorite"\f[], \f[I]"status"\f[].
It is possible to use \f[I]"all"\f[] instead of listing all values separately.
@@ -1418,14 +1515,15 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately.
\f[I]"html"\f[]
.IP "Description:" 4
-Selects the output format of journal entries.
+Selects the output format for textual content. This includes journals,
+literature and status updates.
.br
* \f[I]"html"\f[]: HTML with (roughly) the same layout as on DeviantArt.
.br
* \f[I]"text"\f[]: Plain text with image references and HTML tags removed.
.br
-* \f[I]"none"\f[]: Don't download journals.
+* \f[I]"none"\f[]: Don't download textual content.
.SS extractor.deviantart.mature
@@ -3735,6 +3833,20 @@ Extract overlay notes (position and text).
Note: This requires 1 additional HTTP request per post.
+.SS extractor.[booru].url
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"file_url"\f[]
+
+.IP "Example:" 4
+"preview_url"
+
+.IP "Description:" 4
+Alternate field name to retrieve download URLs from.
+
+
.SS extractor.[manga-extractor].chapter-reverse
.IP "Type:" 6
\f[I]bool\f[]
@@ -3978,7 +4090,7 @@ Additional HTTP headers to send when downloading files,
\f[I]list\f[] of \f[I]integers\f[]
.IP "Default:" 9
-\f[I][429]\f[]
+\f[I]extractor.*.retry-codes\f[]
.IP "Description:" 4
Additional \f[I]HTTP response status codes\f[]
@@ -3988,7 +4100,7 @@ Codes \f[I]200\f[], \f[I]206\f[], and \f[I]416\f[] (when resuming a \f[I]partial
download) will never be retried and always count as success,
regardless of this option.
-Codes \f[I]500\f[] - \f[I]599\f[] (server error responses) will always be retried,
+\f[I]5xx\f[] codes (server error responses) will always be retried,
regardless of this option.
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 9eddd2f..3fe1b55 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.24.4
+Version: 1.24.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -42,10 +42,10 @@ gallery-dl
*gallery-dl* is a command-line program
to download image galleries and collections
from several image hosting sites
-(see `Supported Sites <docs/supportedsites.md>`__).
+(see `Supported Sites <https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md>`__).
It is a cross-platform tool
-with many `configuration options <docs/configuration.rst>`__
-and powerful `filenaming capabilities <docs/formatting.md>`__.
+with many `configuration options <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst>`__
+and powerful `filenaming capabilities <https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md>`__.
|pypi| |build|
@@ -106,9 +106,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.4/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.4/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.5/gallery-dl.bin>`__
Nightly Builds
@@ -168,7 +168,7 @@ from:
gallery-dl [OPTIONS]... URLS...
-Use :code:`gallery-dl --help` or see `<docs/options.md>`__
+Use :code:`gallery-dl --help` or see `<https://github.com/mikf/gallery-dl/blob/master/docs/options.md>`__
for a full list of all command-line options.
@@ -222,13 +222,13 @@ Documentation
-------------
A list of all available configuration options and their descriptions
-can be found in `<docs/configuration.rst>`__.
+can be found in `<https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst>`__.
| For a default configuration file with available options set to their
- default values, see `<docs/gallery-dl.conf>`__.
+ default values, see `<https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl.conf>`__.
| For a commented example with more involved settings and option usage,
- see `<docs/gallery-dl-example.conf>`__.
+ see `<https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf>`__.
Locations
@@ -318,12 +318,11 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.
This can be done via the
-`cookies <docs/configuration.rst#extractorcookies>`__
+`cookies <https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst#extractorcookies>`__
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
- | (e.g. `Get cookies.txt <https://chrome.google.com/webstore/detail/get-cookiestxt/bgaddhkoddajcdgocldbbfleckgcbcid/>`__ for Chrome,
- `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
+ | (e.g. `Export Cookies <https://addons.mozilla.org/en-US/firefox/addon/export-cookies-txt/>`__ for Firefox)
- | a list of name-value pairs gathered from your browser's web developer tools
| (in `Chrome <https://developers.google.com/web/tools/chrome-devtools/storage/cookies>`__,
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 599a828..eab1881 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -115,6 +115,7 @@ gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
gallery_dl/extractor/komikcast.py
+gallery_dl/extractor/lexica.py
gallery_dl/extractor/lightroom.py
gallery_dl/extractor/lineblog.py
gallery_dl/extractor/livedoor.py
@@ -144,6 +145,7 @@ gallery_dl/extractor/nijie.py
gallery_dl/extractor/nitter.py
gallery_dl/extractor/nozomi.py
gallery_dl/extractor/nsfwalbum.py
+gallery_dl/extractor/nudecollect.py
gallery_dl/extractor/oauth.py
gallery_dl/extractor/paheal.py
gallery_dl/extractor/patreon.py
@@ -204,6 +206,7 @@ gallery_dl/extractor/webmshare.py
gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
+gallery_dl/extractor/wikifeet.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 611b2b9..245dbf8 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -72,6 +72,8 @@ def main():
else:
profile, _, container = profile.partition("::")
config.set((), "cookies", (browser, profile, keyring, container))
+ if args.options_pp:
+ config.set((), "postprocessor-options", args.options_pp)
for opts in args.options:
config.set(*opts)
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 0bf19c2..6043443 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -38,7 +38,7 @@ class HttpDownloader(DownloaderBase):
self.minsize = self.config("filesize-min")
self.maxsize = self.config("filesize-max")
self.retries = self.config("retries", extractor._retries)
- self.retry_codes = self.config("retry-codes")
+ self.retry_codes = self.config("retry-codes", extractor._retry_codes)
self.timeout = self.config("timeout", extractor._timeout)
self.verify = self.config("verify", extractor._verify)
self.mtime = self.config("mtime", True)
@@ -46,8 +46,6 @@ class HttpDownloader(DownloaderBase):
if self.retries < 0:
self.retries = float("inf")
- if self.retry_codes is None:
- self.retry_codes = [429]
if self.minsize:
minsize = text.parse_bytes(self.minsize)
if not minsize:
@@ -104,7 +102,7 @@ class HttpDownloader(DownloaderBase):
codes = kwdict.get("_http_retry_codes")
if codes:
- retry_codes = self.retry_codes.copy()
+ retry_codes = list(self.retry_codes)
retry_codes += codes
else:
retry_codes = self.retry_codes
@@ -392,6 +390,8 @@ MIME_TYPES = {
"application/x-shockwave-flash": "swf",
"application/ogg": "ogg",
+ # https://www.iana.org/assignments/media-types/model/obj
+ "model/obj": "obj",
"application/octet-stream": "bin",
}
@@ -421,6 +421,13 @@ SIGNATURE_CHECKS = {
"7z" : lambda s: s[0:6] == b"\x37\x7A\xBC\xAF\x27\x1C",
"pdf" : lambda s: s[0:5] == b"%PDF-",
"swf" : lambda s: s[0:3] in (b"CWS", b"FWS"),
+ "blend": lambda s: s[0:7] == b"BLENDER",
+ # unfortunately the Wavefront .obj format doesn't have a signature,
+ # so we check for the existence of Blender's comment
+ "obj" : lambda s: s[0:11] == b"# Blender v",
+ # Celsys Clip Studio Paint format
+ # https://github.com/rasensuihei/cliputils/blob/master/README.md
+ "clip": lambda s: s[0:8] == b"CSFCHUNK",
# check 'bin' files against all other file signatures
"bin" : lambda s: False,
}
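
Editor's note: the three new signature checks can be exercised on their own. In the sketch below, the byte prefixes are copied from the hunk above, while detect() and the sample headers are illustrative stand-ins, not part of the diff.

# Standalone sketch of the new file-signature checks.
SIGNATURE_CHECKS = {
    "blend": lambda s: s[0:7] == b"BLENDER",
    "obj"  : lambda s: s[0:11] == b"# Blender v",
    "clip" : lambda s: s[0:8] == b"CSFCHUNK",
}

def detect(header):
    """Return the matching extension for a file header, or None."""
    for ext, check in SIGNATURE_CHECKS.items():
        if check(header):
            return ext
    return None

assert detect(b"BLENDER-v279RENDH\x00") == "blend"
assert detect(b"# Blender v2.79 OBJ File") == "obj"
assert detect(b"CSFCHUNK\x00\x00\x00\x00") == "clip"
assert detect(b"\x89PNG\r\n\x1a\n") is None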
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f26f6a9..6140c2c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -77,6 +77,7 @@ modules = [
"kemonoparty",
"khinsider",
"komikcast",
+ "lexica",
"lightroom",
"lineblog",
"livedoor",
@@ -102,6 +103,7 @@ modules = [
"nitter",
"nozomi",
"nsfwalbum",
+ "nudecollect",
"paheal",
"patreon",
"philomena",
@@ -158,6 +160,7 @@ modules = [
"webtoons",
"weibo",
"wikiart",
+ "wikifeet",
"xhamster",
"xvideos",
"zerochan",
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index 0d7d13d..cbd0e07 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -27,6 +27,10 @@ class BooruExtractor(BaseExtractor):
notes = self.config("notes", False)
fetch_html = tags or notes
+ url_key = self.config("url")
+ if url_key:
+ self._file_url = operator.itemgetter(url_key)
+
for post in self.posts():
try:
url = self._file_url(post)
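
Editor's note: the new [booru] 'url' option works by swapping the default _file_url accessor for an operator.itemgetter over the configured field name. A minimal sketch, with a made-up post dict:

import operator

post = {
    "file_url"   : "https://example.org/full/image.png",
    "preview_url": "https://example.org/preview/image.png",
}
# "url": "preview_url" in the config selects the alternate field:
file_url = operator.itemgetter("preview_url")
assert file_url(post) == "https://example.org/preview/image.png"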
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 8283fbc..1c339a9 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -75,7 +75,8 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
headers = {"Referer": root.replace("://", "://stream.", 1) + "/"}
for file in files:
if file["file"].endswith(
- (".mp4", ".m4v", ".mov", ".webm", ".zip", ".rar", ".7z")):
+ (".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
+ ".zip", ".rar", ".7z")):
file["_http_headers"] = headers
file["file"] = file["file"].replace(
"://cdn", "://media-files", 1)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index ad766da..4cefa1c 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -36,6 +36,7 @@ class Extractor():
browser = None
root = ""
test = None
+ finalize = None
request_interval = 0.0
request_interval_min = 0.0
request_timestamp = 0.0
@@ -44,7 +45,6 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
- self.finalize = None
if self.basecategory:
self.config = self._config_shared
@@ -53,6 +53,7 @@ class Extractor():
self._parentdir = ""
self._write_pages = self.config("write-pages", False)
+ self._retry_codes = self.config("retry-codes")
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
@@ -64,6 +65,8 @@ class Extractor():
if self._retries < 0:
self._retries = float("inf")
+ if not self._retry_codes:
+ self._retry_codes = ()
self._init_session()
self._init_cookies()
@@ -103,12 +106,15 @@ class Extractor():
values[:0] = config.accumulate((self.subcategory,), key, conf=conf)
return values
- def request(self, url, *, method="GET", session=None, retries=None,
- encoding=None, fatal=True, notfound=None, **kwargs):
+ def request(self, url, *, method="GET", session=None,
+ retries=None, retry_codes=None, encoding=None,
+ fatal=True, notfound=None, **kwargs):
if session is None:
session = self.session
if retries is None:
retries = self._retries
+ if retry_codes is None:
+ retry_codes = self._retry_codes
if "proxies" not in kwargs:
kwargs["proxies"] = self._proxies
if "timeout" not in kwargs:
@@ -153,12 +159,12 @@ class Extractor():
code in (403, 503):
content = response.content
if b"_cf_chl_opt" in content or b"jschl-answer" in content:
- self.log.warning("Cloudflare IUAM challenge")
+ self.log.warning("Cloudflare challenge")
break
if b'name="captcha-bypass"' in content:
self.log.warning("Cloudflare CAPTCHA")
break
- if code < 500 and code != 429 and code != 430:
+ if code not in retry_codes and code < 500:
break
finally:
@@ -501,7 +507,10 @@ class Extractor():
try:
with open(path + ".txt", 'wb') as fp:
util.dump_response(
- response, fp, headers=(self._write_pages == "all"))
+ response, fp,
+ headers=(self._write_pages in ("all", "ALL")),
+ hide_auth=(self._write_pages != "ALL")
+ )
except Exception as e:
self.log.warning("Failed to dump HTTP request (%s: %s)",
e.__class__.__name__, e)
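
Editor's note: as read from the hunks above, the retry decision in Extractor.request() changes from a hard-coded "retry on 429/430 and 5xx" check to "retry on any code in retry-codes, plus 5xx". The sketch below is a simplified stand-in for that condition, not the actual control flow:

def should_retry(code, retry_codes=()):
    # mirrors: if code not in retry_codes and code < 500: break
    return code >= 500 or code in retry_codes

assert should_retry(503)                          # 5xx: always retried
assert not should_retry(429)                      # no longer implicit
assert should_retry(429, retry_codes=(429, 430))  # opt in via retry-codes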
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 4c93604..7b0e572 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -40,7 +40,17 @@ class DanbooruExtractor(BaseExtractor):
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
- self.extended_metadata = self.config("metadata", False)
+
+ metadata = self.config("metadata", False)
+ if metadata:
+ if isinstance(metadata, (list, tuple)):
+ metadata = ",".join(metadata)
+ elif not isinstance(metadata, str):
+ metadata = "artist_commentary,children,notes,parent,uploader"
+ self.metadata_includes = metadata
+ else:
+ self.metadata_includes = None
+
threshold = self.config("threshold")
if isinstance(threshold, int):
self.threshold = 1 if threshold < 1 else threshold
@@ -99,13 +109,10 @@ class DanbooruExtractor(BaseExtractor):
url = post["large_file_url"]
post["extension"] = "webm"
- if self.extended_metadata:
- template = (
- "{}/posts/{}.json?only=artist_commentary,children,notes,"
- "parent,uploader"
- )
- resp = self.request(template.format(self.root, post["id"]))
- post.update(resp.json())
+ if self.metadata_includes:
+ meta_url = "{}/posts/{}.json?only={}".format(
+ self.root, post["id"], self.metadata_includes)
+ post.update(self.request(meta_url).json())
if url[0] == "/":
url = self.root + url
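
Editor's note: the extended 'metadata' option normalization above reduces to a small pure function. The sketch mirrors the hunk (metadata_includes() is an illustrative stand-in): True keeps the old fixed include set, while a string or list selects custom includes for the '?only=' query parameter.

def metadata_includes(metadata):
    if not metadata:
        return None
    if isinstance(metadata, (list, tuple)):
        return ",".join(metadata)
    if isinstance(metadata, str):
        return metadata
    return "artist_commentary,children,notes,parent,uploader"

assert metadata_includes(False) is None
assert metadata_includes(True) == "artist_commentary,children,notes,parent,uploader"
assert metadata_includes("replacements,comments,ai_tags") == "replacements,comments,ai_tags"
assert metadata_includes(["replacements", "comments"]) == "replacements,comments"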
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index aeb2d0a..a3187fa 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -118,11 +118,18 @@ class DeviantartExtractor(Extractor):
if "flash" in deviation:
yield self.commit(deviation, deviation["flash"])
- if "excerpt" in deviation and self.commit_journal:
- journal = self.api.deviation_content(deviation["deviationid"])
- if self.extra:
- deviation["_journal"] = journal["html"]
- yield self.commit_journal(deviation, journal)
+ if self.commit_journal:
+ if "excerpt" in deviation:
+ journal = self.api.deviation_content(
+ deviation["deviationid"])
+ elif "body" in deviation:
+ journal = {"html": deviation.pop("body")}
+ else:
+ journal = None
+ if journal:
+ if self.extra:
+ deviation["_journal"] = journal["html"]
+ yield self.commit_journal(deviation, journal)
if not self.extra:
continue
@@ -150,10 +157,19 @@ class DeviantartExtractor(Extractor):
"""Adjust the contents of a Deviation-object"""
if "index" not in deviation:
try:
- deviation["index"] = text.parse_int(
- deviation["url"].rpartition("-")[2])
+ if deviation["url"].startswith("https://sta.sh"):
+ filename = deviation["content"]["src"].split("/")[5]
+ deviation["index_base36"] = filename.partition("-")[0][1:]
+ deviation["index"] = id_from_base36(
+ deviation["index_base36"])
+ else:
+ deviation["index"] = text.parse_int(
+ deviation["url"].rpartition("-")[2])
except KeyError:
deviation["index"] = 0
+ deviation["index_base36"] = "0"
+ if "index_base36" not in deviation:
+ deviation["index_base36"] = base36_from_id(deviation["index"])
if self.user:
deviation["username"] = self.user
@@ -170,13 +186,11 @@ class DeviantartExtractor(Extractor):
if self.comments:
deviation["comments"] = (
- self.api.comments_deviation(deviation["deviationid"])
+ self.api.comments(deviation["deviationid"], target="deviation")
if deviation["stats"]["comments"] else ()
)
# filename metadata
- alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
- deviation["index_base36"] = util.bencode(deviation["index"], alphabet)
sub = re.compile(r"\W").sub
deviation["filename"] = "".join((
sub("_", deviation["title"].lower()), "_by_",
@@ -253,9 +267,10 @@ class DeviantartExtractor(Extractor):
html = journal["html"]
if html.startswith("<style"):
html = html.partition("</style>")[2]
+ head, _, tail = html.rpartition("<script")
content = "\n".join(
text.unescape(text.remove_html(txt))
- for txt in html.rpartition("<script")[0].split("<br />")
+ for txt in (head or tail).split("<br />")
)
txt = JOURNAL_TEMPLATE_TEXT.format(
title=deviation["title"],
@@ -402,8 +417,9 @@ class DeviantartUserExtractor(DeviantartExtractor):
}),
("https://www.deviantart.com/shimoda7", {
"options": (("include", "all"),),
- "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)$",
- "count": 4,
+ "pattern": r"/shimoda7/"
+ r"(gallery(/scraps)?|posts(/statuses)?|favourites)$",
+ "count": 5,
}),
("https://shimoda7.deviantart.com/"),
)
@@ -414,6 +430,7 @@ class DeviantartUserExtractor(DeviantartExtractor):
(DeviantartGalleryExtractor , base + "gallery"),
(DeviantartScrapsExtractor , base + "gallery/scraps"),
(DeviantartJournalExtractor , base + "posts"),
+ (DeviantartStatusExtractor , base + "posts/statuses"),
(DeviantartFavoriteExtractor, base + "favourites"),
), ("gallery",))
@@ -746,6 +763,97 @@ class DeviantartJournalExtractor(DeviantartExtractor):
return self.api.browse_user_journals(self.user, self.offset)
+class DeviantartStatusExtractor(DeviantartExtractor):
+ """Extractor for an artist's status updates"""
+ subcategory = "status"
+ directory_fmt = ("{category}", "{username}", "Status")
+ filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
+ archive_fmt = "S_{_username}_{index}.{extension}"
+ pattern = BASE_PATTERN + r"/posts/statuses"
+ test = (
+ ("https://www.deviantart.com/t1na/posts/statuses", {
+ "count": 0,
+ }),
+ ("https://www.deviantart.com/justgalym/posts/statuses", {
+ "count": 4,
+ "url": "bf4c44c0c60ff2648a880f4c3723464ad3e7d074",
+ }),
+ # shared deviation
+ ("https://www.deviantart.com/justgalym/posts/statuses", {
+ "options": (("journals", "none"),),
+ "count": 1,
+ "pattern": r"https://images-wixmp-\w+\.wixmp\.com/f"
+ r"/[^/]+/[^.]+\.jpg\?token=",
+ }),
+ # shared sta.sh item
+ ("https://www.deviantart.com/vanillaghosties/posts/statuses", {
+ "options": (("journals", "none"), ("original", False)),
+ "range": "5-",
+ "count": 1,
+ "keyword": {
+ "index" : int,
+ "index_base36": "re:^[0-9a-z]+$",
+ "url" : "re:^https://sta.sh",
+ },
+ }),
+ ("https://www.deviantart.com/justgalym/posts/statuses", {
+ "options": (("journals", "text"),),
+ "url": "c8744f7f733a3029116607b826321233c5ca452d",
+ }),
+ )
+
+ def deviations(self):
+ for status in self.api.user_statuses(self.user, self.offset):
+ yield from self.status(status)
+
+ def status(self, status):
+ for item in status.get("items") or (): # do not trust is_share
+ # shared deviations/statuses
+ if "deviation" in item:
+ yield item["deviation"].copy()
+ if "status" in item:
+ yield from self.status(item["status"].copy())
+ # assume is_deleted == true means necessary fields are missing
+ if status["is_deleted"]:
+ self.log.warning(
+ "Skipping status %s (deleted)", status.get("statusid"))
+ return
+ yield status
+
+ def prepare(self, deviation):
+ if "deviationid" in deviation:
+ return DeviantartExtractor.prepare(self, deviation)
+
+ try:
+ path = deviation["url"].split("/")
+ deviation["index"] = text.parse_int(path[-1] or path[-2])
+ except KeyError:
+ deviation["index"] = 0
+
+ if self.user:
+ deviation["username"] = self.user
+ deviation["_username"] = self.user.lower()
+ else:
+ deviation["username"] = deviation["author"]["username"]
+ deviation["_username"] = deviation["username"].lower()
+
+ deviation["date"] = dt = text.parse_datetime(deviation["ts"])
+ deviation["published_time"] = int(util.datetime_to_timestamp(dt))
+
+ deviation["da_category"] = "Status"
+ deviation["category_path"] = "status"
+ deviation["is_downloadable"] = False
+ deviation["title"] = "Status Update"
+
+ comments_count = deviation.pop("comments_count", 0)
+ deviation["stats"] = {"comments": comments_count}
+ if self.comments:
+ deviation["comments"] = (
+ self.api.comments(deviation["statusid"], target="status")
+ if comments_count else ()
+ )
+
+
class DeviantartPopularExtractor(DeviantartExtractor):
"""Extractor for popular deviations"""
subcategory = "popular"
@@ -867,7 +975,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
archive_fmt = "g_{_username}_{index}.{extension}"
pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
r"|(?:https?://)?(?:www\.)?deviantart\.com/"
- r"(?:view/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)(\d+)")
+ r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
+ r"(\d+)" # bare deviation ID without slug
+ r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
@@ -940,6 +1050,15 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
("https://www.deviantart.com/view/1", {
"exception": exception.NotFoundError,
}),
+ # /deviation/ (#3558)
+ ("https://www.deviantart.com/deviation/817215762"),
+ # fav.me (#3558)
+ ("https://fav.me/ddijrpu", {
+ "count": 1,
+ }),
+ ("https://fav.me/dddd", {
+ "exception": exception.NotFoundError,
+ }),
# old-style URLs
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),
@@ -956,7 +1075,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.type = match.group(3)
- self.deviation_id = match.group(4) or match.group(5)
+ self.deviation_id = \
+ match.group(4) or match.group(5) or id_from_base36(match.group(6))
def deviations(self):
url = "{}/{}/{}/{}".format(
@@ -1149,9 +1269,9 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination_list(endpoint, params)
- def comments_deviation(self, deviation_id, offset=0):
- """Fetch comments posted on a deviation"""
- endpoint = "/comments/deviation/" + deviation_id
+ def comments(self, id, target, offset=0):
+ """Fetch comments posted on a target"""
+ endpoint = "/comments/{}/{}".format(target, id)
params = {"maxdepth": "5", "offset": offset, "limit": 50,
"mature_content": self.mature}
return self._pagination_list(endpoint, params=params, key="thread")
@@ -1187,8 +1307,6 @@ class DeviantartOAuthAPI():
def deviation_metadata(self, deviations):
""" Fetch deviation metadata for a set of deviations"""
- if not deviations:
- return []
endpoint = "/deviation/metadata?" + "&".join(
"deviationids[{}]={}".format(num, deviation["deviationid"])
for num, deviation in enumerate(deviations)
@@ -1224,6 +1342,12 @@ class DeviantartOAuthAPI():
endpoint = "/user/profile/" + username
return self._call(endpoint, fatal=False)
+ def user_statuses(self, username, offset=0):
+ """Yield status updates of a specific user"""
+ endpoint = "/user/statuses/"
+ params = {"username": username, "offset": offset, "limit": 50}
+ return self._pagination(endpoint, params)
+
def user_friends_watch(self, username):
"""Watch a user"""
endpoint = "/user/friends/watch/" + username
@@ -1350,10 +1474,12 @@ class DeviantartOAuthAPI():
"Private deviations detected! Run 'gallery-dl "
"oauth:deviantart' and follow the instructions to "
"be able to access them.")
- if self.metadata:
- self._metadata(results)
- if self.folders:
- self._folders(results)
+ # "statusid" cannot be used instead
+ if results and "deviationid" in results[0]:
+ if self.metadata:
+ self._metadata(results)
+ if self.folders:
+ self._folders(results)
yield from results
if not data["has_more"] and (
@@ -1561,6 +1687,17 @@ def _login_impl(extr, username, password):
}
+def id_from_base36(base36):
+ return util.bdecode(base36, _ALPHABET)
+
+
+def base36_from_id(deviation_id):
+ return util.bencode(int(deviation_id), _ALPHABET)
+
+
+_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
+
+
###############################################################################
# Journal Formats #############################################################
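
Editor's note: the id_from_base36()/base36_from_id() helpers added above rely on util.bdecode()/util.bencode(), which are gallery-dl internals. Self-contained plain-Python equivalents, for illustration:

_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"

def id_from_base36(base36):
    # int() with base 36 matches the lowercase alphabet above
    return int(base36, 36)

def base36_from_id(deviation_id):
    n = int(deviation_id)
    digits = ""
    while n:
        n, r = divmod(n, 36)
        digits = _ALPHABET[r] + digits
    return digits or "0"

# fav.me short links carry the deviation ID in base36, e.g. fav.me/ddijrpu:
assert base36_from_id(id_from_base36("ddijrpu")) == "ddijrpu"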
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index c05ec39..476fdeb 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -21,6 +21,10 @@ class FantiaExtractor(Extractor):
_warning = True
def items(self):
+ self.headers = {
+ "Accept" : "application/json, text/plain, */*",
+ "Referer": self.root,
+ }
if self._warning:
if not self._check_cookies(("_session_id",)):
@@ -43,10 +47,11 @@ class FantiaExtractor(Extractor):
def _pagination(self, url):
params = {"page": 1}
- headers = {"Referer": self.root}
+ headers = self.headers
while True:
page = self.request(url, params=params, headers=headers).text
+ self._csrf_token(page)
post_id = None
for post_id in text.extract_iter(
@@ -57,11 +62,16 @@ class FantiaExtractor(Extractor):
return
params["page"] += 1
+ def _csrf_token(self, page=None):
+ if not page:
+ page = self.request(self.root + "/").text
+ self.headers["X-CSRF-Token"] = text.extr(
+ page, 'name="csrf-token" content="', '"')
+
def _get_post_data(self, post_id):
"""Fetch and process post data"""
- headers = {"Referer": self.root}
url = self.root+"/api/v1/posts/"+post_id
- resp = self.request(url, headers=headers).json()["post"]
+ resp = self.request(url, headers=self.headers).json()["post"]
post = {
"post_id": resp["id"],
"post_url": self.root + "/posts/" + str(resp["id"]),
@@ -173,4 +183,5 @@ class FantiaPostExtractor(FantiaExtractor):
self.post_id = match.group(1)
def posts(self):
+ self._csrf_token()
return (self.post_id,)
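
Editor's note: the new X-CSRF-Token handling scrapes the token from the '<meta name="csrf-token">' tag of an HTML page and attaches it to the shared request headers. text.extr is gallery-dl's substring helper; the sketch uses a plain str.partition() equivalent, and the sample page is made up:

def csrf_token(page):
    token = page.partition('name="csrf-token" content="')[2]
    return token.partition('"')[0]

page = '<head><meta name="csrf-token" content="abc123" /></head>'
headers = {"X-CSRF-Token": csrf_token(page)}
assert headers["X-CSRF-Token"] == "abc123"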
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 10c7295..9292da3 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -150,7 +150,7 @@ class GenericExtractor(Extractor):
https://en.wikipedia.org/wiki/List_of_file_formats
Compared to the "pattern" class variable, here we must exclude also
- other special characters (space, ", ', >), since we are looking for
+ other special characters (space, ", ', <, >), since we are looking for
urls in html tags.
"""
@@ -158,7 +158,7 @@ class GenericExtractor(Extractor):
(?:[^?&#"'>\s]+) # anything until dot+extension
\.(?:jpe?g|jpe|png|gif
|web[mp]|mp4|mkv|og[gmv]|opus) # dot + image/video extensions
- (?:[^"'>\s]*)? # optional query and fragment
+ (?:[^"'<>\s]*)? # optional query and fragment
"""
imageurls_src = re.findall(imageurl_pattern_src, page)
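
Editor's note: a sketch of why '<' joins the excluded characters (#3555). Without it, the optional query/fragment part of a bare image URL could run into the HTML tag that follows it; the pattern below is a simplified stand-in for the one in the hunk above:

import re

pattern = r"""https?://[^?&#"'<>\s]+\.jpg[^"'<>\s]*"""
html = 'see https://example.org/a.jpg?size=big<br>next'
assert re.findall(pattern, html) == ["https://example.org/a.jpg?size=big"]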
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index adee94a..d61c139 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -1,25 +1,26 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://hiperdex.com/"""
+"""Extractors for https://1sthiperdex.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
from ..cache import memcache
import re
-BASE_PATTERN = r"((?:https?://)?(?:www\.)?hiperdex\d?\.(?:com|net|info))"
+BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
+ r"(?:1st)?hiperdex\d?\.(?:com|net|info))")
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://hiperdex.com"
+ root = "https://1sthiperdex.com"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
@@ -52,6 +53,8 @@ class HiperdexBase():
}
def chapter_data(self, chapter):
+ if chapter.startswith("chapter-"):
+ chapter = chapter[8:]
chapter, _, minor = chapter.partition("-")
data = {
"chapter" : text.parse_int(chapter),
@@ -62,12 +65,13 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
- """Extractor for manga chapters from hiperdex.com"""
+ """Extractor for manga chapters from 1sthiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
test = (
- ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
- "pattern": r"https://hiperdex\d?.(com|net|info)/wp-content/uploads"
- r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp",
+ ("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/", {
+ "pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
+ r"/wp-content/uploads/WP-manga/data"
+ r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
"count": 9,
"keyword": {
"artist" : "Sasuga Kei",
@@ -82,6 +86,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"type" : "Manga",
},
}),
+ ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
@@ -104,11 +109,11 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
- """Extractor for manga from hiperdex.com"""
+ """Extractor for manga from 1sthiperdex.com"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
test = (
- ("https://hiperdex.com/manga/youre-not-that-special/", {
+ ("https://1sthiperdex.com/manga/youre-not-that-special/", {
"count": 51,
"pattern": HiperdexChapterExtractor.pattern,
"keyword": {
@@ -125,6 +130,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"type" : "Manhwa",
},
}),
+ ("https://hiperdex.com/manga/youre-not-that-special/"),
("https://hiperdex2.com/manga/youre-not-that-special/"),
("https://hiperdex.net/manga/youre-not-that-special/"),
("https://hiperdex.info/manga/youre-not-that-special/"),
@@ -166,6 +172,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
test = (
+ ("https://1sthiperdex.com/manga-artist/beck-ho-an/"),
("https://hiperdex.net/manga-artist/beck-ho-an/"),
("https://hiperdex2.com/manga-artist/beck-ho-an/"),
("https://hiperdex.info/manga-artist/beck-ho-an/"),
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index eb64db0..7c656be 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -8,6 +8,7 @@
from .common import Extractor, Message
from .. import text, exception
+import binascii
BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleak\.vip"
@@ -49,6 +50,11 @@ class HotleakExtractor(Extractor):
params["page"] += 1
+def decode_video_url(url):
+ # cut first and last 16 characters, reverse, base64 decode
+ return binascii.a2b_base64(url[-17:15:-1]).decode()
+
+
class HotleakPostExtractor(HotleakExtractor):
"""Extractor for individual posts on hotleak"""
subcategory = "post"
@@ -100,8 +106,8 @@ class HotleakPostExtractor(HotleakExtractor):
text.nameext_from_url(data["url"], data)
elif self.type == "video":
- data["url"] = "ytdl:" + text.extr(
- text.unescape(page), '"src":"', '"')
+ data["url"] = "ytdl:" + decode_video_url(text.extr(
+ text.unescape(page), '"src":"', '"'))
text.nameext_from_url(data["url"], data)
data["extension"] = "mp4"
@@ -163,7 +169,8 @@ class HotleakCreatorExtractor(HotleakExtractor):
elif post["type"] == 1:
data["type"] = "video"
- data["url"] = "ytdl:" + post["stream_url_play"]
+ data["url"] = "ytdl:" + decode_video_url(
+ post["stream_url_play"])
text.nameext_from_url(data["url"], data)
data["extension"] = "mp4"
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index db9f3fb..deb31a0 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2020 Leonardo Taccari
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -90,6 +90,11 @@ class InstagramExtractor(Extractor):
def posts(self):
return ()
+ def finalize(self):
+ if self._cursor:
+ self.log.info("Use '-o cursor=%s' to continue downloading "
+ "from the current position", self._cursor)
+
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
@@ -104,9 +109,6 @@ class InstagramExtractor(Extractor):
page = None
if page:
- if self._cursor:
- self.log.info("Use '-o cursor=%s' to continue downloading "
- "from the current position", self._cursor)
raise exception.StopExtraction("HTTP redirect to %s page (%s)",
page, url.partition("?")[0])
@@ -114,6 +116,10 @@ class InstagramExtractor(Extractor):
if www_claim is not None:
self.www_claim = www_claim
+ csrf_token = response.cookies.get("csrftoken")
+ if csrf_token:
+ self.csrf_token = csrf_token
+
return response
def login(self):
@@ -794,7 +800,12 @@ class InstagramRestAPI():
def user_clips(self, user_id):
endpoint = "/v1/clips/user/"
- data = {"target_user_id": user_id, "page_size": "50"}
+ data = {
+ "target_user_id": user_id,
+ "page_size": "50",
+ "max_id": None,
+ "include_feed_video": "true",
+ }
return self._pagination_post(endpoint, data)
def user_collection(self, collection_id):
@@ -820,19 +831,18 @@ class InstagramRestAPI():
def _call(self, endpoint, **kwargs):
extr = self.extractor
- url = "https://i.instagram.com/api" + endpoint
+ url = "https://www.instagram.com/api" + endpoint
kwargs["headers"] = {
+ "Accept" : "*/*",
"X-CSRFToken" : extr.csrf_token,
"X-Instagram-AJAX": "1006242110",
"X-IG-App-ID" : "936619743392459",
"X-ASBD-ID" : "198387",
"X-IG-WWW-Claim" : extr.www_claim,
- "Origin" : extr.root,
+ "X-Requested-With": "XMLHttpRequest",
+ "Alt-Used" : "www.instagram.com",
"Referer" : extr.root + "/",
}
- kwargs["cookies"] = {
- "csrftoken": extr.csrf_token,
- }
return extr.request(url, **kwargs).json()
def _pagination(self, endpoint, params=None, media=False):
@@ -851,7 +861,7 @@ class InstagramRestAPI():
yield from data["items"]
if not data.get("more_available"):
- return
+ return extr._update_cursor(None)
params["max_id"] = extr._update_cursor(data["next_max_id"])
def _pagination_post(self, endpoint, params):
@@ -866,7 +876,7 @@ class InstagramRestAPI():
info = data["paging_info"]
if not info.get("more_available"):
- return
+ return extr._update_cursor(None)
params["max_id"] = extr._update_cursor(info["max_id"])
def _pagination_sections(self, endpoint, params):
@@ -879,7 +889,7 @@ class InstagramRestAPI():
yield from info["sections"]
if not info.get("more_available"):
- return
+ return extr._update_cursor(None)
params["page"] = info["next_page"]
params["max_id"] = extr._update_cursor(info["next_max_id"])
@@ -894,7 +904,7 @@ class InstagramRestAPI():
yield from item["media_items"]
if "next_max_id" not in data:
- return
+ return extr._update_cursor(None)
params["max_id"] = extr._update_cursor(data["next_max_id"])
@@ -982,12 +992,7 @@ class InstagramGraphqlAPI():
"X-Requested-With": "XMLHttpRequest",
"Referer" : extr.root + "/",
}
- cookies = {
- "csrftoken": extr.csrf_token,
- }
- return extr.request(
- url, params=params, headers=headers, cookies=cookies,
- ).json()["data"]
+ return extr.request(url, params=params, headers=headers).json()["data"]
def _pagination(self, query_hash, variables,
key_data="user", key_edge=None):
@@ -1003,7 +1008,7 @@ class InstagramGraphqlAPI():
info = data["page_info"]
if not info["has_next_page"]:
- return
+ return extr._update_cursor(None)
elif not data["edges"]:
s = "" if self.item.endswith("s") else "s"
raise exception.StopExtraction(
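
Editor's note: a sketch of the cursor bookkeeping behind the new resume hint, as read from the hunks above. Pagination saves the most recent cursor and clears it once a listing is exhausted, so the moved finalize() only suggests '-o cursor=...' when resuming makes sense. CursorTracker is a simplified stand-in for the extractor:

class CursorTracker:
    _cursor = None

    def _update_cursor(self, cursor):
        self._cursor = cursor
        return cursor

    def finalize(self):
        if self._cursor:
            print("Use '-o cursor=%s' to continue downloading "
                  "from the current position" % self._cursor)

tracker = CursorTracker()
tracker._update_cursor("QVFE_hypothetical")  # mid-pagination: saved
tracker._update_cursor(None)                 # listing exhausted: cleared
tracker.finalize()                           # prints nothing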
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 63e3084..33e8370 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -16,6 +16,7 @@ import re
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.party"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
+HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})"
class KemonopartyExtractor(Extractor):
@@ -41,7 +42,7 @@ class KemonopartyExtractor(Extractor):
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.party)?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
- find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
+ find_hash = re.compile(HASH_PATTERN).match
generators = self._build_file_generators(self.config("files"))
duplicates = self.config("duplicates")
comments = self.config("comments")
@@ -89,10 +90,11 @@ class KemonopartyExtractor(Extractor):
match = find_hash(url)
if match:
file["hash"] = hash = match.group(1)
- if hash in hashes and not duplicates:
- self.log.debug("Skipping %s (duplicate)", url)
- continue
- hashes.add(hash)
+ if not duplicates:
+ if hash in hashes:
+ self.log.debug("Skipping %s (duplicate)", url)
+ continue
+ hashes.add(hash)
else:
file["hash"] = ""
@@ -362,14 +364,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
"pattern": r"https://kemono\.party/data/("
r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
+ "keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08"
+ "|51453640a5e0a4d23fbf57fb85390f9c5ec154"},
"count": ">= 2",
}),
# 'inline' files
(("https://kemono.party/discord"
"/server/315262215055736843/channel/315262215055736843#general"), {
"pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
- "range": "1-5",
"options": (("image-filter", "type == 'inline'"),),
+ "keyword": {"hash": ""},
+ "range": "1-5",
}),
)
@@ -383,6 +388,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
+ find_hash = re.compile(HASH_PATTERN).match
posts = self.posts()
max_posts = self.config("max-posts")
@@ -393,11 +399,13 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
files = []
append = files.append
for attachment in post["attachments"]:
+ match = find_hash(attachment["path"])
+ attachment["hash"] = match.group(1) if match else ""
attachment["type"] = "attachment"
append(attachment)
for path in find_inline(post["content"] or ""):
append({"path": "https://cdn.discordapp.com" + path,
- "name": path, "type": "inline"})
+ "name": path, "type": "inline", "hash": ""})
post["channel_name"] = self.channel_name
post["date"] = text.parse_datetime(
@@ -406,6 +414,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
+ post["hash"] = file["hash"]
post["type"] = file["type"]
url = file["path"]
diff --git a/gallery_dl/extractor/lexica.py b/gallery_dl/extractor/lexica.py
new file mode 100644
index 0000000..ad93625
--- /dev/null
+++ b/gallery_dl/extractor/lexica.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://lexica.art/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class LexicaSearchExtractor(Extractor):
+ """Extractor for lexica.art search results"""
+ category = "lexica"
+ subcategory = "search"
+ root = "https://lexica.art"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?lexica\.art/?\?q=([^&#]+)"
+ test = (
+ ("https://lexica.art/?q=tree", {
+ "pattern": r"https://lexica-serve-encoded-images2\.sharif\."
+ r"workers.dev/full_jpg/[0-9a-f-]{36}$",
+ "range": "1-80",
+ "count": 80,
+ "keyword": {
+ "height": int,
+ "id": str,
+ "upscaled_height": int,
+ "upscaled_width": int,
+ "userid": str,
+ "width": int,
+ "prompt": {
+ "c": int,
+ "grid": bool,
+ "height": int,
+ "id": str,
+ "images": list,
+ "initImage": None,
+ "initImageStrength": None,
+ "model": "lexica-aperture-v2",
+ "negativePrompt": str,
+ "prompt": str,
+ "seed": str,
+ "timestamp": r"re:\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\dZ",
+ "width": int,
+ },
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.query = match.group(1)
+ self.text = text.unquote(self.query).replace("+", " ")
+
+ def items(self):
+ base = ("https://lexica-serve-encoded-images2.sharif.workers.dev"
+ "/full_jpg/")
+ tags = self.text
+
+ for image in self.posts():
+ image["filename"] = image["id"]
+ image["extension"] = "jpg"
+ image["search_tags"] = tags
+ yield Message.Directory, image
+ yield Message.Url, base + image["id"], image
+
+ def posts(self):
+ url = self.root + "/api/infinite-prompts"
+ headers = {
+ "Accept" : "application/json, text/plain, */*",
+ "Referer": "{}/?q={}".format(self.root, self.query),
+ }
+ json = {
+ "text" : self.text,
+ "searchMode": "images",
+ "source" : "search",
+ "cursor" : 0,
+ "model" : "lexica-aperture-v2",
+ }
+
+ while True:
+ data = self.request(
+ url, method="POST", headers=headers, json=json).json()
+
+ prompts = {
+ prompt["id"]: prompt
+ for prompt in data["prompts"]
+ }
+
+ for image in data["images"]:
+ image["prompt"] = prompts[image["promptid"]]
+ del image["promptid"]
+ yield image
+
+ cursor = data.get("nextCursor")
+ if not cursor:
+ return
+
+ json["cursor"] = cursor
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 049e0af..e49d29a 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,8 +31,8 @@ class MastodonExtractor(BaseExtractor):
def items(self):
for status in self.statuses():
- if self._check_move:
- self._check_move(status["account"])
+ if self._check_moved:
+ self._check_moved(status["account"])
if not self.reblogs and status["reblog"]:
self.log.debug("Skipping %s (reblog)", status["id"])
continue
@@ -48,12 +48,13 @@ class MastodonExtractor(BaseExtractor):
status["instance_remote"] = \
acct.rpartition("@")[2] if "@" in acct else None
+ status["count"] = len(attachments)
status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime(
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, status
- for media in attachments:
+ for status["num"], media in enumerate(attachments, 1):
status["media"] = media
url = media["url"]
yield Message.Url, url, text.nameext_from_url(url, status)
@@ -62,8 +63,8 @@ class MastodonExtractor(BaseExtractor):
"""Return an iterable containing all relevant Status objects"""
return ()
- def _check_move(self, account):
- self._check_move = None
+ def _check_moved(self, account):
+ self._check_moved = None
if "moved" in account:
self.log.warning("Account '%s' moved to '%s'",
account["acct"], account["moved"]["acct"])
@@ -181,6 +182,10 @@ class MastodonStatusExtractor(MastodonExtractor):
test = (
("https://mastodon.social/@jk/103794036899778366", {
"count": 4,
+ "keyword": {
+ "count": 4,
+ "num": int,
+ },
}),
("https://pawoo.net/@yoru_nine/105038878897832922", {
"content": "b52e807f8ab548d6f896b09218ece01eba83987a",
@@ -229,7 +234,7 @@ class MastodonAPI():
for account in self.account_search(handle, 1):
if account["acct"] == username:
- self.extractor._check_move(account)
+ self.extractor._check_moved(account)
return account["id"]
raise exception.NotFoundError("account")
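
Note: the new 'num'/'count' fields for mastodon follow the usual enumerate-over-attachments pattern: 'count' is set once per status, 'num' counts files from 1. A toy illustration with plain dicts in place of API objects:

    # Toy version of the num/count bookkeeping added above.
    status = {"media_attachments": [{"url": "a.png"}, {"url": "b.png"}]}

    attachments = status["media_attachments"]
    status["count"] = len(attachments)
    for status["num"], media in enumerate(attachments, 1):
        print(status["num"], status["count"], media["url"])
    # -> 1 2 a.png
    # -> 2 2 b.png
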
diff --git a/gallery_dl/extractor/nudecollect.py b/gallery_dl/extractor/nudecollect.py
new file mode 100644
index 0000000..3159919
--- /dev/null
+++ b/gallery_dl/extractor/nudecollect.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://nudecollect.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class NudecollectExtractor(GalleryExtractor):
+ """Base class for Nudecollect extractors"""
+ category = "nudecollect"
+ directory_fmt = ("{category}", "{title}")
+ filename_fmt = "{slug}_{num:>03}.{extension}"
+ archive_fmt = "{slug}_{num}"
+ root = "https://www.nudecollect.com"
+
+ def request(self, url, **kwargs):
+ kwargs["allow_redirects"] = False
+ return GalleryExtractor.request(self, url, **kwargs)
+
+ @staticmethod
+ def get_title(page):
+ return text.unescape(text.extr(page, "<title>", "</title>"))[31:]
+
+ @staticmethod
+ def get_image(page):
+ return text.extr(page, '<img src="', '"')
+
+
+class NudecollectImageExtractor(NudecollectExtractor):
+ """Extractor for individual images from nudecollect.com"""
+ subcategory = "image"
+ pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
+ r"(/content/([^/?#]+)/image-(\d+)-pics-(\d+)"
+ r"-mirror-(\d+)\.html)")
+ test = (
+ (("https://www.nudecollect.com/content/20201220_Teenpornstorage_"
+ "Patritcy_Vanessa_Lesbian_Lust/image-4-pics-108-mirror-43.html"), {
+ "pattern": (r"https://mirror\d+\.nudecollect\.com/showimage"
+ r"/nudecollect-8769086487/image00004-5896498214-43"
+ r"-9689595623/20201220_Teenpornstorage_Patritcy_Vaness"
+ r"a_Lesbian_Lust/9879560327/nudecollect\.com\.jpg"),
+ "keyword": {
+ "slug" : ("20201220_Teenpornstorage_Patritcy"
+ "_Vanessa_Lesbian_Lust"),
+ "title" : ("20201220 Teenpornstorage Patritcy"
+ " Vanessa Lesbian Lust"),
+ "num" : 4,
+ "count" : 108,
+ "mirror": 43,
+ },
+ }),
+ (("https://www.nudecollect.com/content/20201220_Teenpornstorage_"
+ "Patritcy_Vanessa_Lesbian_Lust/image-10-pics-108-mirror-43.html")),
+ )
+
+ def __init__(self, match):
+ NudecollectExtractor.__init__(self, match)
+ _, self.slug, self.num, self.count, self.mirror = match.groups()
+
+ def metadata(self, page):
+ return {
+ "slug" : self.slug,
+ "title" : self.get_title(page),
+ "count" : text.parse_int(self.count),
+ "mirror": text.parse_int(self.mirror),
+ }
+
+ def images(self, page):
+ return ((self.get_image(page), {"num": text.parse_int(self.num)}),)
+
+
+class NudecollectAlbumExtractor(NudecollectExtractor):
+ """Extractor for image albums on nudecollect.com"""
+ subcategory = "album"
+ pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
+ r"/content/([^/?#]+)/(?:index-mirror-(\d+)-(\d+)"
+ r"|page-\d+-pics-(\d+)-mirror-(\d+))\.html")
+ test = (
+ (("https://www.nudecollect.com/content/20170219_TheWhiteBoxxx_"
+ "Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex"
+ "_with_alluring_Czech_babes_x125_1080px/index-mirror-67-125.html"), {
+ "pattern": (r"https://mirror\d+\.nudecollect\.com/showimage"
+ r"/nudecollect-8769086487/image00\d\d\d-5896498214-67"
+ r"-9689595623/20170219_TheWhiteBoxxx_Caprice"
+ r"_Tracy_Loves_Hot_ass_fingering_and_sensual_"
+ r"lesbian_sex_with_alluring_Czech_babes_x125_1080px"
+ r"/9879560327/nudecollect\.com\.jpg"),
+ "count" : 125,
+ "keyword": {
+ "slug" : ("20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_"
+ "ass_fingering_and_sensual_lesbian_sex_with_"
+ "alluring_Czech_babes_x125_1080px"),
+ "title" : ("20170219 TheWhiteBoxxx Caprice Tracy Loves Hot ass"
+ " fingering and sensual lesbian sex with alluring"
+ " Czech babes x125 1080px"),
+ "num" : int,
+ "mirror": 67,
+ },
+ }),
+ (("https://www.nudecollect.com/content/20201220_Teenpornstorage_"
+ "Patritcy_Vanessa_Lesbian_Lust/page-1-pics-108-mirror-43.html"), {
+ "pattern": (r"https://mirror\d+\.nudecollect\.com/showimage"
+ r"/nudecollect-8769086487/image00\d\d\d-5896498214-43"
+ r"-9689595623/20201220_Teenpornstorage_Patritcy_Vaness"
+ r"a_Lesbian_Lust/9879560327/nudecollect\.com\.jpg"),
+ "count" : 108,
+ "keyword": {
+ "slug" : ("20201220_Teenpornstorage_Patritcy"
+ "_Vanessa_Lesbian_Lust"),
+ "title" : ("20201220 Teenpornstorage Patritcy"
+ " Vanessa Lesbian Lust"),
+ "num" : int,
+ "mirror": 43,
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ self.slug = match.group(1)
+ self.mirror = match.group(2) or match.group(5)
+ self.count = text.parse_int(match.group(3) or match.group(4))
+ url = "{}/content/{}/image-1-pics-{}-mirror-{}.html".format(
+ self.root, self.slug, self.count, self.mirror)
+ NudecollectExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ return {
+ "slug" : self.slug,
+ "title" : self.get_title(page),
+ "mirror": text.parse_int(self.mirror),
+ }
+
+ def images(self, page):
+ url = self.get_image(page)
+ p1, _, p2 = url.partition("/image0")
+ ufmt = p1 + "/image{:>05}" + p2[4:]
+ return [(ufmt.format(num), None) for num in range(1, self.count + 1)]
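
Note: the album extractor fetches only the first image page and derives every other image URL by splicing a zero-padded index into the first URL. A sketch of that splice on a fabricated URL of the same shape:

    # Fabricated URL in the shape the extractor expects; only the
    # "/image0NNNN" segment matters for the splice.
    url = ("https://mirror43.nudecollect.example/showimage"
           "/image00001-5896498214-43/slug/nudecollect.com.jpg")
    count = 3

    p1, _, p2 = url.partition("/image0")
    ufmt = p1 + "/image{:>05}" + p2[4:]  # p2[4:] drops the old index digits
    print([ufmt.format(num) for num in range(1, count + 1)])
    # -> [...image00001..., ...image00002..., ...image00003...]
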
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index d6628c4..9270f33 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -68,11 +68,19 @@ class OAuthBase(Extractor):
def open(self, url, params, recv=None):
"""Open 'url' in browser amd return response parameters"""
- import webbrowser
url += "?" + urllib.parse.urlencode(params)
- if not self.config("browser", True) or not webbrowser.open(url):
- stdout_write(
- "Please open this URL in your browser:\n\n" + url + "\n\n")
+
+ browser = self.config("browser", True)
+ if browser:
+ import webbrowser
+ browser = webbrowser.get()
+
+ if browser and browser.open(url):
+ self.log.info("Opening URL in %s:", browser.name.capitalize())
+ else:
+ self.log.info("Please open this URL in your browser:")
+
+ stdout_write("\n{}\n\n".format(url))
return (recv or self.recv)()
def error(self, msg):
@@ -80,8 +88,18 @@ class OAuthBase(Extractor):
"Remote server reported an error:\n\n{}\n".format(msg))
def _oauth1_authorization_flow(
- self, request_token_url, authorize_url, access_token_url):
+ self, default_key, default_secret,
+ request_token_url, authorize_url, access_token_url):
"""Perform the OAuth 1.0a authorization flow"""
+
+ api_key = self.oauth_config("api-key") or default_key
+ api_secret = self.oauth_config("api-secret") or default_secret
+ self.session = oauth.OAuth1Session(api_key, api_secret)
+
+ self.log.info("Using %s %s API key (%s)",
+ "default" if api_key == default_key else "custom",
+ self.subcategory, api_key)
+
# get a request token
params = {"oauth_callback": self.redirect_uri}
data = self.session.get(request_token_url, params=params).text
@@ -112,11 +130,18 @@ class OAuthBase(Extractor):
))
def _oauth2_authorization_code_grant(
- self, client_id, client_secret, auth_url, token_url, *,
- scope="read", key="refresh_token", auth=True,
- cache=None, instance=None):
+ self, client_id, client_secret, default_id, default_secret,
+ auth_url, token_url, *, scope="read", duration="permanent",
+ key="refresh_token", auth=True, cache=None, instance=None):
"""Perform an OAuth2 authorization code grant"""
+ client_id = str(client_id) if client_id else default_id
+ client_secret = client_secret or default_secret
+
+ self.log.info("Using %s %s client ID (%s)",
+ "default" if client_id == default_id else "custom",
+ instance or self.subcategory, client_id)
+
state = "gallery-dl_{}_{}".format(
self.subcategory,
oauth.nonce(8),
@@ -127,7 +152,7 @@ class OAuthBase(Extractor):
"response_type": "code",
"state" : state,
"redirect_uri" : self.redirect_uri,
- "duration" : "permanent",
+ "duration" : duration,
"scope" : scope,
}
@@ -137,13 +162,12 @@ class OAuthBase(Extractor):
# check authorization response
if state != params.get("state"):
self.send("'state' mismatch: expected {}, got {}.\n".format(
- state, params.get("state")
- ))
+ state, params.get("state")))
return
if "error" in params:
return self.error(params)
- # exchange the authorization code for a token
+ # exchange authorization code for a token
data = {
"grant_type" : "authorization_code",
"code" : params["code"],
@@ -208,81 +232,36 @@ class OAuthBase(Extractor):
return msg
-class OAuthDeviantart(OAuthBase):
- subcategory = "deviantart"
- pattern = "oauth:deviantart$"
- redirect_uri = REDIRECT_URI_HTTPS
-
- def items(self):
- yield Message.Version, 1
-
- self._oauth2_authorization_code_grant(
- self.oauth_config(
- "client-id", deviantart.DeviantartOAuthAPI.CLIENT_ID),
- self.oauth_config(
- "client-secret", deviantart.DeviantartOAuthAPI.CLIENT_SECRET),
- "https://www.deviantart.com/oauth2/authorize",
- "https://www.deviantart.com/oauth2/token",
- scope="browse user.manage",
- cache=deviantart._refresh_token_cache,
- )
-
+# --------------------------------------------------------------------
+# OAuth 1.0a
class OAuthFlickr(OAuthBase):
subcategory = "flickr"
pattern = "oauth:flickr$"
redirect_uri = REDIRECT_URI_HTTPS
- def __init__(self, match):
- OAuthBase.__init__(self, match)
- self.session = oauth.OAuth1Session(
- self.oauth_config("api-key", flickr.FlickrAPI.API_KEY),
- self.oauth_config("api-secret", flickr.FlickrAPI.API_SECRET),
- )
-
def items(self):
yield Message.Version, 1
self._oauth1_authorization_flow(
+ flickr.FlickrAPI.API_KEY,
+ flickr.FlickrAPI.API_SECRET,
"https://www.flickr.com/services/oauth/request_token",
"https://www.flickr.com/services/oauth/authorize",
"https://www.flickr.com/services/oauth/access_token",
)
-class OAuthReddit(OAuthBase):
- subcategory = "reddit"
- pattern = "oauth:reddit$"
-
- def items(self):
- yield Message.Version, 1
-
- self.session.headers["User-Agent"] = reddit.RedditAPI.USER_AGENT
- self._oauth2_authorization_code_grant(
- self.oauth_config("client-id", reddit.RedditAPI.CLIENT_ID),
- "",
- "https://www.reddit.com/api/v1/authorize",
- "https://www.reddit.com/api/v1/access_token",
- scope="read history",
- cache=reddit._refresh_token_cache,
- )
-
-
class OAuthSmugmug(OAuthBase):
subcategory = "smugmug"
pattern = "oauth:smugmug$"
- def __init__(self, match):
- OAuthBase.__init__(self, match)
- self.session = oauth.OAuth1Session(
- self.oauth_config("api-key", smugmug.SmugmugAPI.API_KEY),
- self.oauth_config("api-secret", smugmug.SmugmugAPI.API_SECRET),
- )
-
def items(self):
yield Message.Version, 1
self._oauth1_authorization_flow(
+ smugmug.SmugmugAPI.API_KEY,
+ smugmug.SmugmugAPI.API_SECRET,
"https://api.smugmug.com/services/oauth/1.0a/getRequestToken",
"https://api.smugmug.com/services/oauth/1.0a/authorize",
"https://api.smugmug.com/services/oauth/1.0a/getAccessToken",
@@ -293,23 +272,61 @@ class OAuthTumblr(OAuthBase):
subcategory = "tumblr"
pattern = "oauth:tumblr$"
- def __init__(self, match):
- OAuthBase.__init__(self, match)
- self.session = oauth.OAuth1Session(
- self.oauth_config("api-key", tumblr.TumblrAPI.API_KEY),
- self.oauth_config("api-secret", tumblr.TumblrAPI.API_SECRET),
- )
-
def items(self):
yield Message.Version, 1
self._oauth1_authorization_flow(
+ tumblr.TumblrAPI.API_KEY,
+ tumblr.TumblrAPI.API_SECRET,
"https://www.tumblr.com/oauth/request_token",
"https://www.tumblr.com/oauth/authorize",
"https://www.tumblr.com/oauth/access_token",
)
+# --------------------------------------------------------------------
+# OAuth 2.0
+
+class OAuthDeviantart(OAuthBase):
+ subcategory = "deviantart"
+ pattern = "oauth:deviantart$"
+ redirect_uri = REDIRECT_URI_HTTPS
+
+ def items(self):
+ yield Message.Version, 1
+
+ self._oauth2_authorization_code_grant(
+ self.oauth_config("client-id"),
+ self.oauth_config("client-secret"),
+ deviantart.DeviantartOAuthAPI.CLIENT_ID,
+ deviantart.DeviantartOAuthAPI.CLIENT_SECRET,
+ "https://www.deviantart.com/oauth2/authorize",
+ "https://www.deviantart.com/oauth2/token",
+ scope="browse user.manage",
+ cache=deviantart._refresh_token_cache,
+ )
+
+
+class OAuthReddit(OAuthBase):
+ subcategory = "reddit"
+ pattern = "oauth:reddit$"
+
+ def items(self):
+ yield Message.Version, 1
+
+ self.session.headers["User-Agent"] = reddit.RedditAPI.USER_AGENT
+ self._oauth2_authorization_code_grant(
+ self.oauth_config("client-id"),
+ "",
+ reddit.RedditAPI.CLIENT_ID,
+ "",
+ "https://www.reddit.com/api/v1/authorize",
+ "https://www.reddit.com/api/v1/access_token",
+ scope="read history",
+ cache=reddit._refresh_token_cache,
+ )
+
+
class OAuthMastodon(OAuthBase):
subcategory = "mastodon"
pattern = "oauth:mastodon:(?:https?://)?([^/?#]+)"
@@ -330,6 +347,8 @@ class OAuthMastodon(OAuthBase):
self._oauth2_authorization_code_grant(
application["client-id"],
application["client-secret"],
+ application["client-id"],
+ application["client-secret"],
"https://{}/oauth/authorize".format(self.instance),
"https://{}/oauth/token".format(self.instance),
instance=self.instance,
@@ -362,6 +381,8 @@ class OAuthMastodon(OAuthBase):
return data
+# --------------------------------------------------------------------
+
class OAuthPixiv(OAuthBase):
subcategory = "pixiv"
pattern = "oauth:pixiv$"
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index fc85125..df85b96 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -56,12 +56,12 @@ class PhilomenaExtractor(BooruExtractor):
INSTANCES = {
"derpibooru": {
"root": "https://derpibooru.org",
- "pattern": r"derpibooru\.org",
+ "pattern": r"(?:www\.)?derpibooru\.org",
"filter_id": "56027",
},
"ponybooru": {
"root": "https://ponybooru.org",
- "pattern": r"ponybooru\.org",
+ "pattern": r"(?:www\.)?ponybooru\.org",
"filter_id": "2",
},
"furbooru": {
@@ -128,9 +128,14 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
},
}),
("https://derpibooru.org/1"),
+ ("https://www.derpibooru.org/1"),
+ ("https://www.derpibooru.org/images/1"),
+
("https://ponybooru.org/images/1", {
"content": "bca26f58fafd791fe07adcd2a28efd7751824605",
}),
+ ("https://www.ponybooru.org/images/1"),
+
("https://furbooru.org/images/1", {
"content": "9eaa1e1b32fa0f16520912257dbefaff238d5fd2",
}),
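
Note: the philomena instance patterns now tolerate an optional 'www.' prefix, as the new test URLs show. A quick regex check; the '/images/<id>' tail here is an illustrative stand-in for the extractor's real post pattern:

    import re

    pattern = re.compile(r"(?:https?://)?(?:www\.)?derpibooru\.org"
                         r"/(?:images/)?(\d+)")  # tail is illustrative
    for url in ("https://derpibooru.org/1",
                "https://www.derpibooru.org/images/1"):
        print(pattern.match(url).group(1))  # -> 1, then 1 again
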
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 7013f1b..ea4cf43 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -80,15 +80,19 @@ class SankakuTagExtractor(SankakuExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/\?([^#]*)"
+ pattern = BASE_PATTERN + r"/?\?([^#]*)"
test = (
("https://sankaku.app/?tags=bonocho", {
"count": 5,
"pattern": r"https://s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
- r"/[^/]{32}\.\w+\?e=\d+&(expires=\d+&)?m=[^&#]+",
+ r"/[0-9a-f]{32}\.\w+\?e=\d+&(expires=\d+&)?m=[^&#]+",
}),
("https://beta.sankakucomplex.com/?tags=bonocho"),
("https://chan.sankakucomplex.com/?tags=bonocho"),
+ ("https://black.sankakucomplex.com/?tags=bonocho"),
+ ("https://white.sankakucomplex.com/?tags=bonocho"),
+ ("https://sankaku.app/ja?tags=order%3Apopularity"),
+ ("https://sankaku.app/no/?tags=order%3Apopularity"),
# error on five or more tags
("https://chan.sankakucomplex.com/?tags=bonocho+a+b+c+d", {
"options": (("username", None),),
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c2d8247..17a2202 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -26,6 +26,7 @@ class TwitterExtractor(Extractor):
cookiedomain = ".twitter.com"
cookienames = ("auth_token",)
root = "https://twitter.com"
+ browser = "firefox"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -945,16 +946,31 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
- self.root = "https://twitter.com/i/api"
+ self.root = "https://api.twitter.com"
+ cookies = extractor.session.cookies
+ cookiedomain = extractor.cookiedomain
+
+ csrf = extractor.config("csrf")
+ if csrf is None or csrf == "cookies":
+ csrf_token = cookies.get("ct0", domain=cookiedomain)
+ else:
+ csrf_token = None
+ if not csrf_token:
+ csrf_token = util.generate_token()
+ cookies.set("ct0", csrf_token, domain=cookiedomain)
+
+ auth_token = cookies.get("auth_token", domain=cookiedomain)
+
self.headers = {
"authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
"COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
"4FA33AGWWjCpTnA",
"x-guest-token": None,
- "x-twitter-auth-type": None,
+ "x-twitter-auth-type": "OAuth2Session" if auth_token else None,
"x-twitter-client-language": "en",
"x-twitter-active-user": "yes",
- "x-csrf-token": None,
+ "x-csrf-token": csrf_token,
+ "Origin": "https://twitter.com",
"Referer": "https://twitter.com/",
}
self.params = {
@@ -967,24 +983,36 @@ class TwitterAPI():
"include_can_dm": "1",
"include_can_media_tag": "1",
"include_ext_has_nft_avatar": "1",
+ "include_ext_is_blue_verified": "1",
+ "include_ext_verified_type": "1",
"skip_status": "1",
"cards_platform": "Web-12",
"include_cards": "1",
"include_ext_alt_text": "true",
+ "include_ext_limited_action_results": "false",
"include_quote_count": "true",
"include_reply_count": "1",
"tweet_mode": "extended",
+ "include_ext_collab_control": "true",
+ "include_ext_views": "true",
"include_entities": "true",
"include_user_entities": "true",
"include_ext_media_color": "true",
"include_ext_media_availability": "true",
"include_ext_sensitive_media_warning": "true",
+ "include_ext_trusted_friends_metadata": "true",
"send_error_codes": "true",
"simple_quoted_tweet": "true",
+ "q": None,
"count": "100",
+ "query_source": None,
"cursor": None,
- "ext": "mediaStats,highlightedLabel,hasNftAvatar,"
- "voiceInfo,superFollowMetadata",
+ "pc": None,
+ "spelling_corrections": None,
+ "include_ext_edit_control": "true",
+ "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,"
+ "enrichments,superFollowMetadata,unmentionInfo,editControl,"
+ "collab_control,vibe",
}
self.variables = {
"includePromotedContent": False,
@@ -1006,28 +1034,6 @@ class TwitterAPI():
self._syndication = self.extractor.syndication
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
- cookies = extractor.session.cookies
- cookiedomain = extractor.cookiedomain
-
- csrf = extractor.config("csrf")
- if csrf is None or csrf == "cookies":
- csrf_token = cookies.get("ct0", domain=cookiedomain)
- else:
- csrf_token = None
- if not csrf_token:
- csrf_token = util.generate_token()
- cookies.set("ct0", csrf_token, domain=cookiedomain)
- self.headers["x-csrf-token"] = csrf_token
-
- if cookies.get("auth_token", domain=cookiedomain):
- # logged in
- self.headers["x-twitter-auth-type"] = "OAuth2Session"
- else:
- # guest
- guest_token = self._guest_token()
- cookies.set("gt", guest_token, domain=cookiedomain)
- self.headers["x-guest-token"] = guest_token
-
def tweet_detail(self, tweet_id):
endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
variables = {
@@ -1183,17 +1189,26 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
- root = "https://api.twitter.com"
endpoint = "/1.1/guest/activate.json"
- return str(self._call(endpoint, None, root, "POST")["guest_token"])
+ self.extractor.log.info("Requesting guest token")
+ return str(self._call(endpoint, None, "POST", False)["guest_token"])
+
+ def _authenticate_guest(self):
+ guest_token = self._guest_token()
+ if guest_token != self.headers["x-guest-token"]:
+ self.headers["x-guest-token"] = guest_token
+ self.extractor.session.cookies.set(
+ "gt", guest_token, domain=self.extractor.cookiedomain)
- def _call(self, endpoint, params, root=None, method="GET"):
- if root is None:
- root = self.root
+ def _call(self, endpoint, params, method="GET", auth=True):
+ url = self.root + endpoint
while True:
+ if not self.headers["x-twitter-auth-type"] and auth:
+ self._authenticate_guest()
+
response = self.extractor.request(
- root + endpoint, method=method, params=params,
+ url, method=method, params=params,
headers=self.headers, fatal=None)
# update 'x-csrf-token' header (#1170)
@@ -1226,21 +1241,33 @@ class TwitterAPI():
def _pagination_legacy(self, endpoint, params):
original_retweets = (self.extractor.retweets == "original")
+ bottom = ("cursor-bottom-", "sq-cursor-bottom")
while True:
data = self._call(endpoint, params)
- instr = data["timeline"]["instructions"]
- if not instr:
+ instructions = data["timeline"]["instructions"]
+ if not instructions:
return
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
tweet_id = cursor = None
tweet_ids = []
+ entries = ()
+
+ # process instructions
+ for instr in instructions:
+ if "addEntries" in instr:
+ entries = instr["addEntries"]["entries"]
+ elif "replaceEntry" in instr:
+ entry = instr["replaceEntry"]["entry"]
+ if entry["entryId"].startswith(bottom):
+ cursor = (entry["content"]["operation"]
+ ["cursor"]["value"])
# collect tweet IDs and cursor value
- for entry in instr[0]["addEntries"]["entries"]:
+ for entry in entries:
entry_startswith = entry["entryId"].startswith
if entry_startswith(("tweet-", "sq-I-t-")):
@@ -1252,7 +1279,7 @@ class TwitterAPI():
entry["content"]["timelineModule"]["metadata"]
["conversationMetadata"]["allTweetIds"][::-1])
- elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")):
+ elif entry_startswith(bottom):
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse", True):
# keep going even if there are no tweets
@@ -1300,11 +1327,7 @@ class TwitterAPI():
quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
- # update cursor value
- if "replaceEntry" in instr[-1] :
- cursor = (instr[-1]["replaceEntry"]["entry"]
- ["content"]["operation"]["cursor"]["value"])
-
+ # stop on empty response
if not cursor or (not tweets and not tweet_id):
return
params["cursor"] = cursor
@@ -1346,12 +1369,8 @@ class TwitterAPI():
if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
- guest_token = self._guest_token()
- extr.session.cookies.set(
- "gt", guest_token, domain=extr.cookiedomain)
extr._cookiefile = None
del extr.session.cookies["auth_token"]
- self.headers["x-guest-token"] = guest_token
self.headers["x-twitter-auth-type"] = None
extr.log.info("Retrying API request as guest")
continue
@@ -1578,8 +1597,6 @@ def _login_impl(extr, username, password):
"Login with email is no longer possible. "
"You need to provide your username or phone number instead.")
- extr.log.info("Logging in as %s", username)
-
def process(response):
try:
data = response.json()
@@ -1598,8 +1615,10 @@ def _login_impl(extr, username, password):
extr.session.cookies.clear()
api = TwitterAPI(extr)
+ api._authenticate_guest()
headers = api.headers
- headers["Referer"] = "https://twitter.com/i/flow/login"
+
+ extr.log.info("Logging in as %s", username)
# init
data = {
@@ -1653,7 +1672,7 @@ def _login_impl(extr, username, password):
"web_modal": 1,
},
}
- url = "https://twitter.com/i/api/1.1/onboarding/task.json?flow_name=login"
+ url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login"
response = extr.request(url, method="POST", headers=headers, json=data)
data = {
@@ -1668,7 +1687,7 @@ def _login_impl(extr, username, password):
},
],
}
- url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ url = "https://api.twitter.com/1.1/onboarding/task.json"
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
@@ -1692,7 +1711,7 @@ def _login_impl(extr, username, password):
},
],
}
- # url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ # url = "https://api.twitter.com/1.1/onboarding/task.json"
extr.sleep(random.uniform(2.0, 4.0), "login (username)")
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
@@ -1710,7 +1729,7 @@ def _login_impl(extr, username, password):
},
],
}
- # url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ # url = "https://api.twitter.com/1.1/onboarding/task.json"
extr.sleep(random.uniform(2.0, 4.0), "login (password)")
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
@@ -1727,7 +1746,7 @@ def _login_impl(extr, username, password):
},
],
}
- # url = "https://twitter.com/i/api/1.1/onboarding/task.json"
+ # url = "https://api.twitter.com/1.1/onboarding/task.json"
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
process(response)
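
Note: Twitter's CSRF handling now lives in TwitterAPI.__init__: unless the 'csrf' option says otherwise, the existing 'ct0' cookie is reused, and a fresh random token is generated and stored as a cookie when none is present, with the same value sent as 'x-csrf-token'. A sketch with a stand-in for gallery_dl.util.generate_token:

    import secrets

    def generate_token(size=16):
        # Stand-in for gallery_dl.util.generate_token: random hex string.
        return secrets.token_hex(size)

    def csrf_token_for(cookies, csrf_option=None):
        """Return the ct0 value to send as 'x-csrf-token', creating it
        and storing it as a cookie when necessary."""
        if csrf_option is None or csrf_option == "cookies":
            token = cookies.get("ct0")
        else:
            token = None
        if not token:
            token = generate_token()
            cookies["ct0"] = token  # header and cookie must match
        return token

    cookies = {}
    print(csrf_token_for(cookies) == cookies["ct0"])  # -> True
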
diff --git a/gallery_dl/extractor/wikifeet.py b/gallery_dl/extractor/wikifeet.py
new file mode 100644
index 0000000..70e9646
--- /dev/null
+++ b/gallery_dl/extractor/wikifeet.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.wikifeet.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+import json
+
+
+class WikifeetGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from wikifeet.com"""
+ category = "wikifeet"
+ directory_fmt = ("{category}", "{celebrity}")
+ filename_fmt = "{category}_{celeb}_{pid}.{extension}"
+ archive_fmt = "{type}_{celeb}_{pid}"
+ pattern = (r"(?:https?://)(?:(?:www\.)?wikifeetx?|"
+ r"men\.wikifeet)\.com/([^/?#]+)")
+ test = (
+ ("https://www.wikifeet.com/Madison_Beer", {
+ "pattern": (r"https://pics\.wikifeet\.com/Madison_Beer"
+ r"-Feet-\d+\.jpg"),
+ "count" : ">= 352",
+ "keyword": {
+ "celeb" : "Madison_Beer",
+ "celebrity" : "Madison Beer",
+ "birthday" : "dt:1999-03-05 00:00:00",
+ "birthplace": "United States",
+ "rating" : float,
+ "pid" : int,
+ "width" : int,
+ "height" : int,
+ "shoesize" : "7.5 US",
+ "type" : "women",
+ "tags" : list,
+ },
+ }),
+ ("https://www.wikifeetx.com/Tifa_Quinn", {
+ "pattern": (r"https://pics\.wikifeet\.com/Tifa_Quinn"
+ r"-Feet-\d+\.jpg"),
+ "count" : ">= 9",
+ "keyword": {
+ "celeb" : "Tifa_Quinn",
+ "celebrity" : "Tifa Quinn",
+ "birthday" : "[NOT SET]",
+ "birthplace": "United States",
+ "rating" : float,
+ "pid" : int,
+ "width" : int,
+ "height" : int,
+ "shoesize" : "[NOT SET]",
+ "type" : "women",
+ "tags" : list,
+ },
+ }),
+ ("https://men.wikifeet.com/Chris_Hemsworth", {
+ "pattern": (r"https://pics\.wikifeet\.com/Chris_Hemsworth"
+ r"-Feet-\d+\.jpg"),
+ "count" : ">= 860",
+ "keyword": {
+ "celeb" : "Chris_Hemsworth",
+ "celebrity" : "Chris Hemsworth",
+ "birthday" : "dt:1983-08-11 00:00:00",
+ "birthplace": "Australia",
+ "rating" : float,
+ "pid" : int,
+ "width" : int,
+ "height" : int,
+ "shoesize" : "12.5 US",
+ "type" : "men",
+ "tags" : list,
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match.group(0))
+ if "wikifeetx.com" in self.root:
+ self.category = "wikifeetx"
+ self.type = "men" if "://men." in self.root else "women"
+ self.celeb = match.group(1)
+ GalleryExtractor.__init__(self, match, self.root + "/" + self.celeb)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ return {
+ "celeb" : self.celeb,
+ "type" : self.type,
+ "rating" : text.parse_float(extr('"ratingValue": "', '"')),
+ "celebrity" : text.unescape(extr("times'>", "</h1>")),
+ "shoesize" : text.remove_html(extr("Shoe Size:", "edit")),
+ "birthplace": text.remove_html(extr("Birthplace:", "edit")),
+ "birthday" : text.parse_datetime(text.remove_html(
+ extr("Birth Date:", "edit")), "%Y-%m-%d"),
+ }
+
+ def images(self, page):
+ tagmap = {
+ "C": "Close-up",
+ "T": "Toenails",
+ "N": "Nylons",
+ "A": "Arches",
+ "S": "Soles",
+ "B": "Barefoot",
+ }
+ ufmt = "https://pics.wikifeet.com/" + self.celeb + "-Feet-{}.jpg"
+ return [
+ (ufmt.format(data["pid"]), {
+ "pid" : data["pid"],
+ "width" : data["pw"],
+ "height": data["ph"],
+ "tags" : [tagmap[tag] for tag in data["tags"]],
+ })
+ for data in json.loads(text.extr(page, "['gdata'] = ", ";"))
+ ]
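
Note: WikifeetGalleryExtractor.images() reads a JSON array out of the page's "['gdata'] = [...];" assignment and expands single-letter tag codes via 'tagmap'. A sketch on a fabricated page fragment, using plain slicing where the extractor uses text.extr():

    import json

    # Fabricated page fragment; real pages embed the same
    # "['gdata'] = [...];" assignment in a script block.
    page = """...['gdata'] = [{"pid": 1, "pw": 800, "ph": 600,
                               "tags": ["S", "B"]}];..."""
    start = page.index("['gdata'] = ") + len("['gdata'] = ")
    gdata = json.loads(page[start:page.index(";", start)])

    tagmap = {"C": "Close-up", "T": "Toenails", "N": "Nylons",
              "A": "Arches", "S": "Soles", "B": "Barefoot"}
    for data in gdata:
        print(data["pid"], [tagmap[t] for t in data["tags"]])
    # -> 1 ['Soles', 'Barefoot']
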
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index e1a6767..f7d84f0 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2022 Mike Fährmann
+# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -473,13 +473,18 @@ class DownloadJob(Job):
postprocessors = extr.config_accumulate("postprocessors")
if postprocessors:
self.hooks = collections.defaultdict(list)
+
pp_log = self.get_logger("postprocessor")
+ pp_conf = config.get((), "postprocessor") or {}
+ pp_opts = cfg("postprocessor-options")
pp_list = []
- pp_conf = config.get((), "postprocessor") or {}
for pp_dict in postprocessors:
if isinstance(pp_dict, str):
pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict}
+ if pp_opts:
+ pp_dict = pp_dict.copy()
+ pp_dict.update(pp_opts)
clist = pp_dict.get("whitelist")
if clist is not None:
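
Note: the job now merges 'postprocessor-options' (filled by the new -O flag, see option.py below) into every post processor dict, copying first so shared config objects are not mutated. The merge itself is just:

    # Sketch of the per-postprocessor option merge added above;
    # the sample options are illustrative.
    pp_opts = {"keep-files": True}  # e.g. from: -O keep-files=true
    postprocessors = [{"name": "zip"}, {"name": "metadata"}]

    merged = []
    for pp_dict in postprocessors:
        if pp_opts:
            pp_dict = pp_dict.copy()  # don't mutate shared config entries
            pp_dict.update(pp_opts)
        merged.append(pp_dict)
    print(merged)
    # -> [{'name': 'zip', 'keep-files': True},
    #     {'name': 'metadata', 'keep-files': True}]
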
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 32cac79..213cd2d 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2022 Mike Fährmann
+# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -48,15 +48,18 @@ class DeprecatedConfigConstAction(argparse.Action):
class ParseAction(argparse.Action):
"""Parse <key>=<value> options and set them as config values"""
def __call__(self, parser, namespace, values, option_string=None):
- key, _, value = values.partition("=")
- try:
- value = json.loads(value)
- except ValueError:
- pass
+ key, value = _parse_option(values)
key = key.split(".") # splitting an empty string becomes [""]
namespace.options.append((key[:-1], key[-1], value))
+class OptionAction(argparse.Action):
+ """Parse <key>=<value> options for """
+ def __call__(self, parser, namespace, values, option_string=None):
+ key, value = _parse_option(values)
+ namespace.options_pp[key] = value
+
+
class Formatter(argparse.HelpFormatter):
"""Custom HelpFormatter class to customize help output"""
def __init__(self, *args, **kwargs):
@@ -73,6 +76,15 @@ class Formatter(argparse.HelpFormatter):
return self._metavar_formatter(action, action.dest)(1)[0]
+def _parse_option(opt):
+ key, _, value = opt.partition("=")
+ try:
+ value = json.loads(value)
+ except ValueError:
+ pass
+ return key, value
+
+
def build_parser():
"""Build and configure an ArgumentParser object"""
parser = argparse.ArgumentParser(
@@ -488,6 +500,11 @@ def build_parser():
dest="postprocessors", metavar="NAME", action="append",
help="Activate the specified post processor",
)
+ postprocessor.add_argument(
+ "-O", "--postprocessor-option",
+ dest="options_pp", metavar="OPT", action=OptionAction, default={},
+ help="Additional '<key>=<value>' post processor options",
+ )
parser.add_argument(
"urls",
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 543fb10..6b9c457 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -231,7 +231,14 @@ Status: {response.status_code} {response.reason}
Request Headers
---------------
{request_headers}
-
+"""
+ if request.body:
+ outfmt += """
+Request Body
+------------
+{request.body}
+"""
+ outfmt += """
Response Headers
----------------
{response_headers}
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index aec56c9..6b52610 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2022 Mike Fährmann
+# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.24.4"
+__version__ = "1.24.5"
diff --git a/setup.py b/setup.py
index 3d97d27..c91549a 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,8 @@ PACKAGES = [
DESCRIPTION = ("Command-line program to download image galleries and "
"collections from several image hosting sites")
-LONG_DESCRIPTION = read("README.rst")
+LONG_DESCRIPTION = read("README.rst").replace(
+ "<docs/", "<https://github.com/mikf/gallery-dl/blob/master/docs/")
def build_py2exe():
diff --git a/test/test_downloader.py b/test/test_downloader.py
index 0703754..bbee0f4 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -313,6 +313,9 @@ SAMPLES = {
("pdf" , b"%PDF-"),
("swf" , b"FWS"),
("swf" , b"CWS"),
+ ("blend", b"BLENDER-v303RENDH"),
+ ("obj" , b"# Blender v3.2.0 OBJ File: 'foo.blend'"),
+ ("clip", b"CSFCHUNK\x00\x00\x00\x00"),
}
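
Note: the three new samples exercise the magic-byte checks added to downloader/http.py for .blend, .obj, and .clip files. A sketch of how such a signature table is typically consulted (the mapping below is illustrative, not gallery-dl's actual lookup):

    # Illustrative signature table; bytes taken from the samples above.
    SIGNATURES = {
        "blend": (b"BLENDER",),
        "obj"  : (b"# Blender",),
        "clip" : (b"CSFCHUNK",),
        "swf"  : (b"FWS", b"CWS"),
    }

    def detect_extension(head):
        """Return the extension whose signature matches the first bytes."""
        for ext, magics in SIGNATURES.items():
            if head.startswith(magics):  # bytes.startswith accepts a tuple
                return ext
        return None

    print(detect_extension(b"CSFCHUNK\x00\x00\x00\x00"))  # -> 'clip'
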