-rw-r--r--  CHANGELOG.md | 47
-rw-r--r--  PKG-INFO | 6
-rw-r--r--  README.rst | 4
-rw-r--r--  data/completion/_gallery-dl | 3
-rw-r--r--  data/completion/gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl.fish | 3
-rw-r--r--  data/man/gallery-dl.1 | 11
-rw-r--r--  data/man/gallery-dl.conf.5 | 131
-rw-r--r--  docs/gallery-dl.conf | 15
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 5
-rw-r--r--  gallery_dl/__init__.py | 43
-rw-r--r--  gallery_dl/aes.py | 641
-rw-r--r--  gallery_dl/cookies.py | 956
-rw-r--r--  gallery_dl/extractor/artstation.py | 3
-rw-r--r--  gallery_dl/extractor/common.py | 16
-rw-r--r--  gallery_dl/extractor/foolfuuka.py | 14
-rw-r--r--  gallery_dl/extractor/foolslide.py | 3
-rw-r--r--  gallery_dl/extractor/gelbooru_v01.py | 91
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py | 42
-rw-r--r--  gallery_dl/extractor/imgur.py | 13
-rw-r--r--  gallery_dl/extractor/instagram.py | 94
-rw-r--r--  gallery_dl/extractor/khinsider.py | 19
-rw-r--r--  gallery_dl/extractor/lolisafe.py | 6
-rw-r--r--  gallery_dl/extractor/mangafox.py | 3
-rw-r--r--  gallery_dl/extractor/mangahere.py | 5
-rw-r--r--  gallery_dl/extractor/mangasee.py | 11
-rw-r--r--  gallery_dl/extractor/mastodon.py | 5
-rw-r--r--  gallery_dl/extractor/moebooru.py | 5
-rw-r--r--  gallery_dl/extractor/naverwebtoon.py | 53
-rw-r--r--  gallery_dl/extractor/nijie.py | 48
-rw-r--r--  gallery_dl/extractor/oauth.py | 27
-rw-r--r--  gallery_dl/extractor/philomena.py | 25
-rw-r--r--  gallery_dl/extractor/pixiv.py | 246
-rw-r--r--  gallery_dl/extractor/reactor.py | 3
-rw-r--r--  gallery_dl/extractor/readcomiconline.py | 8
-rw-r--r--  gallery_dl/extractor/shopify.py | 58
-rw-r--r--  gallery_dl/extractor/twitter.py | 89
-rw-r--r--  gallery_dl/extractor/vk.py | 31
-rw-r--r--  gallery_dl/extractor/weasyl.py | 4
-rw-r--r--  gallery_dl/extractor/webtoons.py | 26
-rw-r--r--  gallery_dl/formatter.py | 17
-rw-r--r--  gallery_dl/job.py | 57
-rw-r--r--  gallery_dl/option.py | 24
-rw-r--r--  gallery_dl/output.py | 71
-rw-r--r--  gallery_dl/path.py | 29
-rw-r--r--  gallery_dl/postprocessor/metadata.py | 2
-rw-r--r--  gallery_dl/postprocessor/ugoira.py | 3
-rw-r--r--  gallery_dl/util.py | 32
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  gallery_dl/ytdl.py | 4
-rw-r--r--  test/test_formatter.py | 16
-rw-r--r--  test/test_job.py | 10
-rw-r--r--  test/test_util.py | 14
-rw-r--r--  test/test_ytdl.py | 274
55 files changed, 2977 insertions, 399 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61e343d..f41bec9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,50 @@
# Changelog
+## 1.22.0 - 2022-05-25
+### Additions
+- [gelbooru_v01] add `favorite` extractor ([#2546](https://github.com/mikf/gallery-dl/issues/2546))
+- [instagram] add `tagged_users` to keywords for stories ([#2582](https://github.com/mikf/gallery-dl/issues/2582), [#2584](https://github.com/mikf/gallery-dl/issues/2584))
+- [lolisafe] implement `domain` option ([#2575](https://github.com/mikf/gallery-dl/issues/2575))
+- [naverwebtoon] support (best)challenge comics ([#2542](https://github.com/mikf/gallery-dl/issues/2542))
+- [nijie] support /history_nuita.php listings ([#2541](https://github.com/mikf/gallery-dl/issues/2541))
+- [pixiv] provide more data when `metadata` is enabled ([#2594](https://github.com/mikf/gallery-dl/issues/2594))
+- [shopify] support several more sites by default ([#2089](https://github.com/mikf/gallery-dl/issues/2089))
+- [twitter] extract alt texts as `description` ([#2617](https://github.com/mikf/gallery-dl/issues/2617))
+- [twitter] recognize vxtwitter URLs ([#2621](https://github.com/mikf/gallery-dl/issues/2621))
+- [weasyl] implement `metadata` option ([#2610](https://github.com/mikf/gallery-dl/issues/2610))
+- implement `--cookies-from-browser` ([#1606](https://github.com/mikf/gallery-dl/issues/1606))
+- implement `output.colors` options ([#2532](https://github.com/mikf/gallery-dl/issues/2532))
+- implement string literals in replacement fields
+- support using extended format strings for archive keys
+### Changes
+- [foolfuuka] match 4chan filenames ([#2577](https://github.com/mikf/gallery-dl/issues/2577))
+- [pixiv] implement `include` option
+ - provide `avatar`/`background` downloads as separate extractors ([#2495](https://github.com/mikf/gallery-dl/issues/2495))
+- [twitter] use a better strategy for user URLs
+- [twitter] disable `cards` by default
+- delay directory creation ([#2461](https://github.com/mikf/gallery-dl/issues/2461), [#2474](https://github.com/mikf/gallery-dl/issues/2474))
+- flush writes to stdout/stderr ([#2529](https://github.com/mikf/gallery-dl/issues/2529))
+- build executables on GitHub Actions with Python 3.10
+### Fixes
+- [artstation] use `"browser": "firefox"` by default ([#2527](https://github.com/mikf/gallery-dl/issues/2527))
+- [imgur] prevent exception with empty albums ([#2557](https://github.com/mikf/gallery-dl/issues/2557))
+- [instagram] report redirects to captcha challenges ([#2543](https://github.com/mikf/gallery-dl/issues/2543))
+- [khinsider] fix metadata extraction ([#2611](https://github.com/mikf/gallery-dl/issues/2611))
+- [mangafox] send Referer headers ([#2592](https://github.com/mikf/gallery-dl/issues/2592))
+- [mangahere] send Referer headers ([#2592](https://github.com/mikf/gallery-dl/issues/2592))
+- [mangasee] use randomly generated PHPSESSID cookie ([#2560](https://github.com/mikf/gallery-dl/issues/2560))
+- [pixiv] make retrieving ugoira metadata non-fatal ([#2562](https://github.com/mikf/gallery-dl/issues/2562))
+- [readcomiconline] update deobfuscation code ([#2481](https://github.com/mikf/gallery-dl/issues/2481))
+- [realbooru] fix extraction ([#2530](https://github.com/mikf/gallery-dl/issues/2530))
+- [vk] handle photos without width/height info ([#2535](https://github.com/mikf/gallery-dl/issues/2535))
+- [vk] fix user ID extraction ([#2535](https://github.com/mikf/gallery-dl/issues/2535))
+- [webtoons] extract real episode numbers ([#2591](https://github.com/mikf/gallery-dl/issues/2591))
+- create missing directories for archive files ([#2597](https://github.com/mikf/gallery-dl/issues/2597))
+- detect circular references with `-K` ([#2609](https://github.com/mikf/gallery-dl/issues/2609))
+- replace "\f" in `--filename` arguments with a form feed character ([#2396](https://github.com/mikf/gallery-dl/issues/2396))
+### Removals
+- [gelbooru_v01] remove tlb.booru.org from supported domains
+
## 1.21.2 - 2022-04-27
### Additions
- [deviantart] implement `pagination` option ([#2488](https://github.com/mikf/gallery-dl/issues/2488))
@@ -14,8 +59,8 @@
- [sexcom] add fallback for empty files ([#2485](https://github.com/mikf/gallery-dl/issues/2485))
- [twitter] improve syndication video selection ([#2354](https://github.com/mikf/gallery-dl/issues/2354))
- [twitter] fix various syndication issues ([#2499](https://github.com/mikf/gallery-dl/issues/2499), [#2354](https://github.com/mikf/gallery-dl/issues/2354))
-- [weibo] fix infinite retries for deleted accounts ([#2521](https://github.com/mikf/gallery-dl/issues/2521))
- [vk] fix extraction ([#2512](https://github.com/mikf/gallery-dl/issues/2512))
+- [weibo] fix infinite retries for deleted accounts ([#2521](https://github.com/mikf/gallery-dl/issues/2521))
- [postprocessor:ugoira] use compatible paths with mkvmerge ([#2487](https://github.com/mikf/gallery-dl/issues/2487))
- [postprocessor:ugoira] do not auto-select the `image2` demuxer ([#2492](https://github.com/mikf/gallery-dl/issues/2492))
diff --git a/PKG-INFO b/PKG-INFO
index 63c6185..ef2be9c 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.21.2
+Version: 1.22.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index acdf738..6089da3 100644
--- a/README.rst
+++ b/README.rst
@@ -65,8 +65,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index f630c8e..e86d725 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -11,10 +11,11 @@ _arguments -C -S \
{-d,--destination}'[Target location for file downloads]':'<path>' \
{-D,--directory}'[Exact location for file downloads]':'<path>' \
{-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'<format>' \
---cookies'[File to load additional cookies from]':'<file>':_files \
--proxy'[Use the specified proxy]':'<url>' \
--source-address'[Client-side IP address to bind to]':'<ip>' \
--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
+--cookies'[File to load additional cookies from]':'<file>':_files \
+--cookies-from-browser'[Name of the browser to load cookies from, with optional keyring name prefixed with "+" and profile prefixed with ":"]':'<browser[+keyring][:profile]>' \
{-q,--quiet}'[Activate quiet mode]' \
{-v,--verbose}'[Print various debugging information]' \
{-g,--get-urls}'[Print URLs instead of downloading]' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index d8a6124..1bb3b0a 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --source-address --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index ff0ee84..a3028b2 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -5,10 +5,11 @@ complete -c gallery-dl -r -F -s 'i' -l 'input-file' -d 'Download URLs found in F
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'd' -l 'destination' -d 'Target location for file downloads'
complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'D' -l 'directory' -d 'Exact location for file downloads'
complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for downloaded files ("/O" for "original" filenames)'
-complete -c gallery-dl -r -F -l 'cookies' -d 'File to load additional cookies from'
complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy'
complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to'
complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)'
+complete -c gallery-dl -r -F -l 'cookies' -d 'File to load additional cookies from'
+complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional keyring name prefixed with "+" and profile prefixed with ":"'
complete -c gallery-dl -s 'q' -l 'quiet' -d 'Activate quiet mode'
complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging information'
complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 18f4d64..f3bb82c 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-04-27" "1.21.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-05-25" "1.22.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -35,9 +35,6 @@ Exact location for file downloads
.B "\-f, \-\-filename" \f[I]FORMAT\f[]
Filename format string for downloaded files ('/O' for "original" filenames)
.TP
-.B "\-\-cookies" \f[I]FILE\f[]
-File to load additional cookies from
-.TP
.B "\-\-proxy" \f[I]URL\f[]
Use the specified proxy
.TP
@@ -47,6 +44,12 @@ Client-side IP address to bind to
.B "\-\-clear\-cache" \f[I]MODULE\f[]
Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)
.TP
+.B "\-\-cookies" \f[I]FILE\f[]
+File to load additional cookies from
+.TP
+.B "\-\-cookies\-from\-browser" \f[I]BROWSER[+KEYRING][:PROFILE]\f[]
+Name of the browser to load cookies from, with optional keyring name prefixed with '+' and profile prefixed with ':'
+.TP
.B "\-q, \-\-quiet"
Activate quiet mode
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index aa998ee..6ae0276 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-04-27" "1.21.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-05-25" "1.22.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -480,20 +480,23 @@ Enable the use of \f[I].netrc\f[] authentication data.
.SS extractor.*.cookies
.IP "Type:" 6
-\f[I]Path\f[] or \f[I]object\f[]
+\f[I]Path\f[] or \f[I]object\f[] or \f[I]list\f[]
.IP "Default:" 9
\f[I]null\f[]
.IP "Description:" 4
-Source to read additional cookies from. Either as
+Source to read additional cookies from. This can be
.br
-* the \f[I]Path\f[] to a Mozilla/Netscape format cookies.txt file or
-.br
-* a JSON \f[I]object\f[] specifying cookies as a name-to-value mapping
+* The \f[I]Path\f[] to a Mozilla/Netscape format cookies.txt file
-Example:
+.. code:: json
+
+"~/.local/share/cookies-instagram-com.txt"
+
+.br
+* An \f[I]object\f[] specifying cookies as name-value pairs
.. code:: json
@@ -503,6 +506,21 @@ Example:
"isAdult" : "1"
}
+.br
+* A \f[I]list\f[] with up to 3 entries specifying a browser profile.
+
+.br
+* The first entry is the browser name
+.br
+* The optional second entry is a profile name or an absolute path to a profile directory
+.br
+* The optional third entry is the keyring to retrieve passwords for decrypting cookies from
+
+.. code:: json
+
+["firefox"]
+["chromium", "Private", "kwallet"]
+
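For reference, a minimal gallery-dl.conf fragment using the new browser-profile list form might look like this (the extractor name "twitter", the profile "Private", and the keyring "kwallet" are placeholders):

    {
        "extractor": {
            "twitter": {
                "cookies": ["chromium", "Private", "kwallet"]
            }
        }
    }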
.SS extractor.*.cookies-update
.IP "Type:" 6
@@ -1003,6 +1021,23 @@ The maximum possible value appears to be \f[I]1920\f[].
Download embedded videos hosted on https://www.blogger.com/
+.SS extractor.cyberdrop.domain
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Example:" 4
+"cyberdrop.to"
+
+.IP "Description:" 4
+Specifies the domain used by \f[I]cyberdrop\f[] regardless of input URL.
+
+Setting this option to \f[I]"auto"\f[]
+uses the same domain as a given input URL.
+
+
.SS extractor.danbooru.external
.IP "Type:" 6
\f[I]bool\f[]
@@ -1741,6 +1776,21 @@ If the selected format is not available,
the first in the list gets chosen (usually mp3).
+.SS extractor.lolisafe.domain
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Specifies the domain used by a \f[I]lolisafe\f[] extractor
+regardless of input URL.
+
+Setting this option to \f[I]"auto"\f[]
+uses the same domain as a given input URL.
+
+
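Analogous to the cyberdrop option above, a sketch of a config fragment overriding the lolisafe domain (the domain value is a placeholder):

    {
        "extractor": {
            "lolisafe": {
                "domain": "example.org"
            }
        }
    }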
.SS extractor.luscious.gif
.IP "Type:" 6
\f[I]bool\f[]
@@ -1896,7 +1946,7 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"illustration"\f[], \f[I]"doujin"\f[], \f[I]"favorite"\f[].
+\f[I]"illustration"\f[], \f[I]"doujin"\f[], \f[I]"favorite"\f[], \f[I]"nuita"\f[].
You can use \f[I]"all"\f[] instead of listing all values separately.
@@ -2026,29 +2076,33 @@ Include pins from board sections.
Download from video pins.
-.SS extractor.pixiv.user.avatar
+.SS extractor.pixiv.include
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Download user avatars.
+\f[I]"artworks"\f[]
+.IP "Example:" 4
+.br
+* "avatar,background,artworks"
+.br
+* ["avatar", "background", "artworks"]
-.SS extractor.pixiv.user.background
-.IP "Type:" 6
-\f[I]bool\f[]
+.IP "Description:" 4
+A (comma-separated) list of subcategories to include
+when processing a user profile.
-.IP "Default:" 9
-\f[I]false\f[]
+Possible values are
+\f[I]"artworks"\f[], \f[I]"avatar"\f[], \f[I]"background"\f[], \f[I]"favorite"\f[].
-.IP "Description:" 4
-Download user background banners.
+It is possible to use \f[I]"all"\f[] instead of listing all values separately.
-.SS extractor.pixiv.user.metadata
+.SS extractor.pixiv.artworks.metadata
.IP "Type:" 6
\f[I]bool\f[]
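A sketch of the pixiv.include option described above, in config form (values taken from the list of possible subcategories; "all" would select every one):

    {
        "extractor": {
            "pixiv": {
                "include": ["avatar", "background", "artworks"]
            }
        }
    }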
@@ -2477,7 +2531,7 @@ See \f[I]Filters\f[] for details.
\f[I]bool\f[] or \f[I]string\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
.IP "Description:" 4
Controls how to handle \f[I]Twitter Cards\f[].
@@ -2639,6 +2693,8 @@ Special values:
.br
* \f[I]"timeline"\f[]: \f[I]https://twitter.com/i/user/{rest_id}\f[]
.br
+* \f[I]"tweets"\f[]: \f[I]https://twitter.com/id:{rest_id}/tweets\f[]
+.br
* \f[I]"media"\f[]: \f[I]https://twitter.com/id:{rest_id}/media\f[]
Note: To allow gallery-dl to follow custom URL formats, set the \f[I]blacklist\f[]
@@ -2714,6 +2770,23 @@ Your \f[I]Weasyl API Key\f[],
to use your account's browsing settings and filters.
+.SS extractor.weasyl.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch extra submission metadata during gallery downloads.
+.br
+(\f[I]comments\f[], \f[I]description\f[], \f[I]favorites\f[], \f[I]folder_name\f[],
+.br
+\f[I]tags\f[], \f[I]views\f[])
+
+Note: This requires 1 additional HTTP request per submission.
+
+
.SS extractor.weibo.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -3252,6 +3325,18 @@ Set this option to \f[I]"eaw"\f[] to also work with east-asian characters
with a display width greater than 1.
+.SS output.colors
+.IP "Type:" 6
+\f[I]object\f[]
+
+.IP "Default:" 9
+\f[I]{"success": "1;32", "skip": "2"}\f[]
+
+.IP "Description:" 4
+Controls the \f[I]ANSI colors\f[]
+used with \f[I]mode: color\f[] for successfully downloaded or skipped files.
+
+
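The color values are ANSI SGR parameter strings: "1;32" renders bold green and "2" renders faint. A minimal Python sketch of the escape-sequence mechanics (not gallery-dl's actual implementation, just an illustration of how such a code is applied):

    # wrap a message in an ANSI SGR sequence; "1;32" = bold green, "2" = faint
    def colorize(message, code="1;32"):
        return "\033[" + code + "m" + message + "\033[0m"

    print(colorize("./gallery-dl/twitter/example.jpg"))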
.SS output.skip
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 117c117..6e71d42 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -50,6 +50,10 @@
{
"videos": true
},
+ "cyberdrop":
+ {
+ "domain": "auto"
+ },
"danbooru":
{
"username": null,
@@ -201,7 +205,7 @@
"pixiv":
{
"refresh-token": null,
- "avatar": false,
+ "include": "artworks",
"tags": "japanese",
"ugoira": true
},
@@ -267,7 +271,7 @@
{
"username": null,
"password": null,
- "cards": true,
+ "cards": false,
"conversations": false,
"pinned": false,
"quoted": false,
@@ -292,7 +296,8 @@
},
"weasyl":
{
- "api-key": null
+ "api-key": null,
+ "metadata": false
},
"weibo":
{
@@ -350,6 +355,10 @@
"mode": "auto",
"progress": true,
"shorten": true,
+ "colors": {
+ "success": "1;32",
+ "skip" : "2"
+ },
"skip": true,
"log": "[{name}][{levelname}] {message}",
"logfile": null,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 4b00b85..60a8457 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.21.2
+Version: 1.22.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 4e226fb..954dafb 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -13,8 +13,10 @@ docs/gallery-dl-example.conf
docs/gallery-dl.conf
gallery_dl/__init__.py
gallery_dl/__main__.py
+gallery_dl/aes.py
gallery_dl/cache.py
gallery_dl/config.py
+gallery_dl/cookies.py
gallery_dl/exception.py
gallery_dl/formatter.py
gallery_dl/job.py
@@ -213,4 +215,5 @@ test/test_output.py
test/test_postprocessor.py
test/test_results.py
test/test_text.py
-test/test_util.py
\ No newline at end of file
+test/test_util.py
+test/test_ytdl.py
\ No newline at end of file
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 0214659..d66e285 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -12,7 +12,7 @@ import logging
from . import version, config, option, output, extractor, job, util, exception
__author__ = "Mike Fährmann"
-__copyright__ = "Copyright 2014-2021 Mike Fährmann"
+__copyright__ = "Copyright 2014-2022 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
@@ -22,10 +22,13 @@ __version__ = version.__version__
def progress(urls, pformat):
"""Wrapper around urls to output a simple progress indicator"""
if pformat is True:
- pformat = "[{current}/{total}] {url}"
+ pformat = "[{current}/{total}] {url}\n"
+ else:
+ pformat += "\n"
+
pinfo = {"total": len(urls)}
for pinfo["current"], pinfo["url"] in enumerate(urls, 1):
- print(pformat.format_map(pinfo), file=sys.stderr)
+ output.stderr_write(pformat.format_map(pinfo))
yield pinfo["url"]
@@ -116,9 +119,12 @@ def main():
if args.yamlfiles:
config.load(args.yamlfiles, strict=True, fmt="yaml")
if args.filename:
- if args.filename == "/O":
- args.filename = "{filename}.{extension}"
- config.set((), "filename", args.filename)
+ filename = args.filename
+ if filename == "/O":
+ filename = "{filename}.{extension}"
+ elif filename.startswith("\\f"):
+ filename = "\f" + filename[2:]
+ config.set((), "filename", filename)
if args.directory:
config.set((), "base-directory", args.directory)
config.set((), "directory", ())
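A short sketch of what the filename rewriting above does (a re-statement of the branch, not the module itself; the "\fE " expression prefix used in the second assertion is assumed to be one of gallery-dl's formatter-type markers):

    # "/O" selects original filenames; a leading "\f" (backslash + "f",
    # two characters as typed on the command line) is replaced with a
    # literal form feed (0x0C), which marks an extended format string
    def normalize_filename(arg):
        if arg == "/O":
            return "{filename}.{extension}"
        if arg.startswith("\\f"):
            return "\f" + arg[2:]
        return arg

    assert normalize_filename("/O") == "{filename}.{extension}"
    assert normalize_filename("\\fE title.upper()") == "\fE title.upper()"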
@@ -128,6 +134,10 @@ def main():
config.set((), "skip", "abort:" + str(args.abort))
if args.terminate:
config.set((), "skip", "terminate:" + str(args.terminate))
+ if args.cookies_from_browser:
+ browser, _, profile = args.cookies_from_browser.partition(":")
+ browser, _, keyring = browser.partition("+")
+ config.set((), "cookies", (browser, profile, keyring))
for opts in args.options:
config.set(*opts)
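For illustration, how a BROWSER[+KEYRING][:PROFILE] specification decomposes under the two partition() calls above:

    spec = "chromium+kwallet:Private"
    browser, _, profile = spec.partition(":")     # "chromium+kwallet", "Private"
    browser, _, keyring = browser.partition("+")  # "chromium", "kwallet"
    # resulting config value: ("chromium", "Private", "kwallet"),
    # i.e. the list form accepted by extractor.*.cookies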
@@ -189,20 +199,23 @@ def main():
pass
if args.list_modules:
- for module_name in extractor.modules:
- print(module_name)
+ extractor.modules.append("")
+ sys.stdout.write("\n".join(extractor.modules))
+
elif args.list_extractors:
+ write = sys.stdout.write
+ fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format
+
for extr in extractor.extractors():
if not extr.__doc__:
continue
- print(extr.__name__)
- print(extr.__doc__)
- print("Category:", extr.category,
- "- Subcategory:", extr.subcategory)
test = next(extr._get_tests(), None)
- if test:
- print("Example :", test[0])
- print()
+ write(fmt(
+ extr.__name__, extr.__doc__,
+ extr.category, extr.subcategory,
+ "\nExample : " + test[0] if test else "",
+ ))
+
elif args.clear_cache:
from . import cache
log = logging.getLogger("cache")
diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py
new file mode 100644
index 0000000..22cb052
--- /dev/null
+++ b/gallery_dl/aes.py
@@ -0,0 +1,641 @@
+# -*- coding: utf-8 -*-
+
+# This is a slightly modified version of yt-dlp's aes module.
+# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/aes.py
+
+import struct
+import binascii
+from math import ceil
+
+try:
+ from Cryptodome.Cipher import AES as Cryptodome_AES
+except ImportError:
+ try:
+ from Crypto.Cipher import AES as Cryptodome_AES
+ except ImportError:
+ Cryptodome_AES = None
+
+
+if Cryptodome_AES:
+ def aes_cbc_decrypt_bytes(data, key, iv):
+ """Decrypt bytes with AES-CBC using pycryptodome"""
+ return Cryptodome_AES.new(
+ key, Cryptodome_AES.MODE_CBC, iv).decrypt(data)
+
+ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+ """Decrypt bytes with AES-GCM using pycryptodome"""
+ return Cryptodome_AES.new(
+ key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag)
+else:
+ def aes_cbc_decrypt_bytes(data, key, iv):
+ """Decrypt bytes with AES-CBC using native implementation"""
+ return intlist_to_bytes(aes_cbc_decrypt(
+ bytes_to_intlist(data),
+ bytes_to_intlist(key),
+ bytes_to_intlist(iv),
+ ))
+
+ def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
+ """Decrypt bytes with AES-GCM using native implementation"""
+ return intlist_to_bytes(aes_gcm_decrypt_and_verify(
+ bytes_to_intlist(data),
+ bytes_to_intlist(key),
+ bytes_to_intlist(tag),
+ bytes_to_intlist(nonce),
+ ))
+
+
+bytes_to_intlist = list
+
+
+def intlist_to_bytes(xs):
+ if not xs:
+ return b""
+ return struct.pack("%dB" % len(xs), *xs)
+
+
+def unpad_pkcs7(data):
+ return data[:-data[-1]]
+
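A small round-trip sketch for the helpers above, assuming the module is importable as gallery_dl.aes (key and IV are dummy values for illustration):

    from gallery_dl import aes

    key = b"0123456789abcdef"
    iv = b"\x00" * 16
    # encrypt with the int-list primitive, then decrypt through the
    # bytes wrapper and strip the PKCS#7 padding again
    ct = aes.intlist_to_bytes(
        aes.aes_cbc_encrypt(list(b"secret message"), list(key), list(iv)))
    assert aes.unpad_pkcs7(aes.aes_cbc_decrypt_bytes(ct, key, iv)) \
        == b"secret message"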
+
+BLOCK_SIZE_BYTES = 16
+
+
+def aes_ecb_encrypt(data, key, iv=None):
+ """
+ Encrypt with aes in ECB mode
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv Unused for this mode
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ encrypted_data += aes_encrypt(block, expanded_key)
+ encrypted_data = encrypted_data[:len(data)]
+
+ return encrypted_data
+
+
+def aes_ecb_decrypt(data, key, iv=None):
+ """
+ Decrypt with aes in ECB mode
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv Unused for this mode
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ encrypted_data += aes_decrypt(block, expanded_key)
+ encrypted_data = encrypted_data[:len(data)]
+
+ return encrypted_data
+
+
+def aes_ctr_decrypt(data, key, iv):
+ """
+ Decrypt with aes in counter mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte initialization vector
+ @returns {int[]} decrypted data
+ """
+ return aes_ctr_encrypt(data, key, iv)
+
+
+def aes_ctr_encrypt(data, key, iv):
+ """
+ Encrypt with aes in counter mode
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte initialization vector
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+ counter = iter_vector(iv)
+
+ encrypted_data = []
+ for i in range(block_count):
+ counter_block = next(counter)
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ cipher_counter_block = aes_encrypt(counter_block, expanded_key)
+ encrypted_data += xor(block, cipher_counter_block)
+ encrypted_data = encrypted_data[:len(data)]
+
+ return encrypted_data
+
+
+def aes_cbc_decrypt(data, key, iv):
+ """
+ Decrypt with aes in CBC mode
+
+ @param {int[]} data cipher
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} decrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ decrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ block += [0] * (BLOCK_SIZE_BYTES - len(block))
+
+ decrypted_block = aes_decrypt(block, expanded_key)
+ decrypted_data += xor(decrypted_block, previous_cipher_block)
+ previous_cipher_block = block
+ decrypted_data = decrypted_data[:len(data)]
+
+ return decrypted_data
+
+
+def aes_cbc_encrypt(data, key, iv):
+ """
+ Encrypt with aes in CBC mode. Using PKCS#7 padding
+
+ @param {int[]} data cleartext
+ @param {int[]} key 16/24/32-Byte cipher key
+ @param {int[]} iv 16-Byte IV
+ @returns {int[]} encrypted data
+ """
+ expanded_key = key_expansion(key)
+ block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))
+
+ encrypted_data = []
+ previous_cipher_block = iv
+ for i in range(block_count):
+ block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
+ remaining_length = BLOCK_SIZE_BYTES - len(block)
+ block += [remaining_length] * remaining_length
+ mixed_block = xor(block, previous_cipher_block)
+
+ encrypted_block = aes_encrypt(mixed_block, expanded_key)
+ encrypted_data += encrypted_block
+
+ previous_cipher_block = encrypted_block
+
+ return encrypted_data
+
+
+def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
+ """
+ Decrypt with aes in GCM mode and check authenticity using tag
+
+ @param {int[]} data cipher
+ @param {int[]} key 16-Byte cipher key
+ @param {int[]} tag authentication tag
+ @param {int[]} nonce IV (recommended 12-Byte)
+ @returns {int[]} decrypted data
+ """
+
+ # XXX: check aes, gcm param
+
+ hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key))
+
+ if len(nonce) == 12:
+ j0 = nonce + [0, 0, 0, 1]
+ else:
+ fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % \
+ BLOCK_SIZE_BYTES + 8
+ ghash_in = nonce + [0] * fill + bytes_to_intlist(
+ (8 * len(nonce)).to_bytes(8, "big"))
+ j0 = ghash(hash_subkey, ghash_in)
+
+ # TODO: add nonce support to aes_ctr_decrypt
+
+ # nonce_ctr = j0[:12]
+ iv_ctr = inc(j0)
+
+ decrypted_data = aes_ctr_decrypt(
+ data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
+
+ pad_len = len(data) // 16 * 16
+ s_tag = ghash(
+ hash_subkey,
+ data +
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) + # pad
+ bytes_to_intlist(
+ (0 * 8).to_bytes(8, "big") + # length of associated data
+ ((len(data) * 8).to_bytes(8, "big")) # length of data
+ )
+ )
+
+ if tag != aes_ctr_encrypt(s_tag, key, j0):
+ raise ValueError("Mismatching authentication tag")
+
+ return decrypted_data
+
+
+def aes_encrypt(data, expanded_key):
+ """
+ Encrypt one block with aes
+
+ @param {int[]} data 16-Byte state
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte cipher
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+ for i in range(1, rounds + 1):
+ data = sub_bytes(data)
+ data = shift_rows(data)
+ if i != rounds:
+ data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX))
+ data = xor(data, expanded_key[
+ i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt(data, expanded_key):
+ """
+ Decrypt one block with aes
+
+ @param {int[]} data 16-Byte cipher
+ @param {int[]} expanded_key 176/208/240-Byte expanded key
+ @returns {int[]} 16-Byte state
+ """
+ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1
+
+ for i in range(rounds, 0, -1):
+ data = xor(data, expanded_key[
+ i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES])
+ if i != rounds:
+ data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV))
+ data = shift_rows_inv(data)
+ data = sub_bytes_inv(data)
+ data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
+
+ return data
+
+
+def aes_decrypt_text(data, password, key_size_bytes):
+ """
+ Decrypt text
+ - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
+ - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
+ with the first 'key_size_bytes' Bytes from 'password'
+ (if necessary filled with 0's)
+ - Mode of operation is 'counter'
+
+ @param {str} data Base64 encoded string
+ @param {str,unicode} password Password (will be encoded with utf-8)
+ @param {int} key_size_bytes Possible values: 16 for 128-Bit,
+ 24 for 192-Bit, or
+ 32 for 256-Bit
+ @returns {str} Decrypted data
+ """
+ NONCE_LENGTH_BYTES = 8
+
+ data = bytes_to_intlist(binascii.a2b_base64(data))
+ password = bytes_to_intlist(password.encode("utf-8"))
+
+ key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
+ key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * \
+ (key_size_bytes // BLOCK_SIZE_BYTES)
+
+ nonce = data[:NONCE_LENGTH_BYTES]
+ cipher = data[NONCE_LENGTH_BYTES:]
+
+ return intlist_to_bytes(aes_ctr_decrypt(
+ cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)
+ ))
+
+
+RCON = (
+ 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+)
+
+SBOX = (
+ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+ 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+ 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+ 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+ 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+ 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+ 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+ 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+ 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+ 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+ 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+ 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+ 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+ 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+ 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+ 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+ 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
+)
+
+SBOX_INV = (
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+)
+
+MIX_COLUMN_MATRIX = (
+ (0x2, 0x3, 0x1, 0x1),
+ (0x1, 0x2, 0x3, 0x1),
+ (0x1, 0x1, 0x2, 0x3),
+ (0x3, 0x1, 0x1, 0x2),
+)
+
+MIX_COLUMN_MATRIX_INV = (
+ (0xE, 0xB, 0xD, 0x9),
+ (0x9, 0xE, 0xB, 0xD),
+ (0xD, 0x9, 0xE, 0xB),
+ (0xB, 0xD, 0x9, 0xE),
+)
+
+RIJNDAEL_EXP_TABLE = (
+ 0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF,
+ 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35,
+ 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4,
+ 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA,
+ 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26,
+ 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31,
+ 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC,
+ 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD,
+ 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7,
+ 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88,
+ 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F,
+ 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A,
+ 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0,
+ 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3,
+ 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC,
+ 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0,
+ 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2,
+ 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41,
+ 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0,
+ 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75,
+ 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E,
+ 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80,
+ 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF,
+ 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54,
+ 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09,
+ 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA,
+ 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91,
+ 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E,
+ 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C,
+ 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17,
+ 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD,
+ 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01,
+)
+
+RIJNDAEL_LOG_TABLE = (
+ 0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6,
+ 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03,
+ 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef,
+ 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1,
+ 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a,
+ 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78,
+ 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24,
+ 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e,
+ 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94,
+ 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38,
+ 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62,
+ 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10,
+ 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42,
+ 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba,
+ 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca,
+ 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57,
+ 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74,
+ 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8,
+ 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5,
+ 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0,
+ 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec,
+ 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7,
+ 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86,
+ 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d,
+ 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc,
+ 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1,
+ 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47,
+ 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab,
+ 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89,
+ 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5,
+ 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18,
+ 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07,
+)
+
+
+def key_expansion(data):
+ """
+ Generate key schedule
+
+ @param {int[]} data 16/24/32-Byte cipher key
+ @returns {int[]} 176/208/240-Byte expanded key
+ """
+ data = data[:] # copy
+ rcon_iteration = 1
+ key_size_bytes = len(data)
+ expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES
+
+ while len(data) < expanded_key_size_bytes:
+ temp = data[-4:]
+ temp = key_schedule_core(temp, rcon_iteration)
+ rcon_iteration += 1
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ if key_size_bytes == 32:
+ temp = data[-4:]
+ temp = sub_bytes(temp)
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+
+ for _ in range(3 if key_size_bytes == 32 else
+ 2 if key_size_bytes == 24 else 0):
+ temp = data[-4:]
+ data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes])
+ data = data[:expanded_key_size_bytes]
+
+ return data
+
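A quick sanity check of the schedule sizes produced by the formula above, (key_size_bytes // 4 + 7) * 16:

    assert len(key_expansion(list(range(16)))) == 176  # AES-128
    assert len(key_expansion(list(range(24)))) == 208  # AES-192
    assert len(key_expansion(list(range(32)))) == 240  # AES-256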
+
+def iter_vector(iv):
+ while True:
+ yield iv
+ iv = inc(iv)
+
+
+def sub_bytes(data):
+ return [SBOX[x] for x in data]
+
+
+def sub_bytes_inv(data):
+ return [SBOX_INV[x] for x in data]
+
+
+def rotate(data):
+ return data[1:] + [data[0]]
+
+
+def key_schedule_core(data, rcon_iteration):
+ data = rotate(data)
+ data = sub_bytes(data)
+ data[0] = data[0] ^ RCON[rcon_iteration]
+
+ return data
+
+
+def xor(data1, data2):
+ return [x ^ y for x, y in zip(data1, data2)]
+
+
+def iter_mix_columns(data, matrix):
+ for i in (0, 4, 8, 12):
+ for row in matrix:
+ mixed = 0
+ for j in range(4):
+ if data[i:i + 4][j] == 0 or row[j] == 0:
+ mixed ^= 0
+ else:
+ mixed ^= RIJNDAEL_EXP_TABLE[
+ (RIJNDAEL_LOG_TABLE[data[i + j]] +
+ RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF
+ ]
+ yield mixed
+
+
+def shift_rows(data):
+ return [
+ data[((column + row) & 0b11) * 4 + row]
+ for column in range(4)
+ for row in range(4)
+ ]
+
+
+def shift_rows_inv(data):
+ return [
+ data[((column - row) & 0b11) * 4 + row]
+ for column in range(4)
+ for row in range(4)
+ ]
+
+
+def shift_block(data):
+ data_shifted = []
+
+ bit = 0
+ for n in data:
+ if bit:
+ n |= 0x100
+ bit = n & 1
+ n >>= 1
+ data_shifted.append(n)
+
+ return data_shifted
+
+
+def inc(data):
+ data = data[:] # copy
+ for i in range(len(data) - 1, -1, -1):
+ if data[i] == 255:
+ data[i] = 0
+ else:
+ data[i] = data[i] + 1
+ break
+ return data
+
+
+def block_product(block_x, block_y):
+ # NIST SP 800-38D, Algorithm 1
+
+ if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
+ raise ValueError(
+ "Length of blocks needs to be %d bytes" % BLOCK_SIZE_BYTES)
+
+ block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
+ block_v = block_y[:]
+ block_z = [0] * BLOCK_SIZE_BYTES
+
+ for i in block_x:
+ for bit in range(7, -1, -1):
+ if i & (1 << bit):
+ block_z = xor(block_z, block_v)
+
+ do_xor = block_v[-1] & 1
+ block_v = shift_block(block_v)
+ if do_xor:
+ block_v = xor(block_v, block_r)
+
+ return block_z
+
+
+def ghash(subkey, data):
+ # NIST SP 800-38D, Algorithm 2
+
+ if len(data) % BLOCK_SIZE_BYTES:
+ raise ValueError(
+ "Length of data should be a multiple of %d bytes" % BLOCK_SIZE_BYTES)
+
+ last_y = [0] * BLOCK_SIZE_BYTES
+ for i in range(0, len(data), BLOCK_SIZE_BYTES):
+ block = data[i: i + BLOCK_SIZE_BYTES]
+ last_y = block_product(xor(last_y, block), subkey)
+
+ return last_y
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
new file mode 100644
index 0000000..b173a30
--- /dev/null
+++ b/gallery_dl/cookies.py
@@ -0,0 +1,956 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+# Adapted from yt-dlp's cookies module.
+# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/cookies.py
+
+import binascii
+import contextlib
+import ctypes
+import json
+import logging
+import os
+import shutil
+import sqlite3
+import struct
+import subprocess
+import sys
+import tempfile
+from datetime import datetime, timedelta, timezone
+from hashlib import pbkdf2_hmac
+from http.cookiejar import Cookie
+from . import aes
+
+
+SUPPORTED_BROWSERS_CHROMIUM = {
+ "brave", "chrome", "chromium", "edge", "opera", "vivaldi"}
+SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"}
+
+logger = logging.getLogger("cookies")
+
+
+def load_cookies(cookiejar, browser_specification):
+ browser_name, profile, keyring = \
+ _parse_browser_specification(*browser_specification)
+
+ if browser_name == "firefox":
+ load_cookies_firefox(cookiejar, profile)
+ elif browser_name == "safari":
+ load_cookies_safari(cookiejar, profile)
+ elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
+ load_cookies_chrome(cookiejar, browser_name, profile, keyring)
+ else:
+ raise ValueError("unknown browser '{}'".format(browser_name))
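A usage sketch for this entry point, assuming _parse_browser_specification (defined further below) accepts empty strings for the optional profile and keyring fields, as produced by the command-line parsing in __init__.py:

    from http.cookiejar import CookieJar
    from gallery_dl import cookies

    jar = CookieJar()
    cookies.load_cookies(jar, ("firefox", "", ""))
    print(len(jar), "cookies loaded")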
+
+
+def load_cookies_firefox(cookiejar, profile=None):
+ set_cookie = cookiejar.set_cookie
+ with _firefox_cookies_database(profile) as db:
+ for name, value, domain, path, secure, expires in db.execute(
+ "SELECT name, value, host, path, isSecure, expiry "
+ "FROM moz_cookies"):
+ set_cookie(Cookie(
+ 0, name, value, None, False,
+ domain, bool(domain), domain.startswith("."),
+ path, bool(path), secure, expires, False, None, None, {},
+ ))
+
+
+def load_cookies_safari(cookiejar, profile=None):
+ """Ref.: https://github.com/libyal/dtformats/blob
+ /main/documentation/Safari%20Cookies.asciidoc
+ - This data appears to be out of date
+ but the important parts of the database structure are the same
+ - There are a few bytes here and there
+ which are skipped during parsing
+ """
+ with _safari_cookies_database() as fp:
+ data = fp.read()
+ page_sizes, body_start = _safari_parse_cookies_header(data)
+ p = DataParser(data[body_start:])
+ for page_size in page_sizes:
+ _safari_parse_cookies_page(p.read_bytes(page_size), cookiejar)
+
+
+def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
+ config = _get_chromium_based_browser_settings(browser_name)
+
+ with _chrome_cookies_database(profile, config) as db:
+
+ db.text_factory = bytes
+ decryptor = get_cookie_decryptor(
+ config["directory"], config["keyring"], keyring=keyring)
+
+ try:
+ rows = db.execute(
+ "SELECT host_key, name, value, encrypted_value, path, "
+ "expires_utc, is_secure FROM cookies")
+ except sqlite3.OperationalError:
+ rows = db.execute(
+ "SELECT host_key, name, value, encrypted_value, path, "
+ "expires_utc, secure FROM cookies")
+
+ set_cookie = cookiejar.set_cookie
+ failed_cookies = unencrypted_cookies = 0
+
+ for domain, name, value, enc_value, path, expires, secure in rows:
+
+ if not value and enc_value: # encrypted
+ value = decryptor.decrypt(enc_value)
+ if value is None:
+ failed_cookies += 1
+ continue
+ else:
+ value = value.decode()
+ unencrypted_cookies += 1
+
+ domain = domain.decode()
+ path = path.decode()
+ name = name.decode()
+
+ set_cookie(Cookie(
+ 0, name, value, None, False,
+ domain, bool(domain), domain.startswith("."),
+ path, bool(path), secure, expires, False, None, None, {},
+ ))
+
+ if failed_cookies > 0:
+ failed_message = " ({} could not be decrypted)".format(failed_cookies)
+ else:
+ failed_message = ""
+
+ logger.info("Extracted %s cookies from %s%s",
+ len(cookiejar), browser_name, failed_message)
+ counts = decryptor.cookie_counts.copy()
+ counts["unencrypted"] = unencrypted_cookies
+ logger.debug("cookie version breakdown: %s", counts)
+
+
+# --------------------------------------------------------------------
+# firefox
+
+def _firefox_cookies_database(profile=None):
+ if profile is None:
+ search_root = _firefox_browser_directory()
+ elif _is_path(profile):
+ search_root = profile
+ else:
+ search_root = os.path.join(_firefox_browser_directory(), profile)
+
+ path = _find_most_recently_used_file(search_root, "cookies.sqlite")
+ if path is None:
+ raise FileNotFoundError("Unable to find Firefox cookies database in "
+ "{}".format(search_root))
+
+ logger.debug("Extracting cookies from %s", path)
+ return DatabaseCopy(path)
+
+
+def _firefox_browser_directory():
+ if sys.platform in ("linux", "linux2"):
+ return os.path.expanduser("~/.mozilla/firefox")
+ if sys.platform == "win32":
+ return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles")
+ if sys.platform == "darwin":
+ return os.path.expanduser("~/Library/Application Support/Firefox")
+ raise ValueError("unsupported platform '{}'".format(sys.platform))
+
+
+# --------------------------------------------------------------------
+# safari
+
+def _safari_cookies_database():
+ try:
+ path = os.path.expanduser("~/Library/Cookies/Cookies.binarycookies")
+ return open(path, "rb")
+ except FileNotFoundError:
+ logger.debug("Trying secondary cookie location")
+ path = os.path.expanduser("~/Library/Containers/com.apple.Safari/Data"
+ "/Library/Cookies/Cookies.binarycookies")
+ return open(path, "rb")
+
+
+def _safari_parse_cookies_header(data):
+ p = DataParser(data)
+ p.expect_bytes(b"cook", "database signature")
+ number_of_pages = p.read_uint(big_endian=True)
+ page_sizes = [p.read_uint(big_endian=True)
+ for _ in range(number_of_pages)]
+ return page_sizes, p.cursor
+
+
+def _safari_parse_cookies_page(data, jar):
+ p = DataParser(data)
+ p.expect_bytes(b"\x00\x00\x01\x00", "page signature")
+ number_of_cookies = p.read_uint()
+ record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
+ if number_of_cookies == 0:
+ logger.debug("a cookies page of size %s has no cookies", len(data))
+ return
+
+ p.skip_to(record_offsets[0], "unknown page header field")
+
+ for i, record_offset in enumerate(record_offsets):
+ p.skip_to(record_offset, "space between records")
+ record_length = _safari_parse_cookies_record(
+ data[record_offset:], jar)
+ p.read_bytes(record_length)
+ p.skip_to_end("space in between pages")
+
+
+def _safari_parse_cookies_record(data, cookiejar):
+ p = DataParser(data)
+ record_size = p.read_uint()
+ p.skip(4, "unknown record field 1")
+ flags = p.read_uint()
+ is_secure = bool(flags & 0x0001)
+ p.skip(4, "unknown record field 2")
+ domain_offset = p.read_uint()
+ name_offset = p.read_uint()
+ path_offset = p.read_uint()
+ value_offset = p.read_uint()
+ p.skip(8, "unknown record field 3")
+ expiration_date = _mac_absolute_time_to_posix(p.read_double())
+ _creation_date = _mac_absolute_time_to_posix(p.read_double()) # noqa: F841
+
+ try:
+ p.skip_to(domain_offset)
+ domain = p.read_cstring()
+
+ p.skip_to(name_offset)
+ name = p.read_cstring()
+
+ p.skip_to(path_offset)
+ path = p.read_cstring()
+
+ p.skip_to(value_offset)
+ value = p.read_cstring()
+ except UnicodeDecodeError:
+ logger.warning("failed to parse Safari cookie "
+ "because UTF-8 decoding failed")
+ return record_size
+
+ p.skip_to(record_size, "space at the end of the record")
+
+ cookiejar.set_cookie(Cookie(
+ 0, name, value, None, False,
+ domain, bool(domain), domain.startswith('.'),
+ path, bool(path), is_secure, expiration_date, False,
+ None, None, {},
+ ))
+
+ return record_size
+
+
+# --------------------------------------------------------------------
+# chrome
+
+def _chrome_cookies_database(profile, config):
+ if profile is None:
+ search_root = config["directory"]
+ elif _is_path(profile):
+ search_root = profile
+ config["directory"] = (os.path.dirname(profile)
+ if config["profiles"] else profile)
+ elif config["profiles"]:
+ search_root = os.path.join(config["directory"], profile)
+ else:
+ logger.warning("%s does not support profiles", config["browser"])
+ search_root = config["directory"]
+
+ path = _find_most_recently_used_file(search_root, "Cookies")
+ if path is None:
+ raise FileNotFoundError("Unable to find {} cookies database in "
+ "'{}'".format(config["browser"], search_root))
+
+ logger.debug("Extracting cookies from %s", path)
+ return DatabaseCopy(path)
+
+
+def _get_chromium_based_browser_settings(browser_name):
+ # https://chromium.googlesource.com/chromium
+ # /src/+/HEAD/docs/user_data_dir.md
+ join = os.path.join
+
+ if sys.platform in ("linux", "linux2"):
+ config = (os.environ.get("XDG_CONFIG_HOME") or
+ os.path.expanduser("~/.config"))
+
+ browser_dir = {
+ "brave" : join(config, "BraveSoftware/Brave-Browser"),
+ "chrome" : join(config, "google-chrome"),
+ "chromium": join(config, "chromium"),
+ "edge" : join(config, "microsoft-edge"),
+ "opera" : join(config, "opera"),
+ "vivaldi" : join(config, "vivaldi"),
+ }[browser_name]
+
+ elif sys.platform == "win32":
+ appdata_local = os.path.expandvars("%LOCALAPPDATA%")
+ appdata_roaming = os.path.expandvars("%APPDATA%")
+ browser_dir = {
+ "brave" : join(appdata_local,
+ R"BraveSoftware\Brave-Browser\User Data"),
+ "chrome" : join(appdata_local, R"Google\Chrome\User Data"),
+ "chromium": join(appdata_local, R"Chromium\User Data"),
+ "edge" : join(appdata_local, R"Microsoft\Edge\User Data"),
+ "opera" : join(appdata_roaming, R"Opera Software\Opera Stable"),
+ "vivaldi" : join(appdata_local, R"Vivaldi\User Data"),
+ }[browser_name]
+
+ elif sys.platform == "darwin":
+ appdata = os.path.expanduser("~/Library/Application Support")
+ browser_dir = {
+ "brave" : join(appdata, "BraveSoftware/Brave-Browser"),
+ "chrome" : join(appdata, "Google/Chrome"),
+ "chromium": join(appdata, "Chromium"),
+ "edge" : join(appdata, "Microsoft Edge"),
+ "opera" : join(appdata, "com.operasoftware.Opera"),
+ "vivaldi" : join(appdata, "Vivaldi"),
+ }[browser_name]
+
+ else:
+ raise ValueError("unsupported platform '{}'".format(sys.platform))
+
+ # Linux keyring names can be determined by snooping on dbus
+ # while opening the browser in KDE:
+ # dbus-monitor "interface="org.kde.KWallet"" "type=method_return"
+ keyring_name = {
+ "brave" : "Brave",
+ "chrome" : "Chrome",
+ "chromium": "Chromium",
+ "edge" : "Microsoft Edge" if sys.platform == "darwin" else
+ "Chromium",
+ "opera" : "Opera" if sys.platform == "darwin" else "Chromium",
+ "vivaldi" : "Vivaldi" if sys.platform == "darwin" else "Chrome",
+ }[browser_name]
+
+ browsers_without_profiles = {"opera"}
+
+ return {
+ "browser" : browser_name,
+ "directory": browser_dir,
+ "keyring" : keyring_name,
+ "profiles" : browser_name not in browsers_without_profiles
+ }
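For example, on Linux with browser_name "chromium" (and XDG_CONFIG_HOME unset), the settings returned above work out to:

    {
        "browser": "chromium",
        "directory": "~/.config/chromium",  # "~" expanded at runtime
        "keyring": "Chromium",
        "profiles": True,
    }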
+
+
+class ChromeCookieDecryptor:
+ """
+ Overview:
+
+ Linux:
+ - cookies are either v10 or v11
+ - v10: AES-CBC encrypted with a fixed key
+ - v11: AES-CBC encrypted with an OS protected key (keyring)
+ - v11 keys can be stored in various places depending on the
+ active desktop environment [2]
+
+ Mac:
+ - cookies are either v10 or not v10
+ - v10: AES-CBC encrypted with an OS protected key (keyring)
+ and more key derivation iterations than linux
+ - not v10: "old data" stored as plaintext
+
+ Windows:
+ - cookies are either v10 or not v10
+ - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
+ - not v10: encrypted with DPAPI
+
+ Sources:
+ - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads
+ /main/components/os_crypt/
+ - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads
+ /main/components/os_crypt/key_storage_linux.cc
+ - KeyStorageLinux::CreateService
+ """
+
+ def decrypt(self, encrypted_value):
+ raise NotImplementedError("Must be implemented by sub classes")
+
+ @property
+ def cookie_counts(self):
+ raise NotImplementedError("Must be implemented by sub classes")
+
+
+def get_cookie_decryptor(browser_root, browser_keyring_name, *, keyring=None):
+ if sys.platform in ("linux", "linux2"):
+ return LinuxChromeCookieDecryptor(
+ browser_keyring_name, keyring=keyring)
+ elif sys.platform == "darwin":
+ return MacChromeCookieDecryptor(browser_keyring_name)
+ elif sys.platform == "win32":
+ return WindowsChromeCookieDecryptor(browser_root)
+ else:
+ raise NotImplementedError("Chrome cookie decryption is not supported "
+ "on {}".format(sys.platform))
+
+
+class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_keyring_name, *, keyring=None):
+ self._v10_key = self.derive_key(b"peanuts")
+ password = _get_linux_keyring_password(browser_keyring_name, keyring)
+ self._v11_key = None if password is None else self.derive_key(password)
+ self._cookie_counts = {"v10": 0, "v11": 0, "other": 0}
+
+ @staticmethod
+ def derive_key(password):
+ # values from
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads
+ # /main/components/os_crypt/os_crypt_linux.cc
+ return pbkdf2_sha1(password, salt=b"saltysalt",
+ iterations=1, key_length=16)
+
+ @property
+ def cookie_counts(self):
+ return self._cookie_counts
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b"v10":
+ self._cookie_counts["v10"] += 1
+ return _decrypt_aes_cbc(ciphertext, self._v10_key)
+
+ elif version == b"v11":
+ self._cookie_counts["v11"] += 1
+ if self._v11_key is None:
+ logger.warning("cannot decrypt v11 cookies: no key found")
+ return None
+ return _decrypt_aes_cbc(ciphertext, self._v11_key)
+
+ else:
+ self._cookie_counts["other"] += 1
+ return None
+
+
+class MacChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_keyring_name):
+ password = _get_mac_keyring_password(browser_keyring_name)
+ self._v10_key = None if password is None else self.derive_key(password)
+ self._cookie_counts = {"v10": 0, "other": 0}
+
+ @staticmethod
+ def derive_key(password):
+ # values from
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads
+ # /main/components/os_crypt/os_crypt_mac.mm
+ return pbkdf2_sha1(password, salt=b"saltysalt",
+ iterations=1003, key_length=16)
+
+ @property
+ def cookie_counts(self):
+ return self._cookie_counts
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b"v10":
+ self._cookie_counts["v10"] += 1
+ if self._v10_key is None:
+ logger.warning("cannot decrypt v10 cookies: no key found")
+ return None
+
+ return _decrypt_aes_cbc(ciphertext, self._v10_key)
+
+ else:
+ self._cookie_counts["other"] += 1
+ # other prefixes are considered "old data",
+ # which were stored as plaintext
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads
+ # /main/components/os_crypt/os_crypt_mac.mm
+ return encrypted_value
+
+
+class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
+ def __init__(self, browser_root):
+ self._v10_key = _get_windows_v10_key(browser_root)
+ self._cookie_counts = {"v10": 0, "other": 0}
+
+ @property
+ def cookie_counts(self):
+ return self._cookie_counts
+
+ def decrypt(self, encrypted_value):
+ version = encrypted_value[:3]
+ ciphertext = encrypted_value[3:]
+
+ if version == b"v10":
+ self._cookie_counts["v10"] += 1
+ if self._v10_key is None:
+ logger.warning("cannot decrypt v10 cookies: no key found")
+ return None
+
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads
+ # /main/components/os_crypt/os_crypt_win.cc
+ # kNonceLength
+ nonce_length = 96 // 8
+ # boringssl
+ # EVP_AEAD_AES_GCM_TAG_LEN
+ authentication_tag_length = 16
+
+ raw_ciphertext = ciphertext
+ nonce = raw_ciphertext[:nonce_length]
+ ciphertext = raw_ciphertext[
+ nonce_length:-authentication_tag_length]
+ authentication_tag = raw_ciphertext[-authentication_tag_length:]
+
+ return _decrypt_aes_gcm(
+ ciphertext, self._v10_key, nonce, authentication_tag)
+
+ else:
+ self._cookie_counts["other"] += 1
+ # any other prefix means the data is DPAPI encrypted
+ # https://chromium.googlesource.com/chromium/src/+/refs/heads
+ # /main/components/os_crypt/os_crypt_win.cc
+ return _decrypt_windows_dpapi(encrypted_value).decode()
+
+
+# --------------------------------------------------------------------
+# keyring
+
+def _choose_linux_keyring():
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads
+ /main/components/os_crypt/key_storage_util_linux.cc
+ SelectBackend
+ """
+ desktop_environment = _get_linux_desktop_environment(os.environ)
+ logger.debug("Detected desktop environment: %s", desktop_environment)
+ if desktop_environment == DE_KDE:
+ return KEYRING_KWALLET
+ if desktop_environment == DE_OTHER:
+ return KEYRING_BASICTEXT
+ return KEYRING_GNOMEKEYRING
+
+
+def _get_kwallet_network_wallet():
+ """ The name of the wallet used to store network passwords.
+
+ https://chromium.googlesource.com/chromium/src/+/refs/heads
+ /main/components/os_crypt/kwallet_dbus.cc
+ KWalletDBus::NetworkWallet
+ which does a dbus call to the following function:
+ https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
+ Wallet::NetworkWallet
+ """
+ default_wallet = "kdewallet"
+ try:
+ proc, stdout = Popen_communicate(
+ "dbus-send", "--session", "--print-reply=literal",
+ "--dest=org.kde.kwalletd5",
+ "/modules/kwalletd5",
+ "org.kde.KWallet.networkWallet"
+ )
+
+ if proc.returncode != 0:
+ logger.warning("failed to read NetworkWallet")
+ return default_wallet
+ else:
+ network_wallet = stdout.decode().strip()
+ logger.debug("NetworkWallet = '%s'", network_wallet)
+ return network_wallet
+ except Exception as exc:
+ logger.warning("exception while obtaining NetworkWallet (%s: %s)",
+ exc.__class__.__name__, exc)
+ return default_wallet
+
+
+def _get_kwallet_password(browser_keyring_name):
+ logger.debug("using kwallet-query to obtain password from kwallet")
+
+ if shutil.which("kwallet-query") is None:
+ logger.error(
+ "kwallet-query command not found. KWallet and kwallet-query "
+ "must be installed to read from KWallet. kwallet-query should be "
+ "included in the kwallet package for your distribution")
+ return b""
+
+ network_wallet = _get_kwallet_network_wallet()
+
+ try:
+ proc, stdout = Popen_communicate(
+ "kwallet-query",
+ "--read-password", browser_keyring_name + " Safe Storage",
+ "--folder", browser_keyring_name + " Keys",
+ network_wallet,
+ )
+
+ if proc.returncode != 0:
+ logger.error("kwallet-query failed with return code {}. "
+ "Please consult the kwallet-query man page "
+ "for details".format(proc.returncode))
+ return b""
+
+ if stdout.lower().startswith(b"failed to read"):
+ logger.debug("Failed to read password from kwallet. "
+ "Using empty string instead")
+            # This sometimes occurs in KDE because chrome does not check
+            # hasEntry and instead just tries to read the value (for which
+            # kwallet returns "") whereas kwallet-query checks hasEntry.
+ # To verify this:
+            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
+ # while starting chrome.
+ # This may be a bug, as the intended behaviour is to generate a
+ # random password and store it, but that doesn't matter here.
+ return b""
+ else:
+ logger.debug("password found")
+ if stdout[-1:] == b"\n":
+ stdout = stdout[:-1]
+ return stdout
+ except Exception as exc:
+ logger.warning("exception running kwallet-query (%s: %s)",
+ exc.__class__.__name__, exc)
+ return b""
+
+
+def _get_gnome_keyring_password(browser_keyring_name):
+ try:
+ import secretstorage
+ except ImportError:
+ logger.error("secretstorage not available")
+ return b""
+
+    # Gnome keyring does not seem to organise keys in the same way as KWallet;
+ # using `dbus-monitor` during startup, it can be observed that chromium
+ # lists all keys and presumably searches for its key in the list.
+ # It appears that we must do the same.
+ # https://github.com/jaraco/keyring/issues/556
+ with contextlib.closing(secretstorage.dbus_init()) as con:
+ col = secretstorage.get_default_collection(con)
+ label = browser_keyring_name + " Safe Storage"
+ for item in col.get_all_items():
+ if item.get_label() == label:
+ return item.get_secret()
+ else:
+ logger.error("failed to read from keyring")
+ return b""
+
+
+def _get_linux_keyring_password(browser_keyring_name, keyring):
+ # Note: chrome/chromium can be run with the following flags
+ # to determine which keyring backend it has chosen to use
+ # - chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
+ #
+ # Chromium supports --password-store=<basic|gnome|kwallet>
+ # so the automatic detection will not be sufficient in all cases.
+
+ if not keyring:
+ keyring = _choose_linux_keyring()
+ logger.debug("Chosen keyring: %s", keyring)
+
+ if keyring == KEYRING_KWALLET:
+ return _get_kwallet_password(browser_keyring_name)
+ elif keyring == KEYRING_GNOMEKEYRING:
+ return _get_gnome_keyring_password(browser_keyring_name)
+ elif keyring == KEYRING_BASICTEXT:
+ # when basic text is chosen, all cookies are stored as v10
+ # so no keyring password is required
+ return None
+ assert False, "Unknown keyring " + keyring
+
+
+def _get_mac_keyring_password(browser_keyring_name):
+ logger.debug("using find-generic-password to obtain "
+ "password from OSX keychain")
+ try:
+ proc, stdout = Popen_communicate(
+ "security", "find-generic-password",
+ "-w", # write password to stdout
+ "-a", browser_keyring_name, # match "account"
+ "-s", browser_keyring_name + " Safe Storage", # match "service"
+ )
+
+ if stdout[-1:] == b"\n":
+ stdout = stdout[:-1]
+ return stdout
+ except Exception as exc:
+ logger.warning("exception running find-generic-password (%s: %s)",
+ exc.__class__.__name__, exc)
+ return None
+
+
+def _get_windows_v10_key(browser_root):
+ path = _find_most_recently_used_file(browser_root, "Local State")
+ if path is None:
+ logger.error("could not find local state file")
+ return None
+ logger.debug("Found local state file at '%s'", path)
+ with open(path, encoding="utf8") as f:
+ data = json.load(f)
+ try:
+ base64_key = data["os_crypt"]["encrypted_key"]
+ except KeyError:
+ logger.error("no encrypted key in Local State")
+ return None
+ encrypted_key = binascii.a2b_base64(base64_key)
+ prefix = b"DPAPI"
+ if not encrypted_key.startswith(prefix):
+ logger.error("invalid key")
+ return None
+ return _decrypt_windows_dpapi(encrypted_key[len(prefix):])
+
+
+# --------------------------------------------------------------------
+# utility
+
+class ParserError(Exception):
+ pass
+
+
+class DataParser:
+ def __init__(self, data):
+ self.cursor = 0
+ self._data = data
+
+ def read_bytes(self, num_bytes):
+ if num_bytes < 0:
+ raise ParserError("invalid read of {} bytes".format(num_bytes))
+ end = self.cursor + num_bytes
+ if end > len(self._data):
+ raise ParserError("reached end of input")
+ data = self._data[self.cursor:end]
+ self.cursor = end
+ return data
+
+ def expect_bytes(self, expected_value, message):
+ value = self.read_bytes(len(expected_value))
+ if value != expected_value:
+ raise ParserError("unexpected value: {} != {} ({})".format(
+ value, expected_value, message))
+
+ def read_uint(self, big_endian=False):
+ data_format = ">I" if big_endian else "<I"
+ return struct.unpack(data_format, self.read_bytes(4))[0]
+
+ def read_double(self, big_endian=False):
+ data_format = ">d" if big_endian else "<d"
+ return struct.unpack(data_format, self.read_bytes(8))[0]
+
+ def read_cstring(self):
+ buffer = []
+ while True:
+ c = self.read_bytes(1)
+ if c == b"\x00":
+ return b"".join(buffer).decode()
+ else:
+ buffer.append(c)
+
+ def skip(self, num_bytes, description="unknown"):
+ if num_bytes > 0:
+ logger.debug("skipping {} bytes ({}): {!r}".format(
+ num_bytes, description, self.read_bytes(num_bytes)))
+ elif num_bytes < 0:
+ raise ParserError("invalid skip of {} bytes".format(num_bytes))
+
+ def skip_to(self, offset, description="unknown"):
+ self.skip(offset - self.cursor, description)
+
+ def skip_to_end(self, description="unknown"):
+ self.skip_to(len(self._data), description)
+
+
+class DatabaseCopy():
+
+ def __init__(self, path):
+ self.path = path
+ self.directory = self.database = None
+
+ def __enter__(self):
+ try:
+ self.directory = tempfile.TemporaryDirectory(prefix="gallery-dl-")
+ path_copy = os.path.join(self.directory.name, "copy.sqlite")
+ shutil.copyfile(self.path, path_copy)
+ self.database = db = sqlite3.connect(
+ path_copy, isolation_level=None, check_same_thread=False)
+ return db
+ except BaseException:
+ if self.directory:
+ self.directory.cleanup()
+ raise
+
+ def __exit__(self, exc, value, tb):
+ self.database.close()
+ self.directory.cleanup()
+
+
+def Popen_communicate(*args):
+ proc = subprocess.Popen(
+ args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+ try:
+ stdout, stderr = proc.communicate()
+ except BaseException: # Including KeyboardInterrupt
+ proc.kill()
+ proc.wait()
+ raise
+ return proc, stdout
+
+
+"""
+https://chromium.googlesource.com/chromium/src/+/refs/heads
+/main/base/nix/xdg_util.h - DesktopEnvironment
+"""
+DE_OTHER = "other"
+DE_CINNAMON = "cinnamon"
+DE_GNOME = "gnome"
+DE_KDE = "kde"
+DE_PANTHEON = "pantheon"
+DE_UNITY = "unity"
+DE_XFCE = "xfce"
+
+
+"""
+https://chromium.googlesource.com/chromium/src/+/refs/heads
+/main/components/os_crypt/key_storage_util_linux.h - SelectedLinuxBackend
+"""
+KEYRING_KWALLET = "kwallet"
+KEYRING_GNOMEKEYRING = "gnomekeyring"
+KEYRING_BASICTEXT = "basictext"
+SUPPORTED_KEYRINGS = {"kwallet", "gnomekeyring", "basictext"}
+
+
+def _get_linux_desktop_environment(env):
+ """
+ Ref: https://chromium.googlesource.com/chromium/src/+/refs/heads
+ /main/base/nix/xdg_util.cc - GetDesktopEnvironment
+ """
+ xdg_current_desktop = env.get("XDG_CURRENT_DESKTOP")
+ desktop_session = env.get("DESKTOP_SESSION")
+
+ if xdg_current_desktop:
+ xdg_current_desktop = (xdg_current_desktop.partition(":")[0]
+ .strip().lower())
+
+ if xdg_current_desktop == "unity":
+ if desktop_session and "gnome-fallback" in desktop_session:
+ return DE_GNOME
+ else:
+ return DE_UNITY
+ elif xdg_current_desktop == "gnome":
+ return DE_GNOME
+ elif xdg_current_desktop == "x-cinnamon":
+ return DE_CINNAMON
+ elif xdg_current_desktop == "kde":
+ return DE_KDE
+ elif xdg_current_desktop == "pantheon":
+ return DE_PANTHEON
+ elif xdg_current_desktop == "xfce":
+ return DE_XFCE
+
+ if desktop_session:
+ if desktop_session in ("mate", "gnome"):
+ return DE_GNOME
+ if "kde" in desktop_session:
+ return DE_KDE
+ if "xfce" in desktop_session:
+ return DE_XFCE
+
+ if "GNOME_DESKTOP_SESSION_ID" in env:
+ return DE_GNOME
+ if "KDE_FULL_SESSION" in env:
+ return DE_KDE
+ return DE_OTHER
+
+
+def _mac_absolute_time_to_posix(timestamp):
+ return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) +
+ timedelta(seconds=timestamp)).timestamp())
+
+
+def pbkdf2_sha1(password, salt, iterations, key_length):
+ return pbkdf2_hmac("sha1", password, salt, iterations, key_length)
+
+
+def _decrypt_aes_cbc(ciphertext, key, initialization_vector=b" " * 16):
+ plaintext = aes.unpad_pkcs7(
+ aes.aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
+ try:
+ return plaintext.decode()
+ except UnicodeDecodeError:
+ logger.warning("failed to decrypt cookie (AES-CBC) because UTF-8 "
+ "decoding failed. Possibly the key is wrong?")
+ return None
+
+
+def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag):
+ try:
+ plaintext = aes.aes_gcm_decrypt_and_verify_bytes(
+ ciphertext, key, authentication_tag, nonce)
+ except ValueError:
+ logger.warning("failed to decrypt cookie (AES-GCM) because MAC check "
+ "failed. Possibly the key is wrong?")
+ return None
+
+ try:
+ return plaintext.decode()
+ except UnicodeDecodeError:
+ logger.warning("failed to decrypt cookie (AES-GCM) because UTF-8 "
+ "decoding failed. Possibly the key is wrong?")
+ return None
+
+
+def _decrypt_windows_dpapi(ciphertext):
+ """
+ References:
+ - https://docs.microsoft.com/en-us/windows
+ /win32/api/dpapi/nf-dpapi-cryptunprotectdata
+ """
+ from ctypes.wintypes import DWORD
+
+ class DATA_BLOB(ctypes.Structure):
+ _fields_ = [("cbData", DWORD),
+ ("pbData", ctypes.POINTER(ctypes.c_char))]
+
+ buffer = ctypes.create_string_buffer(ciphertext)
+ blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
+ blob_out = DATA_BLOB()
+ ret = ctypes.windll.crypt32.CryptUnprotectData(
+ ctypes.byref(blob_in), # pDataIn
+ None, # ppszDataDescr: human readable description of pDataIn
+ None, # pOptionalEntropy: salt?
+ None, # pvReserved: must be NULL
+ None, # pPromptStruct: information about prompts to display
+ 0, # dwFlags
+ ctypes.byref(blob_out) # pDataOut
+ )
+ if not ret:
+ logger.warning("failed to decrypt with DPAPI")
+ return None
+
+ result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
+ ctypes.windll.kernel32.LocalFree(blob_out.pbData)
+ return result
+
+
+def _find_most_recently_used_file(root, filename):
+ # if there are multiple browser profiles, take the most recently used one
+ paths = []
+ for curr_root, dirs, files in os.walk(root):
+ for file in files:
+ if file == filename:
+ paths.append(os.path.join(curr_root, file))
+ if not paths:
+ return None
+ return max(paths, key=lambda path: os.lstat(path).st_mtime)
+
+
+def _is_path(value):
+ return os.path.sep in value
+
+
+def _parse_browser_specification(browser, profile=None, keyring=None):
+ if browser not in SUPPORTED_BROWSERS:
+ raise ValueError("unsupported browser '{}'".format(browser))
+ if keyring and keyring not in SUPPORTED_KEYRINGS:
+ raise ValueError("unsupported keyring '{}'".format(keyring))
+ if profile and _is_path(profile):
+ profile = os.path.expanduser(profile)
+ return browser, profile, keyring
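
For reference, the fixed-key v10 path implemented above can be reproduced in
isolation: the key is PBKDF2-SHA1 over the static password "peanuts" with the
salt "saltysalt" (one iteration, 16 bytes), and the payload after the "v10"
prefix is AES-128-CBC with an IV of 16 spaces plus PKCS#7 padding. A minimal
sketch, using the third-party 'cryptography' package in place of the bundled
gallery_dl.aes helpers:

from hashlib import pbkdf2_hmac
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

def decrypt_linux_v10(encrypted_value):
    # same parameters as LinuxChromeCookieDecryptor.derive_key() above
    key = pbkdf2_hmac("sha1", b"peanuts", b"saltysalt", 1, 16)
    decryptor = Cipher(algorithms.AES(key), modes.CBC(b" " * 16)).decryptor()
    padded = decryptor.update(encrypted_value[3:]) + decryptor.finalize()
    unpadder = padding.PKCS7(128).unpadder()  # strip the PKCS#7 padding
    return (unpadder.update(padded) + unpadder.finalize()).decode()
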
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 5675081..e686c70 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,6 +20,7 @@ class ArtstationExtractor(Extractor):
filename_fmt = "{category}_{id}_{asset[id]}_{title}.{extension}"
directory_fmt = ("{category}", "{userinfo[username]}")
archive_fmt = "{asset[id]}"
+ browser = "firefox"
root = "https://www.artstation.com"
def __init__(self, match):
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index abb352c..cac8c2d 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -306,23 +306,29 @@ class Extractor():
cookiefile = util.expand_path(cookies)
try:
with open(cookiefile) as fp:
- cookies = util.load_cookiestxt(fp)
+ util.cookiestxt_load(fp, self._cookiejar)
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
- self._update_cookies(cookies)
self._cookiefile = cookiefile
+ elif isinstance(cookies, (list, tuple)):
+ from ..cookies import load_cookies
+ try:
+ load_cookies(self._cookiejar, cookies)
+ except Exception as exc:
+ self.log.warning("cookies: %s", exc)
else:
self.log.warning(
- "expected 'dict' or 'str' value for 'cookies' option, "
- "got '%s' (%s)", cookies.__class__.__name__, cookies)
+ "Expected 'dict', 'list', or 'str' value for 'cookies' "
+ "option, got '%s' (%s)",
+ cookies.__class__.__name__, cookies)
def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file"""
if self._cookiefile and self.config("cookies-update", True):
try:
with open(self._cookiefile, "w") as fp:
- util.save_cookiestxt(fp, self._cookiejar)
+ util.cookiestxt_store(fp, self._cookiejar)
except OSError as exc:
self.log.warning("cookies: %s", exc)
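
Together with _parse_browser_specification() above, the 'cookies' option now
accepts a (browser, profile, keyring) sequence in addition to a cookies.txt
path or a plain dict. A usage sketch; "Profile 1" and "kwallet" are
placeholder values that depend on the local setup, and a plain
http.cookiejar.CookieJar stands in for the extractor's session jar:

from http.cookiejar import CookieJar
from gallery_dl.cookies import load_cookies

jar = CookieJar()
# profile and keyring are optional; a bare ("chromium",) works as well
load_cookies(jar, ("chromium", "Profile 1", "kwallet"))
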
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 04e5926..093113d 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for 4chan archives based on FoolFuuka"""
+"""Extractors for FoolFuuka 4chan archives"""
from .common import BaseExtractor, Message
from .. import text
@@ -16,6 +16,7 @@ import itertools
class FoolfuukaExtractor(BaseExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
basecategory = "foolfuuka"
+ filename_fmt = "{timestamp_ms} {filename_media}.{extension}"
archive_fmt = "{board[shortname]}_{num}_{timestamp}"
external = "default"
@@ -40,6 +41,9 @@ class FoolfuukaExtractor(BaseExtractor):
post["filename"], _, post["extension"] = \
media["media"].rpartition(".")
+ post["filename_media"] = media["media_filename"].rpartition(".")[0]
+ post["timestamp_ms"] = text.parse_int(
+ media["media_orig"].rpartition(".")[0])
yield Message.Url, url, post
def metadata(self):
@@ -66,6 +70,7 @@ BASE_PATTERN = FoolfuukaExtractor.update({
},
"archivedmoe": {
"root": "https://archived.moe",
+ "pattern": r"archived\.moe",
},
"archiveofsins": {
"root": "https://archiveofsins.com",
@@ -73,12 +78,15 @@ BASE_PATTERN = FoolfuukaExtractor.update({
},
"b4k": {
"root": "https://arch.b4k.co",
+ "pattern": r"arch\.b4k\.co",
},
"desuarchive": {
"root": "https://desuarchive.org",
+ "pattern": r"desuarchive\.org",
},
"fireden": {
"root": "https://boards.fireden.net",
+ "pattern": r"boards\.fireden\.net",
},
"nyafuu": {
"root": "https://archive.nyafuu.org",
@@ -90,9 +98,11 @@ BASE_PATTERN = FoolfuukaExtractor.update({
},
"thebarchive": {
"root": "https://thebarchive.com",
+ "pattern": r"thebarchive\.com",
},
"wakarimasen": {
"root": "https://archive.wakarimasen.moe",
+ "pattern": r"archive\.wakarimasen\.moe",
},
})
@@ -101,7 +111,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"""Base extractor for threads on FoolFuuka based boards/archives"""
subcategory = "thread"
directory_fmt = ("{category}", "{board[shortname]}",
- "{thread_num}{title:? - //}")
+ "{thread_num} {title|comment[:50]}")
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
test = (
("https://archive.4plebs.org/tg/thread/54059290", {
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index c09eb96..382cc25 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2021 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -41,6 +41,7 @@ class FoolslideExtractor(BaseExtractor):
BASE_PATTERN = FoolslideExtractor.update({
"kireicake": {
"root": "https://reader.kireicake.com",
+ "pattern": r"reader\.kireicake\.com",
},
"powermanga": {
"root": "https://read.powermanga.org",
diff --git a/gallery_dl/extractor/gelbooru_v01.py b/gallery_dl/extractor/gelbooru_v01.py
index 541f454..9c19664 100644
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for Gelbooru v0.1 sites"""
+"""Extractors for Gelbooru Beta 0.1.11 sites"""
from . import booru
from .. import text
@@ -42,14 +42,43 @@ class GelbooruV01Extractor(booru.BooruExtractor):
return post
+ def _pagination(self, url, begin, end):
+ pid = self.page_start
+
+ while True:
+ page = self.request(url + str(pid)).text
+
+ cnt = 0
+ for post_id in text.extract_iter(page, begin, end):
+ yield self._parse_post(post_id)
+ cnt += 1
+
+ if cnt < self.per_page:
+ return
+ pid += self.per_page
+
BASE_PATTERN = GelbooruV01Extractor.update({
- "thecollection" : {"root": "https://the-collection.booru.org"},
- "illusioncardsbooru": {"root": "https://illusioncards.booru.org"},
- "allgirlbooru" : {"root": "https://allgirl.booru.org"},
- "drawfriends" : {"root": "https://drawfriends.booru.org"},
- "vidyart" : {"root": "https://vidyart.booru.org"},
- "theloudbooru" : {"root": "https://tlb.booru.org"},
+ "thecollection": {
+ "root": "https://the-collection.booru.org",
+ "pattern": r"the-collection\.booru\.org",
+ },
+ "illusioncardsbooru": {
+ "root": "https://illusioncards.booru.org",
+ "pattern": r"illusioncards\.booru\.org",
+ },
+ "allgirlbooru": {
+ "root": "https://allgirl.booru.org",
+ "pattern": r"allgirl\.booru\.org",
+ },
+ "drawfriends": {
+ "root": "https://drawfriends.booru.org",
+ "pattern": r"drawfriends\.booru\.org",
+ },
+ "vidyart": {
+ "root": "https://vidyart.booru.org",
+ "pattern": r"vidyart\.booru\.org",
+ },
})
@@ -75,7 +104,6 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
}),
("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"),
("https://vidyart.booru.org/index.php?page=post&s=list&tags=all"),
- ("https://tlb.booru.org/index.php?page=post&s=list&tags=all"),
)
def __init__(self, match):
@@ -88,20 +116,42 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
def posts(self):
url = "{}/index.php?page=post&s=list&tags={}&pid=".format(
self.root, self.tags)
- pid = self.page_start
+ return self._pagination(url, 'class="thumb"><a id="p', '"')
- while True:
- page = self.request(url + str(pid)).text
- cnt = 0
- for post_id in text.extract_iter(
- page, 'class="thumb"><a id="p', '"'):
- yield self._parse_post(post_id)
- cnt += 1
+class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "favorites", "{favorite_id}")
+ archive_fmt = "f_{favorite_id}_{id}"
+ per_page = 50
+ pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ test = (
+ (("https://the-collection.booru.org"
+ "/index.php?page=favorites&s=view&id=1166"), {
+ "count": 2,
+ }),
+ (("https://illusioncards.booru.org"
+ "/index.php?page=favorites&s=view&id=84887"), {
+ "count": 2,
+ }),
+ ("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", {
+ "count": 4,
+ }),
+ ("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"),
+ ("https://vidyart.booru.org/index.php?page=favorites&s=view&id=1"),
+ )
- if cnt < self.per_page:
- return
- pid += self.per_page
+ def __init__(self, match):
+ GelbooruV01Extractor.__init__(self, match)
+ self.favorite_id = match.group(match.lastindex)
+
+ def metadata(self):
+ return {"favorite_id": text.parse_int(self.favorite_id)}
+
+ def posts(self):
+ url = "{}/index.php?page=favorites&s=view&id={}&pid=".format(
+ self.root, self.favorite_id)
+ return self._pagination(url, "posts[", "]")
class GelbooruV01PostExtractor(GelbooruV01Extractor):
@@ -141,7 +191,6 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
}),
("https://drawfriends.booru.org/index.php?page=post&s=view&id=107474"),
("https://vidyart.booru.org/index.php?page=post&s=view&id=383111"),
- ("https://tlb.booru.org/index.php?page=post&s=view&id=127223"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 7e16a51..2dd0c0c 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for Gelbooru v0.2 sites"""
+"""Extractors for Gelbooru Beta 0.2 sites"""
from . import booru
from .. import text, util, exception
@@ -26,6 +26,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
except KeyError:
self.api_root = self.root
+ if self.category == "realbooru":
+ self._file_url = self._file_url_realbooru
+
def _api_request(self, params):
url = self.api_root + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text)
@@ -61,6 +64,14 @@ class GelbooruV02Extractor(booru.BooruExtractor):
post["date"] = text.parse_datetime(
post["created_at"], "%a %b %d %H:%M:%S %z %Y")
+ def _file_url_realbooru(self, post):
+ url = post["file_url"]
+ if url.count("/") == 5:
+ md5 = post["md5"]
+ url = "{}/images/{}/{}/{}.{}".format(
+ self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
+ return url
+
def _extended_tags(self, post, page=None):
if not page:
url = "{}/index.php?page=post&s=view&id={}".format(
@@ -105,11 +116,23 @@ class GelbooruV02Extractor(booru.BooruExtractor):
INSTANCES = {
- "realbooru": {"root": "https://realbooru.com"},
- "rule34" : {"root": "https://rule34.xxx",
- "api_root": " https://api.rule34.xxx"},
- "safebooru": {"root": "https://safebooru.org"},
- "tbib" : {"root": "https://tbib.org"},
+ "realbooru": {
+ "root": "https://realbooru.com",
+ "pattern": r"realbooru\.com",
+ },
+ "rule34": {
+ "root": "https://rule34.xxx",
+ "pattern": r"rule34\.xxx",
+ "api_root": "https://api.rule34.xxx",
+ },
+ "safebooru": {
+ "root": "https://safebooru.org",
+ "pattern": r"safebooru\.org",
+ },
+ "tbib": {
+ "root": "https://tbib.org",
+ "pattern": r"tbib\.org",
+ },
}
BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
@@ -147,7 +170,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
return {"search_tags": self.tags}
def posts(self):
- return self._pagination({"tags" : self.tags})
+ return self._pagination({"tags": self.tags})
class GelbooruV02PoolExtractor(GelbooruV02Extractor):
@@ -213,7 +236,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
"count": 2,
}),
("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
- "count": 4,
+ "count": 2,
}),
("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
"count": 3,
@@ -279,7 +302,8 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
},
}),
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
- "url": "2421b5b0e15d5e20f9067090a8b0fd4114d3e7d9",
+ "pattern": r"https://realbooru\.com/images/dc/b5"
+ r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
}),
("https://tbib.org/index.php?page=post&s=view&id=9233957", {
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 2035655..fd78ce2 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -201,17 +201,24 @@ class ImgurAlbumExtractor(ImgurExtractor):
("https://imgur.com/a/TcBmQ", {
"exception": exception.HttpError,
}),
+ ("https://imgur.com/a/pjOnJA0", { # empty, no 'media' (#2557)
+ "count": 0,
+ }),
("https://www.imgur.com/a/TcBmP"), # www
("https://m.imgur.com/a/TcBmP"), # mobile
)
def items(self):
album = self.api.album(self.key)
- album["date"] = text.parse_datetime(album["created_at"])
- images = album["media"]
+ try:
+ images = album["media"]
+ except KeyError:
+ return
+
del album["media"]
count = len(images)
+ album["date"] = text.parse_datetime(album["created_at"])
try:
del album["ad_url"]
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index e07b64e..82c9858 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -80,12 +80,22 @@ class InstagramExtractor(Extractor):
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
- if response.history and "/accounts/login/" in response.request.url:
- if self._cursor:
- self.log.info("Use '-o cursor=%s' to continue downloading "
- "from the current position", self._cursor)
- raise exception.StopExtraction(
- "HTTP redirect to login page (%s)", response.request.url)
+ if response.history:
+
+ url = response.request.url
+ if "/accounts/login/" in url:
+ page = "login"
+ elif "/challenge/" in url:
+ page = "challenge"
+ else:
+ page = None
+
+ if page:
+ if self._cursor:
+ self.log.info("Use '-o cursor=%s' to continue downloading "
+ "from the current position", self._cursor)
+ raise exception.StopExtraction("HTTP redirect to %s page (%s)",
+ page, url.partition("?")[0])
www_claim = response.headers.get("x-ig-set-www-claim")
if www_claim is not None:
@@ -298,7 +308,7 @@ class InstagramExtractor(Extractor):
video = None
media = image
- files.append({
+ media = {
"num" : num,
"date" : text.parse_timestamp(item.get("taken_at") or
media.get("taken_at")),
@@ -309,7 +319,9 @@ class InstagramExtractor(Extractor):
"video_url" : video["url"] if video else None,
"width" : media["width"],
"height" : media["height"],
- })
+ }
+ self._extract_tagged_users(item, media)
+ files.append(media)
return data
@@ -321,22 +333,45 @@ class InstagramExtractor(Extractor):
"abcdefghijklmnopqrstuvwxyz"
"0123456789-_")
- def _extract_tagged_users(self, src, dest):
- if "edge_media_to_tagged_user" not in src:
- return
- edges = src["edge_media_to_tagged_user"]["edges"]
+ @staticmethod
+ def _extract_tagged_users(src, dest):
+ dest["tagged_users"] = tagged_users = []
+
+ edges = src.get("edge_media_to_tagged_user")
if edges:
- dest["tagged_users"] = tagged_users = []
- for edge in edges:
+ for edge in edges["edges"]:
user = edge["node"]["user"]
- tagged_users.append({
- "id" : user["id"],
- "username" : user["username"],
- "full_name": user["full_name"],
- })
-
- def _extract_shared_data(self, url):
- page = self.request(url).text
+ tagged_users.append({"id" : user["id"],
+ "username" : user["username"],
+ "full_name": user["full_name"]})
+
+ usertags = src.get("usertags")
+ if usertags:
+ for tag in usertags["in"]:
+ user = tag["user"]
+ tagged_users.append({"id" : user["pk"],
+ "username" : user["username"],
+ "full_name": user["full_name"]})
+
+ mentions = src.get("reel_mentions")
+ if mentions:
+ for mention in mentions:
+ user = mention["user"]
+ tagged_users.append({"id" : user.get("pk"),
+ "username" : user["username"],
+ "full_name": user["full_name"]})
+
+ stickers = src.get("story_bloks_stickers")
+ if stickers:
+ for sticker in stickers:
+ sticker = sticker["bloks_sticker"]
+ if sticker["bloks_sticker_type"] == "mention":
+ user = sticker["sticker_data"]["ig_mention"]
+ tagged_users.append({"id" : user["account_id"],
+ "username" : user["username"],
+ "full_name": user["full_name"]})
+
+ def _extract_shared_data(self, page):
shared_data, pos = text.extract(
page, "window._sharedData =", ";</script>")
additional_data, pos = text.extract(
@@ -349,13 +384,15 @@ class InstagramExtractor(Extractor):
return data
def _extract_profile_page(self, url):
- data = self._extract_shared_data(url)["entry_data"]
+ page = self.request(url).text
+ data = self._extract_shared_data(page)["entry_data"]
if "HttpErrorPage" in data:
raise exception.NotFoundError("user")
return data["ProfilePage"][0]["graphql"]["user"]
def _extract_post_page(self, url):
- data = self._extract_shared_data(url)["entry_data"]
+ page = self.request(url).text
+ data = self._extract_shared_data(page)["entry_data"]
if "HttpErrorPage" in data:
raise exception.NotFoundError("post")
return data["PostPage"][0]
@@ -524,7 +561,8 @@ class InstagramTagExtractor(InstagramExtractor):
def posts(self):
url = "{}/explore/tags/{}/".format(self.root, self.item)
- page = self._extract_shared_data(url)["entry_data"]["TagPage"][0]
+ page = self._extract_shared_data(
+ self.request(url).text)["entry_data"]["TagPage"][0]
if "data" in page:
return self._pagination_sections(page["data"]["recent"])
@@ -718,8 +756,12 @@ class InstagramStoriesExtractor(InstagramExtractor):
reel_id = "highlight:" + self.highlight_id
else:
url = "{}/stories/{}/".format(self.root, self.user)
+ with self.request(url, allow_redirects=False) as response:
+ if 300 <= response.status_code < 400:
+ return ()
+ page = response.text
try:
- data = self._extract_shared_data(url)["entry_data"]
+ data = self._extract_shared_data(page)["entry_data"]
user = data["StoriesPage"][0]["user"]
except KeyError:
return ()
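
_extract_tagged_users() now folds four tag sources (GraphQL
edge_media_to_tagged_user edges, usertags, reel mentions, and bloks stickers)
into one uniform tagged_users list. A sketch of the resulting shape, using a
made-up usertags payload:

from gallery_dl.extractor.instagram import InstagramExtractor

src = {"usertags": {"in": [{"user": {  # hypothetical API data
    "pk": "25025320",
    "username": "instagram",
    "full_name": "Instagram",
}}]}}

dest = {}
InstagramExtractor._extract_tagged_users(src, dest)
print(dest["tagged_users"])
# [{'id': '25025320', 'username': 'instagram', 'full_name': 'Instagram'}]
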
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 67a1a95..e7827b1 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -26,7 +26,18 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"pattern": r"https?://vgm(site|downloads).com"
r"/soundtracks/horizon-riders-wii/[^/]+"
r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack.mp3",
- "keyword": "12ca70e0709ea15250e577ea388cf2b5b0c65630",
+ "keyword": {
+ "album": {
+ "count": 1,
+ "date": "Sep 18th, 2016",
+ "name": "Horizon Riders (Wii)",
+ "size": 26214400,
+ "type": "Gamerip",
+ },
+ "extension": "mp3",
+ "filename": "Horizon Riders Wii - Full Soundtrack",
+ },
+ "count": 1,
})
def __init__(self, match):
@@ -48,10 +59,10 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
def metadata(self, page):
extr = text.extract_from(page)
return {"album": {
- "name" : text.unescape(extr("Album name: <b>", "<")),
+ "name" : text.unescape(extr("<h2>", "<")),
"count": text.parse_int(extr("Number of Files: <b>", "<")),
"size" : text.parse_bytes(extr("Total Filesize: <b>", "<")[:-1]),
- "date" : extr("Date added: <b>", "<"),
+ "date" : extr("Date Added: <b>", "<"),
"type" : extr("Album type: <b>", "<"),
}}
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index ad7cd1d..b6a508d 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -63,6 +63,12 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
LolisafeExtractor.__init__(self, match)
self.album_id = match.group(match.lastindex)
+ domain = self.config("domain")
+ if domain is None or domain == "auto":
+ self.root = text.root_from_url(match.group(0))
+ else:
+ self.root = text.ensure_http_scheme(domain)
+
def items(self):
files, data = self.fetch_album(self.album_id)
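
The new domain option either keeps the root of whatever URL was matched
(unset or "auto") or forces a fixed host. Assuming the gallery_dl.text
helpers behave as they are used above, the two branches reduce to the
following ("files.example.org" is a placeholder host):

from gallery_dl import text

# domain unset or "auto": derive the root from the matched URL
print(text.root_from_url("https://files.example.org/a/abcd1234"))
# https://files.example.org

# domain set explicitly: use it as-is, adding a scheme if necessary
print(text.ensure_http_scheme("files.example.org"))
# https://files.example.org
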
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index f6514ca..4808105 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2021 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -34,6 +34,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
self.urlbase = self.root + base
ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
+ self.session.headers["Referer"] = self.root + "/"
def metadata(self, page):
manga, pos = text.extract(page, "<title>", "</title>")
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index f655f94..461c92d 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -17,8 +17,8 @@ class MangahereBase():
"""Base class for mangahere extractors"""
category = "mangahere"
root = "https://www.mangahere.cc"
- mobile_root = "https://m.mangahere.cc"
- url_fmt = mobile_root + "/manga/{}/{}.html"
+ root_mobile = "https://m.mangahere.cc"
+ url_fmt = root_mobile + "/manga/{}/{}.html"
class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
@@ -42,6 +42,7 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
self.part, self.volume, self.chapter = match.groups()
url = self.url_fmt.format(self.part, 1)
ChapterExtractor.__init__(self, match, url)
+ self.session.headers["Referer"] = self.root_mobile + "/"
def metadata(self, page):
pos = page.index("</select>")
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index 0b0da65..2bd11ef 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -9,7 +9,7 @@
"""Extractors for https://mangasee123.com/"""
from .common import ChapterExtractor, MangaExtractor
-from .. import text
+from .. import text, util
import json
@@ -57,6 +57,15 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
},
})
+ def __init__(self, match):
+ ChapterExtractor.__init__(self, match)
+ self.session.headers["Referer"] = self.gallery_url
+
+ domain = "mangasee123.com"
+ cookies = self.session.cookies
+ if not cookies.get("PHPSESSID", domain=domain):
+ cookies.set("PHPSESSID", util.generate_token(13), domain=domain)
+
def metadata(self, page):
extr = text.extract_from(page)
self.chapter = data = json.loads(extr("vm.CurChapter =", ";\r\n"))
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index cd7cabb..6e780e8 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -60,12 +60,14 @@ class MastodonExtractor(BaseExtractor):
INSTANCES = {
"mastodon.social": {
"root" : "https://mastodon.social",
+ "pattern" : r"mastodon\.social",
"access-token" : "Y06R36SMvuXXN5_wiPKFAEFiQaMSQg0o_hGgc86Jj48",
"client-id" : "dBSHdpsnOUZgxOnjKSQrWEPakO3ctM7HmsyoOd4FcRo",
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
},
"pawoo": {
"root" : "https://pawoo.net",
+ "pattern" : r"pawoo\.net",
"access-token" : "c12c9d275050bce0dc92169a28db09d7"
"0d62d0a75a8525953098c167eacd3668",
"client-id" : "978a25f843ec01e53d09be2c290cd75c"
@@ -75,6 +77,7 @@ INSTANCES = {
},
"baraag": {
"root" : "https://baraag.net",
+ "pattern" : r"baraag\.net",
"access-token" : "53P1Mdigf4EJMH-RmeFOOSM9gdSDztmrAYFgabOKKE0",
"client-id" : "czxx2qilLElYHQ_sm-lO8yXuGwOHxLX9RYYaD0-nq1o",
"client-secret": "haMaFdMBgK_-BIxufakmI2gFgkYjqmgXGEO2tB-R2xY",
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index 604d65c..65b9a83 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -54,6 +54,7 @@ class MoebooruExtractor(BooruExtractor):
BASE_PATTERN = MoebooruExtractor.update({
"yandere": {
"root": "https://yande.re",
+ "pattern": r"yande\.re",
},
"konachan": {
"root": "https://konachan.com",
@@ -61,6 +62,7 @@ BASE_PATTERN = MoebooruExtractor.update({
},
"hypnohub": {
"root": "https://hypnohub.net",
+ "pattern": r"hypnohub\.net",
},
"sakugabooru": {
"root": "https://www.sakugabooru.com",
@@ -68,6 +70,7 @@ BASE_PATTERN = MoebooruExtractor.update({
},
"lolibooru": {
"root": "https://lolibooru.moe",
+ "pattern": r"lolibooru\.moe",
},
})
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index 348f6a1..eadd460 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Seonghyeon Cho
+# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,8 +11,10 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text
+import re
-BASE_PATTERN = r"(?:https?://)?comic\.naver\.com/webtoon"
+BASE_PATTERN = (r"(?:https?://)?comic\.naver\.com"
+ r"/(webtoon|challenge|bestChallenge)")
class NaverwebtoonBase():
@@ -25,19 +28,33 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode:>03}-{num:>02}.{extension}"
archive_fmt = "{title_id}_{episode}_{num}"
- pattern = BASE_PATTERN + r"/detail\.nhn\?([^#]+)"
+ pattern = BASE_PATTERN + r"/detail(?:\.nhn)?\?([^#]+)"
test = (
- (("https://comic.naver.com/webtoon/detail.nhn?"
- "titleId=26458&no=1&weekday=tue"), {
+ (("https://comic.naver.com/webtoon/detail"
+ "?titleId=26458&no=1&weekday=tue"), {
"url": "47a956ba8c7a837213d5985f50c569fcff986f75",
"content": "3806b6e8befbb1920048de9888dfce6220f69a60",
"count": 14
}),
+ (("https://comic.naver.com/challenge/detail"
+ "?titleId=765124&no=1"), {
+ "pattern": r"https://image-comic\.pstatic\.net/nas"
+ r"/user_contents_data/challenge_comic/2021/01/19"
+ r"/342586/upload_7149856273586337846\.jpeg",
+ "count": 1,
+ }),
+ (("https://comic.naver.com/bestChallenge/detail.nhn"
+ "?titleId=771467&no=3"), {
+ "pattern": r"https://image-comic\.pstatic\.net/nas"
+ r"/user_contents_data/challenge_comic/2021/04/28"
+ r"/345534/upload_3617293622396203109\.jpeg",
+ "count": 1,
+ }),
)
def __init__(self, match):
- query = match.group(1)
- url = "{}/webtoon/detail.nhn?{}".format(self.root, query)
+ path, query = match.groups()
+ url = "{}/{}/detail?{}".format(self.root, path, query)
GalleryExtractor.__init__(self, match, url)
query = text.parse_query(query)
@@ -70,22 +87,31 @@ class NaverwebtoonEpisodeExtractor(NaverwebtoonBase, GalleryExtractor):
class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
subcategory = "comic"
categorytransfer = True
- pattern = (BASE_PATTERN + r"/list\.nhn\?([^#]+)")
+ pattern = (BASE_PATTERN + r"/list(?:\.nhn)?\?([^#]+)")
test = (
- ("https://comic.naver.com/webtoon/list.nhn?titleId=22073", {
+ ("https://comic.naver.com/webtoon/list?titleId=22073", {
"pattern": NaverwebtoonEpisodeExtractor.pattern,
"count": 32,
}),
+ ("https://comic.naver.com/challenge/list?titleId=765124", {
+ "pattern": NaverwebtoonEpisodeExtractor.pattern,
+ "count": 25,
+ }),
+ ("https://comic.naver.com/bestChallenge/list.nhn?titleId=789786", {
+ "pattern": NaverwebtoonEpisodeExtractor.pattern,
+ "count": ">= 12",
+ }),
)
def __init__(self, match):
Extractor.__init__(self, match)
- query = text.parse_query(match.group(1))
+ self.path, query = match.groups()
+ query = text.parse_query(query)
self.title_id = query.get("titleId")
self.page_no = text.parse_int(query.get("page"), 1)
def items(self):
- url = self.root + "/webtoon/list.nhn"
+ url = "{}/{}/list".format(self.root, self.path)
params = {"titleId": self.title_id, "page": self.page_no}
data = {"_extractor": NaverwebtoonEpisodeExtractor}
@@ -103,7 +129,8 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
def get_episode_urls(self, page):
"""Extract and return all episode urls in page"""
return [
- self.root + "/webtoon/detail.nhn?" + query
- for query in text.extract_iter(
- page, '<a href="/webtoon/detail?', '"')
+ self.root + path
+ for path in re.findall(
+ r'<a href="(/(?:webtoon|challenge|bestChallenge)'
+ r'/detail\?[^"]+)', page)
][::2]
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 90ca01d..832831f 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -91,6 +91,10 @@ class NijieExtractor(AsynchronousMixin, Extractor):
"url": url,
})
+ @staticmethod
+ def _extract_user_name(page):
+ return text.unescape(text.extract(page, "<br />", "<")[0] or "")
+
def login(self):
"""Login and obtain session cookies"""
if not self._check_cookies(self.cookienames):
@@ -119,9 +123,8 @@ class NijieExtractor(AsynchronousMixin, Extractor):
while True:
page = self.request(url, params=params, notfound="artist").text
- if not self.user_name:
- self.user_name = text.unescape(text.extract(
- page, '<br />', '<')[0] or "")
+ if self.user_name is None:
+ self.user_name = self._extract_user_name(page)
yield from text.extract_iter(page, 'illust_id="', '"')
if '<a rel="next"' not in page:
@@ -137,11 +140,12 @@ class NijieUserExtractor(NijieExtractor):
test = ("https://nijie.info/members.php?id=44",)
def items(self):
- base = "{}/{{}}.php?id={}".format(self.root, self.user_id)
+ fmt = "{}/{{}}.php?id={}".format(self.root, self.user_id).format
return self._dispatch_extractors((
- (NijieIllustrationExtractor, base.format("members_illust")),
- (NijieDoujinExtractor , base.format("members_dojin")),
- (NijieFavoriteExtractor , base.format("user_like_illust_view")),
+ (NijieIllustrationExtractor, fmt("members_illust")),
+ (NijieDoujinExtractor , fmt("members_dojin")),
+ (NijieFavoriteExtractor , fmt("user_like_illust_view")),
+ (NijieNuitaExtractor , fmt("history_nuita")),
), ("illustration", "doujin"))
@@ -217,6 +221,36 @@ class NijieFavoriteExtractor(NijieExtractor):
return data
+class NijieNuitaExtractor(NijieExtractor):
+ """Extractor for a nijie user's 抜いた list"""
+ subcategory = "nuita"
+ directory_fmt = ("{category}", "nuita", "{user_id}")
+ archive_fmt = "n_{user_id}_{image_id}_{num}"
+ pattern = BASE_PATTERN + r"/history_nuita\.php\?id=(\d+)"
+ test = ("https://nijie.info/history_nuita.php?id=728995", {
+ "range": "1-10",
+ "count": 10,
+ "keyword": {
+ "user_id" : 728995,
+ "user_name": "莚",
+ },
+ })
+
+ def image_ids(self):
+ return self._pagination("history_nuita")
+
+ def _extract_data(self, page):
+ data = NijieExtractor._extract_data(page)
+ data["user_id"] = self.user_id
+ data["user_name"] = self.user_name
+ return data
+
+ @staticmethod
+ def _extract_user_name(page):
+ return text.unescape(text.extract(
+ page, "<title>", "さんの抜いた")[0] or "")
+
+
class NijieImageExtractor(NijieExtractor):
"""Extractor for a work/image from nijie.info"""
subcategory = "image"
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 428f772..653822f 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from . import deviantart, flickr, mastodon, pixiv, reddit, smugmug, tumblr
from .. import text, oauth, util, config, exception
+from ..output import stdout_write
from ..cache import cache
import urllib.parse
import hashlib
@@ -37,7 +38,7 @@ class OAuthBase(Extractor):
def recv(self):
"""Open local HTTP server and recv callback parameters"""
import socket
- print("Waiting for response. (Cancel with Ctrl+c)")
+ stdout_write("Waiting for response. (Cancel with Ctrl+c)\n")
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server.bind(("localhost", self.config("port", 6414)))
@@ -60,7 +61,7 @@ class OAuthBase(Extractor):
def send(self, msg):
"""Send 'msg' to the socket opened in 'recv()'"""
- print(msg)
+ stdout_write(msg)
self.client.send(b"HTTP/1.1 200 OK\r\n\r\n" + msg.encode())
self.client.close()
@@ -69,12 +70,13 @@ class OAuthBase(Extractor):
import webbrowser
url += "?" + urllib.parse.urlencode(params)
if not self.config("browser", True) or not webbrowser.open(url):
- print("Please open this URL in your browser:")
- print(url, end="\n\n", flush=True)
+ stdout_write(
+ "Please open this URL in your browser:\n\n" + url + "\n\n")
return (recv or self.recv)()
def error(self, msg):
- return self.send("Remote server reported an error:\n\n" + str(msg))
+ return self.send(
+ "Remote server reported an error:\n\n{}\n".format(msg))
def _oauth1_authorization_flow(
self, request_token_url, authorize_url, access_token_url):
@@ -133,7 +135,7 @@ class OAuthBase(Extractor):
# check authorization response
if state != params.get("state"):
- self.send("'state' mismatch: expected {}, got {}.".format(
+ self.send("'state' mismatch: expected {}, got {}.\n".format(
state, params.get("state")
))
return
@@ -188,7 +190,7 @@ class OAuthBase(Extractor):
opt = self.oauth_config(names[0])
if self.cache and (opt is None or opt == "cache"):
- msg += _vh + " been cached and will automatically be used."
+ msg += _vh + " been cached and will automatically be used.\n"
else:
msg += "Put " + _va + " into your configuration file as \n"
msg += " and\n".join(
@@ -200,7 +202,7 @@ class OAuthBase(Extractor):
"\nor set\n'extractor.{}.{}' to \"cache\""
.format(self.subcategory, names[0])
)
- msg += "\nto use {}.".format(_it)
+ msg += "\nto use {}.\n".format(_it)
return msg
@@ -398,9 +400,9 @@ class OAuthPixiv(OAuthBase):
data = self.session.post(url, headers=headers, data=data).json()
if "error" in data:
- print(data)
+ stdout_write("\n{}\n".format(data))
if data["error"] in ("invalid_request", "invalid_grant"):
- print("'code' expired, try again")
+ stdout_write("'code' expired, try again\n\n")
return
token = data["refresh_token"]
@@ -409,10 +411,10 @@ class OAuthPixiv(OAuthBase):
pixiv._refresh_token_cache.update(username, token)
self.log.info("Writing 'refresh-token' to cache")
- print(self._generate_message(("refresh-token",), (token,)))
+ stdout_write(self._generate_message(("refresh-token",), (token,)))
def _input(self):
- print("""
+ stdout_write("""\
1) Open your browser's Developer Tools (F12) and switch to the Network tab
2) Login
3) Select the last network monitor entry ('callback?state=...')
@@ -421,6 +423,7 @@ class OAuthPixiv(OAuthBase):
- This 'code' will expire 30 seconds after logging in.
- Copy-pasting more than just the 'code' value will work as well,
like the entire URL or several query parameters.
+
""")
code = input("code: ")
return code.rpartition("=")[2].strip()
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 92b8113..951b34d 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -58,12 +58,21 @@ class PhilomenaExtractor(BooruExtractor):
INSTANCES = {
- "derpibooru": {"root": "https://derpibooru.org",
- "filter_id": "56027"},
- "ponybooru" : {"root": "https://ponybooru.org",
- "filter_id": "2"},
- "furbooru" : {"root": "https://furbooru.org",
- "filter_id": "2"},
+ "derpibooru": {
+ "root": "https://derpibooru.org",
+ "pattern": r"derpibooru\.org",
+ "filter_id": "56027",
+ },
+ "ponybooru": {
+ "root": "https://ponybooru.org",
+ "pattern": r"ponybooru\.org",
+ "filter_id": "2",
+ },
+ "furbooru": {
+ "root": "https://furbooru.org",
+ "pattern": r"furbooru\.org",
+ "filter_id": "2",
+ },
}
BASE_PATTERN = PhilomenaExtractor.update(INSTANCES)
@@ -239,5 +248,5 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
def posts(self):
gallery_id = "gallery_id:" + self.gallery_id
url = self.root + "/api/v1/json/search/images"
- params = {"sd": "desc", "sf": gallery_id, "q" : gallery_id}
+ params = {"sd": "desc", "sf": gallery_id, "q": gallery_id}
return self._pagination(url, params)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index a33df42..9b35e42 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,16 +10,16 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache
+from ..cache import cache, memcache
from datetime import datetime, timedelta
import itertools
import hashlib
-import time
class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
category = "pixiv"
+ root = "https://www.pixiv.net"
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
@@ -73,7 +73,14 @@ class PixivExtractor(Extractor):
if work["type"] == "ugoira":
if not self.load_ugoira:
continue
- ugoira = self.api.ugoira_metadata(work["id"])
+
+ try:
+ ugoira = self.api.ugoira_metadata(work["id"])
+ except exception.StopExtraction as exc:
+ self.log.warning(
+                    "Unable to retrieve Ugoira metadata (%s - %s)",
+ work.get("id"), exc.message)
+ continue
url = ugoira["zip_urls"]["medium"].replace(
"_ugoira600x600", "_ugoira1920x1080")
@@ -91,22 +98,70 @@ class PixivExtractor(Extractor):
work["suffix"] = "_p{:02}".format(work["num"])
yield Message.Url, url, text.nameext_from_url(url, work)
+ @staticmethod
+ def _make_work(kind, url, user):
+ p = url.split("/")
+ return {
+ "create_date" : "{}-{}-{}T{}:{}:{}+09:00".format(
+ p[5], p[6], p[7], p[8], p[9], p[10]) if len(p) > 9 else None,
+ "height" : 0,
+ "id" : kind,
+ "image_urls" : None,
+ "meta_pages" : (),
+ "meta_single_page": {"original_image_url": url},
+ "page_count" : 1,
+ "sanity_level" : 0,
+ "tags" : (),
+ "title" : kind,
+ "type" : kind,
+ "user" : user,
+ "width" : 0,
+ "x_restrict" : 0,
+ }
+
def works(self):
- """Return an iterable containing all relevant 'work'-objects"""
+ """Return an iterable containing all relevant 'work' objects"""
def metadata(self):
- """Collect metadata for extractor-job"""
+ """Collect metadata for extractor job"""
return {}
class PixivUserExtractor(PixivExtractor):
- """Extractor for works of a pixiv user"""
+ """Extractor for a pixiv user profile"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
- r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
- r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
- r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
- r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
+ r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
+ r")(\d+)(?:$|[?#])")
+ test = (
+ ("https://www.pixiv.net/en/users/173530"),
+ ("https://www.pixiv.net/u/173530"),
+ ("https://www.pixiv.net/member.php?id=173530"),
+ ("https://www.pixiv.net/mypage.php#id=173530"),
+ ("https://www.pixiv.net/#id=173530"),
+ )
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def items(self):
+ base = "{}/users/{}/".format(self.root, self.user_id)
+ return self._dispatch_extractors((
+ (PixivAvatarExtractor , base + "avatar"),
+ (PixivBackgroundExtractor, base + "background"),
+ (PixivArtworksExtractor , base + "artworks"),
+ (PixivFavoriteExtractor , base + "bookmarks/artworks"),
+ ), ("artworks",))
+
+
+class PixivArtworksExtractor(PixivExtractor):
+ """Extractor for artworks of a pixiv user"""
+ subcategory = "artworks"
+ pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
+ r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
+ r"(?:/([^/?#]+))?/?(?:$|[?#])"
+ r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
test = (
("https://www.pixiv.net/en/users/173530/artworks", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
@@ -120,47 +175,30 @@ class PixivUserExtractor(PixivExtractor):
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}),
- # avatar (#595, #623, #1124)
- ("https://www.pixiv.net/en/users/173530", {
- "options": (("avatar", True),),
- "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
- "range": "1",
- }),
- # background (#623, #1124, #2495)
- ("https://www.pixiv.net/en/users/194921", {
- "options": (("background", True),),
- "content": "aeda3536003ea3002f70657cb93c5053f26f5843",
- "range": "1",
- }),
# deleted account
("http://www.pixiv.net/member_illust.php?id=173531", {
"options": (("metadata", True),),
"exception": exception.NotFoundError,
}),
- ("https://www.pixiv.net/en/users/173530"),
("https://www.pixiv.net/en/users/173530/manga"),
("https://www.pixiv.net/en/users/173530/illustrations"),
("https://www.pixiv.net/member_illust.php?id=173530"),
- ("https://www.pixiv.net/u/173530"),
- ("https://www.pixiv.net/user/173530"),
- ("https://www.pixiv.net/mypage.php#id=173530"),
- ("https://www.pixiv.net/#id=173530"),
("https://touch.pixiv.net/member_illust.php?id=173530"),
)
def __init__(self, match):
PixivExtractor.__init__(self, match)
- u1, t1, u2, t2, u3 = match.groups()
+ u1, t1, u2, t2 = match.groups()
if t1:
t1 = text.unquote(t1)
elif t2:
t2 = text.parse_query(t2).get("tag")
- self.user_id = u1 or u2 or u3
+ self.user_id = u1 or u2
self.tag = t1 or t2
def metadata(self):
if self.config("metadata"):
- return {"user": self.api.user_detail(self.user_id)["user"]}
+ return self.api.user_detail(self.user_id)
return {}
def works(self):
@@ -173,54 +211,60 @@ class PixivUserExtractor(PixivExtractor):
if tag in [t["name"].lower() for t in work["tags"]]
)
- avatar = self.config("avatar")
- background = self.config("background")
- if avatar or background:
- work_list = []
- detail = self.api.user_detail(self.user_id)
- user = detail["user"]
-
- if avatar:
- url = user["profile_image_urls"]["medium"]
- work_list.append((self._make_work(
- "avatar", url.replace("_170.", "."), user),))
-
- if background:
- url = detail["profile"]["background_image_url"]
- if url:
- if "/c/" in url:
- parts = url.split("/")
- del parts[3:5]
- url = "/".join(parts)
- url = url.replace("_master1200.", ".")
- work = self._make_work("background", url, user)
- if url.endswith(".jpg"):
- work["_fallback"] = (url[:-4] + ".png",)
- work_list.append((work,))
-
- work_list.append(works)
- works = itertools.chain.from_iterable(work_list)
-
return works
- @staticmethod
- def _make_work(kind, url, user):
- return {
- "create_date" : None,
- "height" : 0,
- "id" : kind,
- "image_urls" : None,
- "meta_pages" : (),
- "meta_single_page": {"original_image_url": url},
- "page_count" : 1,
- "sanity_level" : 0,
- "tags" : (),
- "title" : kind,
- "type" : kind,
- "user" : user,
- "width" : 0,
- "x_restrict" : 0,
- }
+
+class PixivAvatarExtractor(PixivExtractor):
+ """Extractor for pixiv avatars"""
+ subcategory = "avatar"
+ filename_fmt = "avatar{date:?_//%Y-%m-%d}.{extension}"
+ archive_fmt = "avatar_{user[id]}_{date}"
+ pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
+ r"/(?:en/)?users/(\d+)/avatar")
+ test = ("https://www.pixiv.net/en/users/173530/avatar", {
+ "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
+ })
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def works(self):
+ user = self.api.user_detail(self.user_id)["user"]
+ url = user["profile_image_urls"]["medium"].replace("_170.", ".")
+ return (self._make_work("avatar", url, user),)
+
+
+class PixivBackgroundExtractor(PixivExtractor):
+ """Extractor for pixiv background banners"""
+ subcategory = "background"
+ filename_fmt = "background{date?_//:%Y-%m-%d}.{extension}"
+ archive_fmt = "background_{user[id]}_{date}"
+ pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
+ r"/(?:en/)?users/(\d+)/background")
+ test = ("https://www.pixiv.net/en/users/194921/background", {
+ "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
+ r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
+ })
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def works(self):
+ detail = self.api.user_detail(self.user_id)
+ url = detail["profile"]["background_image_url"]
+ if not url:
+ return ()
+ if "/c/" in url:
+ parts = url.split("/")
+ del parts[3:5]
+ url = "/".join(parts)
+ url = url.replace("_master1200.", ".")
+ work = self._make_work("background", url, detail["user"])
+ if url.endswith(".jpg"):
+ work["_fallback"] = (url[:-4] + ".png",)
+ return (work,)
class PixivMeExtractor(PixivExtractor):
@@ -312,10 +356,10 @@ class PixivFavoriteExtractor(PixivExtractor):
r"|bookmark\.php)(?:\?([^#]*))?")
test = (
("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
- "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+ "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
("https://www.pixiv.net/bookmark.php?id=173530", {
- "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+ "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
# bookmarks with specific tag
(("https://www.pixiv.net/en/users/3137110"
@@ -735,66 +779,70 @@ class PixivAppAPI():
def illust_detail(self, illust_id):
params = {"illust_id": illust_id}
- return self._call("v1/illust/detail", params)["illust"]
+ return self._call("/v1/illust/detail", params)["illust"]
def illust_follow(self, restrict="all"):
params = {"restrict": restrict}
- return self._pagination("v2/illust/follow", params)
+ return self._pagination("/v2/illust/follow", params)
def illust_ranking(self, mode="day", date=None):
params = {"mode": mode, "date": date}
- return self._pagination("v1/illust/ranking", params)
+ return self._pagination("/v1/illust/ranking", params)
def illust_related(self, illust_id):
params = {"illust_id": illust_id}
- return self._pagination("v2/illust/related", params)
+ return self._pagination("/v2/illust/related", params)
def search_illust(self, word, sort=None, target=None, duration=None,
date_start=None, date_end=None):
params = {"word": word, "search_target": target,
"sort": sort, "duration": duration,
"start_date": date_start, "end_date": date_end}
- return self._pagination("v1/search/illust", params)
+ return self._pagination("/v1/search/illust", params)
def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
- return self._pagination("v1/user/bookmarks/illust", params)
+ return self._pagination("/v1/user/bookmarks/illust", params)
+ @memcache(keyarg=1)
def user_detail(self, user_id):
params = {"user_id": user_id}
- return self._call("v1/user/detail", params)
+ return self._call("/v1/user/detail", params)
def user_following(self, user_id, restrict="public"):
params = {"user_id": user_id, "restrict": restrict}
- return self._pagination("v1/user/following", params, "user_previews")
+ return self._pagination("/v1/user/following", params, "user_previews")
def user_illusts(self, user_id):
params = {"user_id": user_id}
- return self._pagination("v1/user/illusts", params)
+ return self._pagination("/v1/user/illusts", params)
def ugoira_metadata(self, illust_id):
params = {"illust_id": illust_id}
- return self._call("v1/ugoira/metadata", params)["ugoira_metadata"]
+ return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
def _call(self, endpoint, params=None):
- url = "https://app-api.pixiv.net/" + endpoint
+ url = "https://app-api.pixiv.net" + endpoint
+
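+        # retry until the request succeeds or fails
+        # with an unrecoverable error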
+ while True:
+ self.login()
+ response = self.extractor.request(url, params=params, fatal=False)
+ data = response.json()
- self.login()
- response = self.extractor.request(url, params=params, fatal=False)
- data = response.json()
+ if "error" not in data:
+ return data
+
+ self.log.debug(data)
- if "error" in data:
if response.status_code == 404:
raise exception.NotFoundError()
error = data["error"]
if "rate limit" in (error.get("message") or "").lower():
- self.log.info("Waiting two minutes for API rate limit reset.")
- time.sleep(120)
- return self._call(endpoint, params)
- raise exception.StopExtraction("API request failed: %s", error)
+ self.extractor.wait(seconds=300)
+ continue
- return data
+ raise exception.StopExtraction("API request failed: %s", error)
def _pagination(self, endpoint, params, key="illusts"):
while True:
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index b3a620a..db8d700 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -160,6 +160,7 @@ BASE_PATTERN = ReactorExtractor.update({
},
"thatpervert": {
"root": "http://thatpervert.com",
+ "pattern": r"thatpervert\.com",
},
})
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index c8b8c9a..16b9191 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -24,6 +24,7 @@ class ReadcomiconlineBase():
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.li"
browser = "firefox"
+ request_interval = (1, 9)
def request(self, url, **kwargs):
"""Detect and handle redirects to CAPTCHA pages"""
@@ -85,7 +86,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
return [
(beau(url), None)
for url in text.extract_iter(
- page, 'lstImages.push("', '"'
+ page, "lstImages.push('", "'",
)
]
@@ -129,10 +130,13 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
def beau(url):
- """https://readcomiconline.li/Scripts/rguard.min.js?v=1.1"""
+ """https://readcomiconline.li/Scripts/rguard.min.js"""
if url.startswith("https"):
return url
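+
+    # undo the string substitutions applied by the site's
+    # obfuscation script (rguard.min.js, referenced above)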
+ url = url.replace("_x236", "d")
+ url = url.replace("_x945", "g")
+
containsS0 = "=s0" in url
url = url[:-3 if containsS0 else -6]
url = url[4:22] + url[25:]
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index f276e84..f2bf3cb 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -43,19 +43,45 @@ class ShopifyExtractor(BaseExtractor):
BASE_PATTERN = ShopifyExtractor.update({
+ "chelseacrew": {
+ "root": "https://chelseacrew.com",
+ "pattern": r"(?:www\.)?chelseacrew\.com",
+ },
"fashionnova": {
"root": "https://www.fashionnova.com",
"pattern": r"(?:www\.)?fashionnova\.com",
},
+ "loungeunderwear": {
+ "root": "https://loungeunderwear.com",
+ "pattern": r"(?:[a-z]+\.)?loungeunderwear\.com",
+ },
+ "michaelscameras": {
+ "root": "https://michaels.com.au",
+ "pattern": r"michaels\.com\.au",
+ },
+ "modcloth": {
+ "root": "https://modcloth.com",
+ "pattern": r"modcloth\.com",
+ },
"omgmiamiswimwear": {
"root": "https://www.omgmiamiswimwear.com",
+ "pattern": r"(?:www\.)?omgmiamiswimwear\.com",
+ },
+ "pinupgirlclothing": {
+ "root": "https://pinupgirlclothing.com",
+ "pattern": r"pinupgirlclothing\.com",
+ },
+ "raidlondon": {
+ "root": "https://www.raidlondon.com",
+ "pattern": r"(?:www\.)?raidlondon\.com",
+ },
+ "unique-vintage": {
+ "root": "https://www.unique-vintage.com",
+ "pattern": r"(?:www\.)?unique\-vintage\.com",
},
"windsorstore": {
"root": "https://www.windsorstore.com",
- },
- "loungeunderwear": {
- "root": "https://loungeunderwear.com",
- "pattern": r"(?:[a-z]+\.)?loungeunderwear\.com",
+ "pattern": r"(?:www\.)?windsorstore\.com",
},
})
@@ -66,15 +92,21 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
directory_fmt = ("{category}", "{collection[title]}")
pattern = BASE_PATTERN + r"(/collections/[\w-]+)/?(?:$|[?#])"
test = (
+ ("https://chelseacrew.com/collections/flats"),
("https://www.fashionnova.com/collections/mini-dresses", {
"range": "1-20",
"count": 20,
}),
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
+ ("https://loungeunderwear.com/collections/apparel"),
+ ("https://michaels.com.au/collections/microphones"),
+ ("https://modcloth.com/collections/shoes"),
("https://www.omgmiamiswimwear.com/collections/fajas"),
+ ("https://pinupgirlclothing.com/collections/evening"),
+ ("https://www.raidlondon.com/collections/flats"),
+ ("https://www.unique-vintage.com/collections/flapper-1920s"),
("https://www.windsorstore.com/collections/dresses-ball-gowns"),
- ("https://loungeunderwear.com/collections/apparel"),
)
def metadata(self):
@@ -99,18 +131,28 @@ class ShopifyProductExtractor(ShopifyExtractor):
directory_fmt = ("{category}", "Products")
pattern = BASE_PATTERN + r"((?:/collections/[\w-]+)?/products/[\w-]+)"
test = (
+ ("https://chelseacrew.com/collections/flats/products/dora"),
("https://www.fashionnova.com/products/essential-slide-red", {
"pattern": r"https?://cdn\d*\.shopify.com/",
"count": 3,
}),
+ ("https://www.fashionnova.com/collections/flats/products/name"),
+ ("https://de.loungeunderwear.com/products/ribbed-crop-top-black"),
+ ("https://michaels.com.au/collections/audio/products"
+ "/boya-by-wm4-pro-k5-2-4ghz-mic-android-1-1-101281"),
+ ("https://modcloth.com/collections/shoes/products/heidii-brn"),
("https://www.omgmiamiswimwear.com/products/la-medusa-maxi-dress", {
"pattern": r"https://cdn\.shopify\.com/s/files/1/1819/6171/",
"count": 5,
}),
- ("https://www.fashionnova.com/collections/flats/products/name"),
+ ("https://pinupgirlclothing.com/collections/evening/products"
+ "/clarice-coat-dress-in-olive-green-poly-crepe-laura-byrnes-design"),
+ ("https://www.raidlondon.com/collections/flats/products"
+ "/raid-addyson-chunky-flat-shoe-in-white"),
+ ("https://www.unique-vintage.com/collections/flapper-1920s/products"
+ "/unique-vintage-plus-size-black-silver-beaded-troyes-flapper-dress"),
("https://www.windsorstore.com/collections/accessories-belts/products"
"/rhine-buckle-dbl-o-ring-pu-strap-belt-073010158001"),
- ("https://de.loungeunderwear.com/products/ribbed-crop-top-black"),
)
def products(self):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4c947e7..2737d34 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,7 +15,7 @@ import json
BASE_PATTERN = (
r"(?:https?://)?(?:www\.|mobile\.)?"
- r"(?:(?:fx)?twitter\.com|nitter\.net)"
+ r"(?:(?:[fv]x)?twitter\.com|nitter\.net)"
)
@@ -39,7 +39,7 @@ class TwitterExtractor(Extractor):
self.pinned = self.config("pinned", False)
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
- self.cards = self.config("cards", True)
+ self.cards = self.config("cards", False)
self._user_cache = {}
self._init_sizes()
@@ -104,6 +104,7 @@ class TwitterExtractor(Extractor):
def _extract_media(self, tweet, entities, files):
for media in entities:
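+            # extract the media item's alt text, if any (#2617)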
+ descr = media.get("ext_alt_text")
width = media["original_info"].get("width", 0)
height = media["original_info"].get("height", 0)
@@ -112,9 +113,10 @@ class TwitterExtractor(Extractor):
files.append({
"url": "ytdl:{}/i/web/status/{}".format(
self.root, tweet["id_str"]),
- "width" : width,
- "height" : height,
- "extension": None,
+ "width" : width,
+ "height" : height,
+ "extension" : None,
+ "description": descr,
})
elif self.videos:
video_info = media["video_info"]
@@ -123,22 +125,24 @@ class TwitterExtractor(Extractor):
key=lambda v: v.get("bitrate", 0),
)
files.append({
- "url" : variant["url"],
- "width" : width,
- "height" : height,
- "bitrate" : variant.get("bitrate", 0),
- "duration": video_info.get(
+ "url" : variant["url"],
+ "width" : width,
+ "height" : height,
+ "bitrate" : variant.get("bitrate", 0),
+ "duration" : video_info.get(
"duration_millis", 0) / 1000,
+ "description": descr,
})
elif "media_url_https" in media:
url = media["media_url_https"]
base, _, fmt = url.rpartition(".")
base += "?format=" + fmt + "&name="
files.append(text.nameext_from_url(url, {
- "url" : base + self._size_image,
- "width" : width,
- "height" : height,
- "_fallback": self._image_fallback(base),
+ "url" : base + self._size_image,
+ "width" : width,
+ "height" : height,
+ "_fallback" : self._image_fallback(base),
+ "description": descr,
}))
else:
files.append({"url": media["media_url"]})
@@ -323,6 +327,9 @@ class TwitterExtractor(Extractor):
elif userfmt == "media":
cls = TwitterMediaExtractor
fmt = (self.root + "/id:{rest_id}/media").format_map
+ elif userfmt == "tweets":
+ cls = TwitterTweetsExtractor
+ fmt = (self.root + "/id:{rest_id}/tweets").format_map
else:
cls = None
fmt = userfmt.format_map
@@ -383,7 +390,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
- """Extractor for Tweets from a user's timeline"""
+ """Extractor for a Twitter user timeline"""
subcategory = "timeline"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@@ -400,6 +407,8 @@ class TwitterTimelineExtractor(TwitterExtractor):
("https://www.twitter.com/id:2976459548"),
("https://twitter.com/i/user/2976459548"),
("https://twitter.com/intent/user?user_id=2976459548"),
+ ("https://fxtwitter.com/supernaturepics"),
+ ("https://vxtwitter.com/supernaturepics"),
)
def __init__(self, match):
@@ -409,6 +418,52 @@ class TwitterTimelineExtractor(TwitterExtractor):
self.user = "id:" + user_id
def tweets(self):
+ tweets = (self.api.user_tweets(self.user) if self.retweets else
+ self.api.user_media(self.user))
+
+ # yield initial batch of (media) tweets
+ tweet = None
+ for tweet in tweets:
+ yield tweet
+
+ if tweet is None:
+ return
+
+ # get username
+ if not self.user.startswith("id:"):
+ username = self.user
+ elif "core" in tweet:
+ username = (tweet["core"]["user_results"]["result"]
+ ["legacy"]["screen_name"])
+ else:
+ username = tweet["user"]["screen_name"]
+
+ # get tweet data
+ if "legacy" in tweet:
+ tweet = tweet["legacy"]
+
+ # yield search results starting from last tweet id
+ yield from self.api.search_adaptive(
+ "from:{} include:retweets include:nativeretweets max_id:{} "
+ "filter:images OR card_name:animated_gif OR filter:native_video"
+ .format(username, tweet["id_str"])
+ )
+
+
+class TwitterTweetsExtractor(TwitterExtractor):
+ """Extractor for Tweets from a user's Tweets timeline"""
+ subcategory = "tweets"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
+ test = (
+ ("https://twitter.com/supernaturepics/tweets", {
+ "range": "1-40",
+ "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
+ }),
+ ("https://mobile.twitter.com/supernaturepics/tweets#t"),
+ ("https://www.twitter.com/id:2976459548/tweets"),
+ )
+
+ def tweets(self):
return self.api.user_tweets(self.user)
@@ -662,6 +717,10 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("syndication", True),),
"count": 1,
}),
+ # media alt texts / descriptions (#2617)
+ ("https://twitter.com/my0nruri/status/1528379296041299968", {
+ "keyword": {"description": "oc"}
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 8fb9bbf..23f6ea2 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -9,7 +9,7 @@
"""Extractors for https://vk.com/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
@@ -39,9 +39,15 @@ class VkExtractor(Extractor):
self.log.warning("no photo URL found (%s)", photo.get("id"))
continue
- photo.update(data)
- photo["url"], photo["width"], photo["height"] = photo[size]
+ try:
+ photo["url"], photo["width"], photo["height"] = photo[size]
+ except ValueError:
+ # photo without width/height entries (#2535)
+ photo["url"] = photo[size + "src"]
+ photo["width"] = photo["height"] = 0
+
photo["id"] = photo["id"].rpartition("_")[2]
+ photo.update(data)
text.nameext_from_url(photo["url"], photo)
yield Message.Url, photo["url"], photo
@@ -66,6 +72,10 @@ class VkExtractor(Extractor):
url, method="POST", headers=headers, data=data,
).json()["payload"][1]
+ if len(payload) < 4:
+ self.log.debug(payload)
+ raise exception.AuthorizationError(payload[0])
+
total = payload[1]
photos = payload[3]
@@ -105,7 +115,7 @@ class VkPhotosExtractor(VkExtractor):
},
}),
("https://vk.com/cosplayinrussia", {
- "range": "25-35",
+ "range": "15-25",
"keywords": {
"id": r"re:\d+",
"user": {
@@ -117,6 +127,12 @@ class VkPhotosExtractor(VkExtractor):
},
},
}),
+ # photos without width/height (#2535)
+ ("https://vk.com/id76957806", {
+ "pattern": r"https://sun\d+-\d+\.userapi\.com/",
+ "range": "1-9",
+ "count": 9,
+ }),
("https://m.vk.com/albums398982326"),
("https://www.vk.com/id398982326?profile=1"),
("https://vk.com/albums-165740836"),
@@ -150,7 +166,8 @@ class VkPhotosExtractor(VkExtractor):
'<h1 class="page_name">', "<")).replace(" ", " "),
"info": text.unescape(text.remove_html(extr(
'<span class="current_text">', '</span'))),
- "id" : extr('<a href="/albums', '"'),
+ "id" : (extr('<a href="/albums', '"') or
+ extr('data-from-id="', '"')),
}}
@@ -166,6 +183,10 @@ class VkAlbumExtractor(VkExtractor):
("https://vk.com/album-165740836_281339889", {
"count": 12,
}),
+ # "Access denied" (#2556)
+ ("https://vk.com/album-53775183_00", {
+ "exception": exception.AuthorizationError,
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index 75b78c5..599a175 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -47,6 +47,7 @@ class WeasylExtractor(Extractor):
return data
def submissions(self, owner_login, folderid=None):
+ metadata = self.config("metadata")
url = "{}/api/users/{}/gallery".format(self.root, owner_login)
params = {
"nextid" : None,
@@ -56,6 +57,9 @@ class WeasylExtractor(Extractor):
while True:
data = self.request(url, params=params).json()
for submission in data["submissions"]:
+ if metadata:
+ submission = self.request_submission(
+ submission["submitid"])
if self.populate_submission(submission):
submission["folderid"] = folderid
# Do any submissions have more than one url? If so
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index cf5b192..59f46f0 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Leonardo Taccari
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -41,8 +42,8 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"""Extractor for an episode on webtoons.com"""
subcategory = "episode"
directory_fmt = ("{category}", "{comic}")
- filename_fmt = "{episode}-{num:>02}.{extension}"
- archive_fmt = "{title_no}_{episode}_{num}"
+ filename_fmt = "{episode_no}-{num:>02}.{extension}"
+ archive_fmt = "{title_no}_{episode_no}_{num}"
    pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/(?:[^/?#]+)"
r"/viewer(?:\?([^#'\"]+))")
test = (
@@ -54,6 +55,18 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"),
"count": 5,
}),
+ (("https://www.webtoons.com/en/challenge/punderworld"
+ "/happy-earth-day-/viewer?title_no=312584&episode_no=40"), {
+ "keyword": {
+ "comic": "punderworld",
+ "description": str,
+ "episode": "36",
+ "episode_no": "40",
+ "genre": "challenge",
+ "title": r"re:^Punderworld - .+",
+ "title_no": "312584",
+ },
+ }),
)
def __init__(self, match):
@@ -65,11 +78,13 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
query = text.parse_query(query)
self.title_no = query.get("title_no")
- self.episode = query.get("episode_no")
+ self.episode_no = query.get("episode_no")
def metadata(self, page):
+ keywords, pos = text.extract(
+ page, '<meta name="keywords" content="', '"')
title, pos = text.extract(
- page, '<meta property="og:title" content="', '"')
+ page, '<meta property="og:title" content="', '"', pos)
descr, pos = text.extract(
page, '<meta property="og:description" content="', '"', pos)
@@ -77,8 +92,9 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"genre" : self.genre,
"comic" : self.comic,
"title_no" : self.title_no,
- "episode" : self.episode,
+ "episode_no" : self.episode_no,
"title" : text.unescape(title),
+ "episode" : keywords.split(", ")[1],
"description": text.unescape(descr),
"lang" : self.lang,
"language" : util.code_to_language(self.lang),
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 27d5e40..d1b3a8a 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -20,6 +20,7 @@ _CACHE = {}
_CONVERSIONS = None
_GLOBALS = {
"_env": lambda: os.environ,
+ "_lit": lambda: _literal,
"_now": datetime.datetime.now,
}
@@ -219,6 +220,10 @@ def parse_field_name(field_name):
first, rest = _string.formatter_field_name_split(field_name)
funcs = []
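+    # a quoted field name is a string literal, e.g. {'foo'};
+    # resolve it through '_lit', whose __getitem__ returns the key itself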
+ if first[0] == "'":
+ funcs.append(operator.itemgetter(first[1:-1]))
+ first = "_lit"
+
for is_attr, key in rest:
if is_attr:
func = operator.attrgetter
@@ -344,3 +349,15 @@ def _default_format(format_spec):
def wrap(obj):
return format(obj, format_spec)
return wrap
+
+
+class Literal():
+ # __getattr__, __getattribute__, and __class_getitem__
+ # are all slower than regular __getitem__
+
+ @staticmethod
+ def __getitem__(key):
+ return key
+
+
+_literal = Literal()
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 044369a..a0adffb 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -16,6 +16,7 @@ import collections
from . import extractor, downloader, postprocessor
from . import config, text, util, path, formatter, output, exception
from .extractor.message import Message
+from .output import stdout_write
class Job():
@@ -264,7 +265,7 @@ class DownloadJob(Job):
# download succeeded
pathfmt.finalize()
- self.out.success(pathfmt.path, 0)
+ self.out.success(pathfmt.path)
self._skipcnt = 0
if archive:
archive.add(kwdict)
@@ -537,14 +538,14 @@ class KeywordJob(Job):
self.private = config.get(("output",), "private")
def handle_url(self, url, kwdict):
- print("\nKeywords for filenames and --filter:")
- print("------------------------------------")
+ stdout_write("\nKeywords for filenames and --filter:\n"
+ "------------------------------------\n")
self.print_kwdict(kwdict)
raise exception.StopExtraction()
def handle_directory(self, kwdict):
- print("Keywords for directory names:")
- print("-----------------------------")
+ stdout_write("Keywords for directory names:\n"
+ "-----------------------------\n")
self.print_kwdict(kwdict)
def handle_queue(self, url, kwdict):
@@ -565,36 +566,47 @@ class KeywordJob(Job):
self.extractor.log.info(
"Try 'gallery-dl -K \"%s\"' instead.", url)
else:
- print("Keywords for --chapter-filter:")
- print("------------------------------")
+ stdout_write("Keywords for --chapter-filter:\n"
+ "------------------------------\n")
self.print_kwdict(kwdict)
if extr or self.extractor.categorytransfer:
- print()
+ stdout_write("\n")
KeywordJob(extr or url, self).run()
raise exception.StopExtraction()
- def print_kwdict(self, kwdict, prefix=""):
+ def print_kwdict(self, kwdict, prefix="", markers=None):
"""Print key-value pairs in 'kwdict' with formatting"""
+ write = sys.stdout.write
suffix = "]" if prefix else ""
+
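+        # track visited dicts by id() to guard against infinite recursion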
+ markerid = id(kwdict)
+ if markers is None:
+ markers = {markerid}
+ elif markerid in markers:
+ write("{}\n <circular reference>\n".format(prefix[:-1]))
+ return # ignore circular reference
+ else:
+ markers.add(markerid)
+
for key, value in sorted(kwdict.items()):
if key[0] == "_" and not self.private:
continue
key = prefix + key + suffix
if isinstance(value, dict):
- self.print_kwdict(value, key + "[")
+ self.print_kwdict(value, key + "[", markers)
elif isinstance(value, list):
if value and isinstance(value[0], dict):
- self.print_kwdict(value[0], key + "[][")
+ self.print_kwdict(value[0], key + "[][", markers)
else:
- print(key, "[]", sep="")
+ write(key + "[]\n")
for val in value:
- print(" -", val)
+ write(" - " + str(val) + "\n")
else:
# string or number
- print(key, "\n ", value, sep="")
+ write("{}\n {}\n".format(key, value))
class UrlJob(Job):
@@ -609,14 +621,14 @@ class UrlJob(Job):
@staticmethod
def handle_url(url, _):
- print(url)
+ stdout_write(url + "\n")
@staticmethod
def handle_url_fallback(url, kwdict):
- print(url)
+ stdout_write(url + "\n")
if "_fallback" in kwdict:
for url in kwdict["_fallback"]:
- print("|", url)
+ stdout_write("| " + url + "\n")
def handle_queue(self, url, kwdict):
cls = kwdict.get("_extractor")
@@ -653,15 +665,18 @@ class InfoJob(Job):
return 0
def _print_multi(self, title, *values):
- print(title, "\n ", " / ".join(json.dumps(v) for v in values), sep="")
+ stdout_write("{}\n {}\n\n".format(
+ title, " / ".join(json.dumps(v) for v in values)))
def _print_config(self, title, optname, value):
optval = self.extractor.config(optname, util.SENTINEL)
if optval is not util.SENTINEL:
- print(title, "(custom):\n ", json.dumps(optval))
- print(title, "(default):\n ", json.dumps(value))
+ stdout_write(
+ "{} (custom):\n {}\n{} (default):\n {}\n\n".format(
+ title, json.dumps(optval), title, json.dumps(value)))
elif value:
- print(title, "(default):\n ", json.dumps(value))
+ stdout_write(
+ "{} (default):\n {}\n\n".format(title, json.dumps(value)))
class DataJob(Job):
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 782063d..b2a9aa8 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -39,8 +39,9 @@ class AppendCommandAction(argparse.Action):
class DeprecatedConfigConstAction(argparse.Action):
"""Set argparse const values as config values + deprecation warning"""
def __call__(self, parser, namespace, values, option_string=None):
- print("warning: {} is deprecated. Use {} instead.".format(
- "/".join(self.option_strings), self.choices), file=sys.stderr)
+ sys.stderr.write(
+ "warning: {} is deprecated. Use {} instead.\n".format(
+ "/".join(self.option_strings), self.choices))
namespace.options.append(((), self.dest, self.const))
@@ -59,7 +60,7 @@ class ParseAction(argparse.Action):
class Formatter(argparse.HelpFormatter):
"""Custom HelpFormatter class to customize help output"""
def __init__(self, *args, **kwargs):
- super().__init__(max_help_position=50, *args, **kwargs)
+ super().__init__(max_help_position=30, *args, **kwargs)
def _format_action_invocation(self, action):
opts = action.option_strings[:]
@@ -114,11 +115,6 @@ def build_parser():
"('/O' for \"original\" filenames)"),
)
general.add_argument(
- "--cookies",
- dest="cookies", metavar="FILE", action=ConfigAction,
- help="File to load additional cookies from",
- )
- general.add_argument(
"--proxy",
dest="proxy", metavar="URL", action=ConfigAction,
help="Use the specified proxy",
@@ -134,6 +130,18 @@ def build_parser():
help="Delete cached login sessions, cookies, etc. for MODULE "
"(ALL to delete everything)",
)
+ general.add_argument(
+ "--cookies",
+ dest="cookies", metavar="FILE", action=ConfigAction,
+ help="File to load additional cookies from",
+ )
+ general.add_argument(
+ "--cookies-from_browser",
+ dest="cookies_from_browser", metavar="BROWSER[+KEYRING][:PROFILE]",
+ help=("Name of the browser to load cookies from, "
+ "with optional keyring name prefixed with '+' and "
+ "profile prefixed with ':'"),
+ )
output = parser.add_argument_group("Output Options")
output.add_argument(
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 7e00e1a..3531304 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -205,6 +205,30 @@ def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL):
# --------------------------------------------------------------------
# Utility functions
+def stdout_write_flush(s):
+ sys.stdout.write(s)
+ sys.stdout.flush()
+
+
+def stderr_write_flush(s):
+ sys.stderr.write(s)
+ sys.stderr.flush()
+
+
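+# write to the stream directly when it is line-buffered;
+# otherwise flush after each write so messages appear immediately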
+if sys.stdout.line_buffering:
+ def stdout_write(s):
+ sys.stdout.write(s)
+else:
+ stdout_write = stdout_write_flush
+
+
+if sys.stderr.line_buffering:
+ def stderr_write(s):
+ sys.stderr.write(s)
+else:
+ stderr_write = stderr_write_flush
+
+
def replace_std_streams(errors="replace"):
"""Replace standard streams and set their error handlers to 'errors'"""
for name in ("stdout", "stdin", "stderr"):
@@ -255,7 +279,7 @@ class NullOutput():
def skip(self, path):
"""Print a message indicating that a download has been skipped"""
- def success(self, path, tries):
+ def success(self, path):
"""Print a message indicating the completion of a download"""
def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
@@ -265,14 +289,10 @@ class NullOutput():
class PipeOutput(NullOutput):
def skip(self, path):
- stdout = sys.stdout
- stdout.write(CHAR_SKIP + path + "\n")
- stdout.flush()
+ stdout_write(CHAR_SKIP + path + "\n")
- def success(self, path, tries):
- stdout = sys.stdout
- stdout.write(path + "\n")
- stdout.flush()
+ def success(self, path):
+ stdout_write(path + "\n")
class TerminalOutput(NullOutput):
@@ -288,38 +308,43 @@ class TerminalOutput(NullOutput):
self.shorten = util.identity
def start(self, path):
- stdout = sys.stdout
- stdout.write(self.shorten(" " + path))
- stdout.flush()
+ stdout_write_flush(self.shorten(" " + path))
def skip(self, path):
- sys.stdout.write(self.shorten(CHAR_SKIP + path) + "\n")
+ stdout_write(self.shorten(CHAR_SKIP + path) + "\n")
- def success(self, path, tries):
- sys.stdout.write("\r" + self.shorten(CHAR_SUCCESS + path) + "\n")
+ def success(self, path):
+ stdout_write("\r" + self.shorten(CHAR_SUCCESS + path) + "\n")
def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
bdl = util.format_value(bytes_downloaded)
bps = util.format_value(bytes_per_second)
if bytes_total is None:
- sys.stderr.write("\r{:>7}B {:>7}B/s ".format(bdl, bps))
+ stderr_write("\r{:>7}B {:>7}B/s ".format(bdl, bps))
else:
- sys.stderr.write("\r{:>3}% {:>7}B {:>7}B/s ".format(
+ stderr_write("\r{:>3}% {:>7}B {:>7}B/s ".format(
bytes_downloaded * 100 // bytes_total, bdl, bps))
class ColorOutput(TerminalOutput):
+ def __init__(self):
+ TerminalOutput.__init__(self)
+
+ colors = config.get(("output",), "colors") or {}
+ self.color_skip = "\033[{}m".format(
+ colors.get("skip", "2"))
+ self.color_success = "\r\033[{}m".format(
+ colors.get("success", "1;32"))
+
def start(self, path):
- stdout = sys.stdout
- stdout.write(self.shorten(path))
- stdout.flush()
+ stdout_write_flush(self.shorten(path))
def skip(self, path):
- sys.stdout.write("\033[2m" + self.shorten(path) + "\033[0m\n")
+ stdout_write(self.color_skip + self.shorten(path) + "\033[0m\n")
- def success(self, path, tries):
- sys.stdout.write("\r\033[1;32m" + self.shorten(path) + "\033[0m\n")
+ def success(self, path):
+ stdout_write(self.color_success + self.shorten(path) + "\033[0m\n")
class EAWCache(dict):
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index c85bb88..84ee7af 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -74,7 +74,7 @@ class PathFormat():
self.directory = self.realdirectory = \
self.filename = self.extension = self.prefix = \
self.path = self.realpath = self.temppath = ""
- self.delete = self._create_directory = False
+ self.delete = False
extension_map = config("extension-map")
if extension_map is None:
@@ -138,7 +138,11 @@ class PathFormat():
def open(self, mode="wb"):
"""Open file and return a corresponding file object"""
- return open(self.temppath, mode)
+ try:
+ return open(self.temppath, mode)
+ except FileNotFoundError:
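+            # the target directory is created lazily; build it
+            # now and retry (delayed directory creation)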
+ os.makedirs(self.realdirectory)
+ return open(self.temppath, mode)
def exists(self):
"""Return True if the file exists on disk"""
@@ -187,7 +191,6 @@ class PathFormat():
directory += sep
self.realdirectory = directory
- self._create_directory = True
def set_filename(self, kwdict):
"""Set general filename data"""
@@ -279,9 +282,6 @@ class PathFormat():
def build_path(self):
"""Combine directory and filename to full paths"""
- if self._create_directory:
- os.makedirs(self.realdirectory, exist_ok=True)
- self._create_directory = False
self.filename = filename = self.build_filename(self.kwdict)
self.path = self.directory + filename
self.realpath = self.realdirectory + filename
@@ -317,11 +317,18 @@ class PathFormat():
if self.temppath != self.realpath:
# Move temp file to its actual location
- try:
- os.replace(self.temppath, self.realpath)
- except OSError:
- shutil.copyfile(self.temppath, self.realpath)
- os.unlink(self.temppath)
+ while True:
+ try:
+ os.replace(self.temppath, self.realpath)
+ except FileNotFoundError:
+ # delayed directory creation
+ os.makedirs(self.realdirectory)
+ continue
+ except OSError:
+ # move across different filesystems
+ shutil.copyfile(self.temppath, self.realpath)
+ os.unlink(self.temppath)
+ break
mtime = self.kwdict.get("_mtime")
if mtime:
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 5e8f3e9..4e86239 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -95,7 +95,7 @@ class MetadataPP(PostProcessor):
with open(path, "w", encoding="utf-8") as fp:
self.write(fp, pathfmt.kwdict)
except FileNotFoundError:
- os.makedirs(directory, exist_ok=True)
+ os.makedirs(directory)
with open(path, "w", encoding="utf-8") as fp:
self.write(fp, pathfmt.kwdict)
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index fb57e84..0b4c259 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -119,6 +119,9 @@ class UgoiraPP(PostProcessor):
if self.args:
args += self.args
+ # ensure target directory exists
+ os.makedirs(pathfmt.realdirectory, exist_ok=True)
+
# invoke ffmpeg
try:
if self.twopass:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 4bb220a..009ee08 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -302,9 +302,9 @@ def set_mtime(path, mtime):
pass
-def load_cookiestxt(fp):
- """Parse a Netscape cookies.txt file and return a list of its Cookies"""
- cookies = []
+def cookiestxt_load(fp, cookiejar):
+ """Parse a Netscape cookies.txt file and add its Cookies to 'cookiejar'"""
+ set_cookie = cookiejar.set_cookie
for line in fp:
@@ -321,11 +321,12 @@ def load_cookiestxt(fp):
domain, domain_specified, path, secure, expires, name, value = \
line.split("\t")
+
if not name:
name = value
value = None
- cookies.append(Cookie(
+ set_cookie(Cookie(
0, name, value,
None, False,
domain,
@@ -337,12 +338,11 @@ def load_cookiestxt(fp):
False, None, None, {},
))
- return cookies
-
-def save_cookiestxt(fp, cookies):
+def cookiestxt_store(fp, cookies):
"""Write 'cookies' in Netscape cookies.txt format to 'fp'"""
- fp.write("# Netscape HTTP Cookie File\n\n")
+ write = fp.write
+ write("# Netscape HTTP Cookie File\n\n")
for cookie in cookies:
if not cookie.domain:
@@ -355,15 +355,15 @@ def save_cookiestxt(fp, cookies):
name = cookie.name
value = cookie.value
- fp.write("\t".join((
+ write("\t".join((
cookie.domain,
"TRUE" if cookie.domain.startswith(".") else "FALSE",
cookie.path,
"TRUE" if cookie.secure else "FALSE",
"0" if cookie.expires is None else str(cookie.expires),
name,
- value,
- )) + "\n")
+ value + "\n",
+ )))
def code_to_language(code, default=None):
@@ -695,12 +695,18 @@ class ExtendedUrl():
class DownloadArchive():
def __init__(self, path, format_string, cache_key="_archive_key"):
- con = sqlite3.connect(path, timeout=60, check_same_thread=False)
+ try:
+ con = sqlite3.connect(path, timeout=60, check_same_thread=False)
+ except sqlite3.OperationalError:
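+            # likely a missing parent directory; create it and retry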
+ os.makedirs(os.path.dirname(path))
+ con = sqlite3.connect(path, timeout=60, check_same_thread=False)
con.isolation_level = None
self.close = con.close
self.cursor = con.cursor()
- self.keygen = format_string.format_map
+
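+        # evaluate the archive format string with the full formatter
+        # module instead of plain str.format_map()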
+ from . import formatter
+ self.keygen = formatter.parse(format_string).format_map
self._cache_key = cache_key
try:
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 624f288..1881291 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.21.2"
+__version__ = "1.22.0"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index 45b9826..b2da445 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -395,9 +395,6 @@ def parse_command_line(module, argv):
"allow_multiple_audio_streams": opts.allow_multiple_audio_streams,
"check_formats": getattr(
opts, "check_formats", None),
- "listformats": opts.listformats,
- "listformats_table": getattr(
- opts, "listformats_table", None),
"outtmpl": opts.outtmpl,
"outtmpl_na_placeholder": opts.outtmpl_na_placeholder,
"paths": getattr(opts, "paths", None),
@@ -448,7 +445,6 @@ def parse_command_line(module, argv):
"writesubtitles": opts.writesubtitles,
"writeautomaticsub": opts.writeautomaticsub,
"allsubtitles": opts.allsubtitles,
- "listsubtitles": opts.listsubtitles,
"subtitlesformat": opts.subtitlesformat,
"subtitleslangs": opts.subtitleslangs,
"matchtitle": module.decodeOption(opts.matchtitle),
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 4cce8a3..efb6963 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -208,6 +208,22 @@ class TestFormatter(unittest.TestCase):
self.assertRegex(out1, r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\d(\.\d+)?$")
self.assertNotEqual(out1, out2)
+ def test_literals(self):
+ value = "foo"
+
+ self._run_test("{'foo'}" , value)
+ self._run_test("{'foo'!u}" , value.upper())
+ self._run_test("{'f00':R0/o/}" , value)
+ self._run_test("{'foobar'[:3]}", value)
+ self._run_test("{z|'foo'}" , value)
+ self._run_test("{z|''|'foo'}" , value)
+
+ self._run_test("{_lit[foo]}" , value)
+ self._run_test("{_lit[foo]!u}" , value.upper())
+ self._run_test("{_lit[f00]:R0/o/}" , value)
+ self._run_test("{_lit[foobar][:3]}", value)
+ self._run_test("{z|_lit[foo]}" , value)
+
def test_template(self):
with tempfile.TemporaryDirectory() as tmpdirname:
path1 = os.path.join(tmpdirname, "tpl1")
diff --git a/test/test_job.py b/test/test_job.py
index 0276555..fec6997 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -149,10 +149,13 @@ class TestInfoJob(TestJob):
self.assertEqual(self._capture_stdout(extr), """\
Category / Subcategory
"test_category" / "test_subcategory"
+
Filename format (default):
"test_{filename}.{extension}"
+
Directory format (default):
["{category}"]
+
""")
def test_custom(self):
@@ -165,18 +168,22 @@ Directory format (default):
self.assertEqual(self._capture_stdout(extr), """\
Category / Subcategory
"test_category" / "test_subcategory"
+
Filename format (custom):
"custom"
Filename format (default):
"test_{filename}.{extension}"
+
Directory format (custom):
["custom"]
Directory format (default):
["{category}"]
+
Request interval (custom):
321
Request interval (default):
123.456
+
""")
def test_base_category(self):
@@ -186,10 +193,13 @@ Request interval (default):
self.assertEqual(self._capture_stdout(extr), """\
Category / Subcategory / Basecategory
"test_category" / "test_subcategory" / "test_basecategory"
+
Filename format (default):
"test_{filename}.{extension}"
+
Directory format (default):
["{category}"]
+
""")
diff --git a/test/test_util.py b/test/test_util.py
index c269c9e..7ab1175 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -168,11 +168,12 @@ class TestISO639_1(unittest.TestCase):
class TestCookiesTxt(unittest.TestCase):
- def test_load_cookiestxt(self):
+ def test_cookiestxt_load(self):
def _assert(content, expected):
- cookies = util.load_cookiestxt(io.StringIO(content, None))
- for c, e in zip(cookies, expected):
+ jar = http.cookiejar.CookieJar()
+ util.cookiestxt_load(io.StringIO(content, None), jar)
+ for c, e in zip(jar, expected):
self.assertEqual(c.__dict__, e.__dict__)
_assert("", [])
@@ -218,13 +219,14 @@ class TestCookiesTxt(unittest.TestCase):
)
with self.assertRaises(ValueError):
- util.load_cookiestxt("example.org\tTRUE\t/\tTRUE\t0\tname")
+ util.cookiestxt_load("example.org\tTRUE\t/\tTRUE\t0\tname",
+ http.cookiejar.CookieJar())
- def test_save_cookiestxt(self):
+ def test_cookiestxt_store(self):
def _assert(cookies, expected):
fp = io.StringIO(newline=None)
- util.save_cookiestxt(fp, cookies)
+ util.cookiestxt_store(fp, cookies)
self.assertMultiLineEqual(fp.getvalue(), expected)
_assert([], "# Netscape HTTP Cookie File\n\n")
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
new file mode 100644
index 0000000..eedb4f9
--- /dev/null
+++ b/test/test_ytdl.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import ytdl, util, config # noqa E402
+
+
+class Test_CommandlineArguments(unittest.TestCase):
+ module_name = "youtube_dl"
+
+ @classmethod
+ def setUpClass(cls):
+ try:
+ cls.module = __import__(cls.module_name)
+ except ImportError:
+ raise unittest.SkipTest("cannot import module '{}'".format(
+ cls.module_name))
+ cls.default = ytdl.parse_command_line(cls.module, [])
+
+ def test_ignore_errors(self):
+ self._("--ignore-errors" , "ignoreerrors", True)
+ self._("--abort-on-error", "ignoreerrors", False)
+
+ def test_default_search(self):
+ self._(["--default-search", "foo"] , "default_search", "foo")
+
+ def test_mark_watched(self):
+ self._("--mark-watched" , "mark_watched", True)
+ self._("--no-mark-watched", "mark_watched", False)
+
+ def test_proxy(self):
+ self._(["--proxy", "socks5://127.0.0.1:1080/"],
+ "proxy", "socks5://127.0.0.1:1080/")
+ self._(["--cn-verification-proxy", "https://127.0.0.1"],
+ "cn_verification_proxy", "https://127.0.0.1")
+ self._(["--geo-verification-proxy", "127.0.0.1"],
+ "geo_verification_proxy", "127.0.0.1")
+
+ def test_network_options(self):
+ self._(["--socket-timeout", "3.5"],
+ "socket_timeout", 3.5)
+ self._(["--source-address", "127.0.0.1"],
+ "source_address", "127.0.0.1")
+ self._("-4" , "source_address", "0.0.0.0")
+ self._("--force-ipv4", "source_address", "0.0.0.0")
+ self._("-6" , "source_address", "::")
+ self._("--force-ipv6", "source_address", "::")
+
+ def test_thumbnail_options(self):
+ self._("--write-thumbnail", "writethumbnail", True)
+ self._("--write-all-thumbnails", "write_all_thumbnails", True)
+
+ def test_authentication_options(self):
+ self._(["-u" , "foo"], "username", "foo")
+ self._(["--username", "foo"], "username", "foo")
+
+ self._(["-p" , "bar"], "password", "bar")
+ self._(["--password", "bar"], "password", "bar")
+
+ self._(["--ap-mso" , "mso"], "ap_mso", "mso")
+ self._(["--ap-username", "foo"], "ap_username", "foo")
+ self._(["--ap-password", "bar"], "ap_password", "bar")
+
+ self._(["-2" , "pass"], "twofactor", "pass")
+ self._(["--twofactor", "pass"], "twofactor", "pass")
+
+ self._(["--video-password", "pass"], "videopassword", "pass")
+
+ self._("-n" , "usenetrc", True)
+ self._("--netrc", "usenetrc", True)
+
+ def test_subtitle_options(self):
+ self._("--write-sub" , "writesubtitles" , True)
+ self._("--write-auto-sub", "writeautomaticsub", True)
+
+ self._(["--sub-format", "best"], "subtitlesformat", "best")
+ self._(["--sub-langs", "en,ru"], "subtitleslangs", ["en", "ru"])
+
+ def test_retries(self):
+ inf = float("inf")
+
+ self._(["--retries", "5"], "retries", 5)
+ self._(["--retries", "inf"], "retries", inf)
+ self._(["--retries", "infinite"], "retries", inf)
+ self._(["--fragment-retries", "8"], "fragment_retries", 8)
+ self._(["--fragment-retries", "inf"], "fragment_retries", inf)
+ self._(["--fragment-retries", "infinite"], "fragment_retries", inf)
+
+ def test_geo_bypass(self):
+ self._("--geo-bypass", "geo_bypass", True)
+ self._("--no-geo-bypass", "geo_bypass", False)
+ self._(["--geo-bypass-country", "EN"], "geo_bypass_country", "EN")
+ self._(["--geo-bypass-ip-block", "198.51.100.14/24"],
+ "geo_bypass_ip_block", "198.51.100.14/24")
+
+ def test_headers(self):
+ headers = self.module.std_headers
+
+ self.assertNotEqual(headers["User-Agent"], "Foo/1.0")
+ self._(["--user-agent", "Foo/1.0"])
+ self.assertEqual(headers["User-Agent"], "Foo/1.0")
+
+ self.assertNotIn("Referer", headers)
+ self._(["--referer", "http://example.org/"])
+ self.assertEqual(headers["Referer"], "http://example.org/")
+
+ self.assertNotEqual(headers["Accept"], "*/*")
+ self.assertNotIn("DNT", headers)
+ self._([
+ "--add-header", "accept:*/*",
+ "--add-header", "dnt:1",
+ ])
+ self.assertEqual(headers["accept"], "*/*")
+ self.assertEqual(headers["dnt"], "1")
+
+ def test_extract_audio(self):
+ opts = self._(["--extract-audio"])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "FFmpegExtractAudio",
+ "preferredcodec": "best",
+ "preferredquality": "5",
+ "nopostoverwrites": False,
+ })
+
+ opts = self._([
+ "--extract-audio",
+ "--audio-format", "opus",
+ "--audio-quality", "9",
+ "--no-post-overwrites",
+ ])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "FFmpegExtractAudio",
+ "preferredcodec": "opus",
+ "preferredquality": "9",
+ "nopostoverwrites": True,
+ })
+
+ def test_recode_video(self):
+ opts = self._(["--recode-video", " mkv "])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "FFmpegVideoConvertor",
+ "preferedformat": "mkv",
+ })
+
+ def test_subs(self):
+ opts = self._(["--convert-subs", "srt"])
+ conv = {"key": "FFmpegSubtitlesConvertor", "format": "srt"}
+ if self.module_name == "yt_dlp":
+ conv["when"] = "before_dl"
+ self.assertEqual(opts["postprocessors"][0], conv)
+
+ def test_embed(self):
+ subs = {"key": "FFmpegEmbedSubtitle"}
+ thumb = {"key": "EmbedThumbnail", "already_have_thumbnail": False}
+ if self.module_name == "yt_dlp":
+ subs["already_have_subtitle"] = False
+
+ opts = self._(["--embed-subs", "--embed-thumbnail"])
+ self.assertEqual(opts["postprocessors"], [subs, thumb])
+
+ thumb["already_have_thumbnail"] = True
+ if self.module_name == "yt_dlp":
+ subs["already_have_subtitle"] = True
+ thumb["already_have_thumbnail"] = "all"
+
+ opts = self._([
+ "--embed-thumbnail",
+ "--embed-subs",
+ "--write-sub",
+ "--write-all-thumbnails",
+ ])
+ self.assertEqual(opts["postprocessors"], [subs, thumb])
+
+ def test_metadata(self):
+ opts = self._("--add-metadata")
+ self.assertEqual(opts["postprocessors"][0], {"key": "FFmpegMetadata"})
+
+ def test_metadata_from_title(self):
+ opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "MetadataFromTitle",
+ "titleformat": "%(artist)s - %(title)s",
+ })
+
+ def test_xattr(self):
+ self._("--xattr-set-filesize", "xattr_set_filesize", True)
+
+ opts = self._("--xattrs")
+ self.assertEqual(opts["postprocessors"][0], {"key": "XAttrMetadata"})
+
+ def test_noop(self):
+ cmdline = [
+ "--update",
+ "--dump-user-agent",
+ "-F",
+ "--list-formats",
+ "--list-extractors",
+ "--list-thumbnails",
+ "--list-subs",
+ "--ap-list-mso",
+ "--extractor-descriptions",
+ "--ignore-config",
+ ]
+
+ if self.module_name != "yt_dlp":
+ cmdline.extend((
+ "--dump-json",
+ "--dump-single-json",
+ "--config-location", "~",
+ ))
+
+ result = self._(cmdline)
+ result["daterange"] = self.default["daterange"]
+ self.assertEqual(result, self.default)
+
+ def _(self, cmdline, option=util.SENTINEL, expected=None):
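+        # parse 'cmdline' and, when 'option' is given,
+        # assert that it produced the 'expected' value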
+ if isinstance(cmdline, str):
+ cmdline = [cmdline]
+ result = ytdl.parse_command_line(self.module, cmdline)
+ if option is not util.SENTINEL:
+ self.assertEqual(result[option], expected, option)
+ return result
+
+
+class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
+ module_name = "yt_dlp"
+
+ def test_retries_extractor(self):
+ inf = float("inf")
+
+ self._(["--extractor-retries", "5"], "extractor_retries", 5)
+ self._(["--extractor-retries", "inf"], "extractor_retries", inf)
+ self._(["--extractor-retries", "infinite"], "extractor_retries", inf)
+
+    def test_remux_video(self):
+ opts = self._(["--remux-video", " mkv "])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "FFmpegVideoRemuxer",
+ "preferedformat": "mkv",
+ })
+
+ def test_metadata(self):
+ opts = self._(["--embed-metadata",
+ "--no-embed-chapters",
+ "--embed-info-json"])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "FFmpegMetadata",
+ "add_chapters": False,
+ "add_metadata": True,
+ "add_infojson": True,
+ })
+
+ def test_metadata_from_title(self):
+ opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
+ self.assertEqual(opts["postprocessors"][0], {
+ "key": "MetadataParser",
+ "when": "pre_process",
+ "actions": [self.module.MetadataFromFieldPP.to_action(
+ "title:%(artist)s - %(title)s")],
+ })
+
+
+if __name__ == "__main__":
+ unittest.main(warnings="ignore")