author     Unit 193 <unit193@unit193.net>  2022-03-15 00:19:57 -0400
committer  Unit 193 <unit193@unit193.net>  2022-03-15 00:19:57 -0400
commit     c2e774d3f5a4499b8beb5a12ab46a0099b16b1e7 (patch)
tree       a14107397b5bcb491aa4f4fb3e0feb4582e1879b
parent     7900ee4e3692dbd8056c3e47c81bb22eda030b65 (diff)

New upstream version 1.21.0 (tag: upstream/1.21.0)
-rw-r--r--  CHANGELOG.md                             41
-rw-r--r--  PKG-INFO                                 48
-rw-r--r--  README.rst                               46
-rw-r--r--  data/completion/gallery-dl.fish          62
-rw-r--r--  data/man/gallery-dl.1                     2
-rw-r--r--  data/man/gallery-dl.conf.5              130
-rw-r--r--  gallery_dl.egg-info/PKG-INFO             48
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt           5
-rw-r--r--  gallery_dl/downloader/common.py           8
-rw-r--r--  gallery_dl/downloader/http.py             3
-rw-r--r--  gallery_dl/downloader/ytdl.py             1
-rw-r--r--  gallery_dl/extractor/__init__.py          4
-rw-r--r--  gallery_dl/extractor/booru.py             6
-rw-r--r--  gallery_dl/extractor/common.py           38
-rw-r--r--  gallery_dl/extractor/deviantart.py       13
-rw-r--r--  gallery_dl/extractor/fanbox.py            9
-rw-r--r--  gallery_dl/extractor/fantia.py           28
-rw-r--r--  gallery_dl/extractor/hentaicosplays.py    1
-rw-r--r--  gallery_dl/extractor/imagebam.py        106
-rw-r--r--  gallery_dl/extractor/kemonoparty.py       7
-rw-r--r--  gallery_dl/extractor/kissgoddess.py      80
-rw-r--r--  gallery_dl/extractor/lolisafe.py         17
-rw-r--r--  gallery_dl/extractor/mangadex.py          5
-rw-r--r--  gallery_dl/extractor/mememuseum.py      120
-rw-r--r--  gallery_dl/extractor/newgrounds.py       16
-rw-r--r--  gallery_dl/extractor/oauth.py             8
-rw-r--r--  gallery_dl/extractor/patreon.py          16
-rw-r--r--  gallery_dl/extractor/seiga.py            31
-rw-r--r--  gallery_dl/extractor/skeb.py             26
-rw-r--r--  gallery_dl/extractor/slideshare.py      104
-rw-r--r--  gallery_dl/extractor/subscribestar.py     4
-rw-r--r--  gallery_dl/extractor/toyhouse.py        173
-rw-r--r--  gallery_dl/extractor/tumblr.py           11
-rw-r--r--  gallery_dl/extractor/twibooru.py        241
-rw-r--r--  gallery_dl/extractor/twitter.py          62
-rw-r--r--  gallery_dl/extractor/ytdl.py              5
-rw-r--r--  gallery_dl/path.py                       12
-rw-r--r--  gallery_dl/postprocessor/metadata.py      9
-rw-r--r--  gallery_dl/postprocessor/mtime.py        10
-rw-r--r--  gallery_dl/text.py                        7
-rw-r--r--  gallery_dl/util.py                       34
-rw-r--r--  gallery_dl/version.py                     2
-rw-r--r--  gallery_dl/ytdl.py                        4
-rw-r--r--  setup.py                                  1
-rw-r--r--  test/test_cookies.py                     86
-rw-r--r--  test/test_text.py                        12
46 files changed, 1380 insertions, 322 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c64d80d..ffd11a6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,44 @@
# Changelog
+## 1.21.0 - 2022-03-14
+### Additions
+- [fantia] add `num` enumeration index ([#2377](https://github.com/mikf/gallery-dl/issues/2377))
+- [fantia] support "Blog Post" content ([#2381](https://github.com/mikf/gallery-dl/issues/2381))
+- [imagebam] add support for /view/ paths ([#2378](https://github.com/mikf/gallery-dl/issues/2378))
+- [kemonoparty] match beta.kemono.party URLs ([#2348](https://github.com/mikf/gallery-dl/issues/2348))
+- [kissgoddess] add `gallery` and `model` extractors ([#1052](https://github.com/mikf/gallery-dl/issues/1052), [#2304](https://github.com/mikf/gallery-dl/issues/2304))
+- [mememuseum] add `tag` and `post` extractors ([#2264](https://github.com/mikf/gallery-dl/issues/2264))
+- [newgrounds] add `post_url` metadata field ([#2328](https://github.com/mikf/gallery-dl/issues/2328))
+- [patreon] add `image_large` file type ([#2257](https://github.com/mikf/gallery-dl/issues/2257))
+- [toyhouse] support `art` listings ([#1546](https://github.com/mikf/gallery-dl/issues/1546), [#2331](https://github.com/mikf/gallery-dl/issues/2331))
+- [twibooru] add extractors for searches, galleries, and posts ([#2219](https://github.com/mikf/gallery-dl/issues/2219))
+- [postprocessor:metadata] implement `mtime` option ([#2307](https://github.com/mikf/gallery-dl/issues/2307))
+- [postprocessor:mtime] add `event` option ([#2307](https://github.com/mikf/gallery-dl/issues/2307))
+- add fish shell completion ([#2363](https://github.com/mikf/gallery-dl/issues/2363))
+- add `timedelta` class to global namespace in filter expressions
+### Changes
+- [seiga] require authentication with `user_session` cookie ([#2372](https://github.com/mikf/gallery-dl/issues/2372))
+ - remove username & password login due to 2FA
+- refactor proxy support ([#2357](https://github.com/mikf/gallery-dl/issues/2357))
+ - allow gallery-dl proxy settings to overwrite environment proxies
+ - allow specifying different proxies for data extraction and download
+### Fixes
+- [bunkr] fix mp4 downloads ([#2239](https://github.com/mikf/gallery-dl/issues/2239))
+- [fanbox] fetch data for each individual post ([#2388](https://github.com/mikf/gallery-dl/issues/2388))
+- [hentaicosplays] send `Referer` header ([#2317](https://github.com/mikf/gallery-dl/issues/2317))
+- [imagebam] set `nsfw_inter` cookie ([#2334](https://github.com/mikf/gallery-dl/issues/2334))
+- [kemonoparty] limit default filename length ([#2373](https://github.com/mikf/gallery-dl/issues/2373))
+- [mangadex] fix chapters without `translatedLanguage` ([#2352](https://github.com/mikf/gallery-dl/issues/2352))
+- [newgrounds] fix video descriptions ([#2328](https://github.com/mikf/gallery-dl/issues/2328))
+- [skeb] add `sent-requests` option ([#2322](https://github.com/mikf/gallery-dl/issues/2322), [#2330](https://github.com/mikf/gallery-dl/issues/2330))
+- [slideshare] fix extraction
+- [subscribestar] unescape attachment URLs ([#2370](https://github.com/mikf/gallery-dl/issues/2370))
+- [twitter] fix handling of 429 Too Many Requests responses ([#2339](https://github.com/mikf/gallery-dl/issues/2339))
+- [twitter] warn about age-restricted Tweets ([#2354](https://github.com/mikf/gallery-dl/issues/2354))
+- [twitter] handle Tweets with "softIntervention" entries
+- [twitter] update query hashes
+- fix another bug in `_check_cookies()` ([#2160](https://github.com/mikf/gallery-dl/issues/2160))
+
## 1.20.5 - 2022-02-14
### Additions
- [furaffinity] add `layout` option ([#2277](https://github.com/mikf/gallery-dl/issues/2277))
@@ -1237,7 +1276,7 @@
- Miscellaneous fixes for `*reactor`, `simplyhentai`
## 1.10.1 - 2019-08-02
-## Fixes
+### Fixes
- Use the correct domain for exhentai.org input URLs
## 1.10.0 - 2019-08-01
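The `timedelta` name added to the filter-expression namespace in this release pairs with the datetime-valued `date` field that most extractors provide. A minimal sketch — the URL is just the danbooru example from the README below, and the `date` field is extractor-dependent:

```bash
# skip anything older than 30 days (requires the extractor to set "date")
gallery-dl --filter "date >= datetime.now() - timedelta(days=30)" \
    "https://danbooru.donmai.us/posts?tags=bonocho"
```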
diff --git a/PKG-INFO b/PKG-INFO
index baecf30..1fddcdc 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.20.5
+Version: 1.21.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -72,14 +72,14 @@ easily installed or upgraded using pip_:
.. code:: bash
- $ python3 -m pip install -U gallery-dl
+ python3 -m pip install -U gallery-dl
Installing the latest dev version directly from GitHub can be done with
pip_ as well:
.. code:: bash
- $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
+ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
Note: Windows users should use :code:`py -3` instead of :code:`python3`.
@@ -89,7 +89,7 @@ To ensure these packages are up-to-date, run
.. code:: bash
- $ python3 -m pip install --upgrade pip setuptools wheel
+ python3 -m pip install --upgrade pip setuptools wheel
Standalone Executable
@@ -98,8 +98,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -112,7 +112,7 @@ Linux users that are using a distro that is supported by Snapd_ can install *gal
.. code:: bash
- $ snap install gallery-dl
+ snap install gallery-dl
Chocolatey
@@ -122,7 +122,7 @@ Windows users that have Chocolatey_ installed can install *gallery-dl* from the
.. code:: powershell
- $ choco install gallery-dl
+ choco install gallery-dl
Scoop
@@ -132,7 +132,7 @@ Scoop
.. code:: powershell
- $ scoop install gallery-dl
+ scoop install gallery-dl
Usage
@@ -143,7 +143,7 @@ from:
.. code:: bash
- $ gallery-dl [OPTION]... URL...
+ gallery-dl [OPTION]... URL...
See also :code:`gallery-dl --help`.
@@ -155,21 +155,21 @@ Download images; in this case from danbooru via tag search for 'bonocho':
.. code:: bash
- $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
+ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
-Get the direct URL of an image from a site that requires authentication:
+Get the direct URL of an image from a site supporting authentication with username & password:
.. code:: bash
- $ gallery-dl -g -u "<username>" -p "<password>" "https://seiga.nicovideo.jp/seiga/im3211703"
+ gallery-dl -g -u "<username>" -p "<password>" "https://twitter.com/i/web/status/604341487988576256"
Filter manga chapters by language and chapter number:
.. code:: bash
- $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/"
+ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/"
| Search a remote resource for URLs and download images from them:
@@ -177,7 +177,7 @@ Filter manga chapters by language and chapter number:
.. code:: bash
- $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
+ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
If a site's address is nonstandard for its extractor, you can prefix the URL with the
@@ -185,7 +185,7 @@ extractor's name to force the use of a specific extractor:
.. code:: bash
- $ gallery-dl "tumblr:https://sometumblrblog.example"
+ gallery-dl "tumblr:https://sometumblrblog.example"
Configuration
@@ -233,7 +233,7 @@ Username & Password
Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
-``nijie`` and ``seiga``
+``nijie``
and optional for
``aryion``,
``danbooru``,
@@ -259,7 +259,7 @@ You can set the necessary information in your configuration file
{
"extractor": {
- "seiga": {
+ "twitter": {
"username": "<username>",
"password": "<password>"
}
@@ -272,8 +272,8 @@ or you can provide them directly via the
.. code:: bash
- $ gallery-dl -u <username> -p <password> URL
- $ gallery-dl -o username=<username> -o password=<password> URL
+ gallery-dl -u <username> -p <password> URL
+ gallery-dl -o username=<username> -o password=<password> URL
Cookies
@@ -317,7 +317,7 @@ the :code:`--cookies` command-line option:
.. code:: bash
- $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
OAuth
@@ -335,7 +335,7 @@ To link your account to *gallery-dl*, start by invoking it with
.. code:: bash
- $ gallery-dl oauth:flickr
+ gallery-dl oauth:flickr
You will be sent to the site's authorization page and asked to grant read
access to *gallery-dl*. Authorize it and you will be shown one or more
@@ -346,8 +346,8 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. code:: bash
- $ gallery-dl oauth:mastodon:pawoo.net
- $ gallery-dl oauth:mastodon:https://mastodon.social/
+ gallery-dl oauth:mastodon:pawoo.net
+ gallery-dl oauth:mastodon:https://mastodon.social/
diff --git a/README.rst b/README.rst
index 8e9d658..8324066 100644
--- a/README.rst
+++ b/README.rst
@@ -38,14 +38,14 @@ easily installed or upgraded using pip_:
.. code:: bash
- $ python3 -m pip install -U gallery-dl
+ python3 -m pip install -U gallery-dl
Installing the latest dev version directly from GitHub can be done with
pip_ as well:
.. code:: bash
- $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
+ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
Note: Windows users should use :code:`py -3` instead of :code:`python3`.
@@ -55,7 +55,7 @@ To ensure these packages are up-to-date, run
.. code:: bash
- $ python3 -m pip install --upgrade pip setuptools wheel
+ python3 -m pip install --upgrade pip setuptools wheel
Standalone Executable
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -78,7 +78,7 @@ Linux users that are using a distro that is supported by Snapd_ can install *gal
.. code:: bash
- $ snap install gallery-dl
+ snap install gallery-dl
Chocolatey
@@ -88,7 +88,7 @@ Windows users that have Chocolatey_ installed can install *gallery-dl* from the
.. code:: powershell
- $ choco install gallery-dl
+ choco install gallery-dl
Scoop
@@ -98,7 +98,7 @@ Scoop
.. code:: powershell
- $ scoop install gallery-dl
+ scoop install gallery-dl
Usage
@@ -109,7 +109,7 @@ from:
.. code:: bash
- $ gallery-dl [OPTION]... URL...
+ gallery-dl [OPTION]... URL...
See also :code:`gallery-dl --help`.
@@ -121,21 +121,21 @@ Download images; in this case from danbooru via tag search for 'bonocho':
.. code:: bash
- $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
+ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
-Get the direct URL of an image from a site that requires authentication:
+Get the direct URL of an image from a site supporting authentication with username & password:
.. code:: bash
- $ gallery-dl -g -u "<username>" -p "<password>" "https://seiga.nicovideo.jp/seiga/im3211703"
+ gallery-dl -g -u "<username>" -p "<password>" "https://twitter.com/i/web/status/604341487988576256"
Filter manga chapters by language and chapter number:
.. code:: bash
- $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/"
+ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/"
| Search a remote resource for URLs and download images from them:
@@ -143,7 +143,7 @@ Filter manga chapters by language and chapter number:
.. code:: bash
- $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
+ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
If a site's address is nonstandard for its extractor, you can prefix the URL with the
@@ -151,7 +151,7 @@ extractor's name to force the use of a specific extractor:
.. code:: bash
- $ gallery-dl "tumblr:https://sometumblrblog.example"
+ gallery-dl "tumblr:https://sometumblrblog.example"
Configuration
@@ -199,7 +199,7 @@ Username & Password
Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
-``nijie`` and ``seiga``
+``nijie``
and optional for
``aryion``,
``danbooru``,
@@ -225,7 +225,7 @@ You can set the necessary information in your configuration file
{
"extractor": {
- "seiga": {
+ "twitter": {
"username": "<username>",
"password": "<password>"
}
@@ -238,8 +238,8 @@ or you can provide them directly via the
.. code:: bash
- $ gallery-dl -u <username> -p <password> URL
- $ gallery-dl -o username=<username> -o password=<password> URL
+ gallery-dl -u <username> -p <password> URL
+ gallery-dl -o username=<username> -o password=<password> URL
Cookies
@@ -283,7 +283,7 @@ the :code:`--cookies` command-line option:
.. code:: bash
- $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
OAuth
@@ -301,7 +301,7 @@ To link your account to *gallery-dl*, start by invoking it with
.. code:: bash
- $ gallery-dl oauth:flickr
+ gallery-dl oauth:flickr
You will be sent to the site's authorization page and asked to grant read
access to *gallery-dl*. Authorize it and you will be shown one or more
@@ -312,8 +312,8 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. code:: bash
- $ gallery-dl oauth:mastodon:pawoo.net
- $ gallery-dl oauth:mastodon:https://mastodon.social/
+ gallery-dl oauth:mastodon:pawoo.net
+ gallery-dl oauth:mastodon:https://mastodon.social/
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
new file mode 100644
index 0000000..8f915fd
--- /dev/null
+++ b/data/completion/gallery-dl.fish
@@ -0,0 +1,62 @@
+complete -c gallery-dl -x
+complete -c gallery-dl -s 'h' -l 'help' -d 'Print this help message and exit'
+complete -c gallery-dl -l 'version' -d 'Print program version and exit'
+complete -c gallery-dl -r -F -s 'i' -l 'input-file' -d 'Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified'
+complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'd' -l 'destination' -d 'Target location for file downloads'
+complete -c gallery-dl -x -a '(__fish_complete_directories)' -s 'D' -l 'directory' -d 'Exact location for file downloads'
+complete -c gallery-dl -x -s 'f' -l 'filename' -d 'Filename format string for downloaded files ("/O" for "original" filenames)'
+complete -c gallery-dl -r -F -l 'cookies' -d 'File to load additional cookies from'
+complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy'
+complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to'
+complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)'
+complete -c gallery-dl -s 'q' -l 'quiet' -d 'Activate quiet mode'
+complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging information'
+complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading'
+complete -c gallery-dl -s 'G' -l 'resolve-urls' -d 'Print URLs instead of downloading; resolve intermediary URLs'
+complete -c gallery-dl -s 'j' -l 'dump-json' -d 'Print JSON information'
+complete -c gallery-dl -s 's' -l 'simulate' -d 'Simulate data extraction; do not download anything'
+complete -c gallery-dl -s 'E' -l 'extractor-info' -d 'Print extractor defaults and settings'
+complete -c gallery-dl -s 'K' -l 'list-keywords' -d 'Print a list of available keywords and example values for the given URLs'
+complete -c gallery-dl -l 'list-modules' -d 'Print a list of available extractor modules'
+complete -c gallery-dl -l 'list-extractors' -d 'Print a list of extractor classes with description, (sub)category and example URL'
+complete -c gallery-dl -r -F -l 'write-log' -d 'Write logging output to FILE'
+complete -c gallery-dl -r -F -l 'write-unsupported' -d 'Write URLs, which get emitted by other extractors but cannot be handled, to FILE'
+complete -c gallery-dl -l 'write-pages' -d 'Write downloaded intermediary pages to files in the current directory to debug problems'
+complete -c gallery-dl -x -s 'r' -l 'limit-rate' -d 'Maximum download rate (e.g. 500k or 2.5M)'
+complete -c gallery-dl -x -s 'R' -l 'retries' -d 'Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)'
+complete -c gallery-dl -x -l 'http-timeout' -d 'Timeout for HTTP connections (default: 30.0)'
+complete -c gallery-dl -x -l 'sleep' -d 'Number of seconds to wait before each download. This can be either a constant value or a range (e.g. 2.7 or 2.0-3.5)'
+complete -c gallery-dl -x -l 'sleep-request' -d 'Number of seconds to wait between HTTP requests during data extraction'
+complete -c gallery-dl -x -l 'sleep-extractor' -d 'Number of seconds to wait before starting data extraction for an input URL'
+complete -c gallery-dl -x -l 'filesize-min' -d 'Do not download files smaller than SIZE (e.g. 500k or 2.5M)'
+complete -c gallery-dl -x -l 'filesize-max' -d 'Do not download files larger than SIZE (e.g. 500k or 2.5M)'
+complete -c gallery-dl -l 'no-part' -d 'Do not use .part files'
+complete -c gallery-dl -l 'no-skip' -d 'Do not skip downloads; overwrite existing files'
+complete -c gallery-dl -l 'no-mtime' -d 'Do not set file modification times according to Last-Modified HTTP response headers'
+complete -c gallery-dl -l 'no-download' -d 'Do not download any files'
+complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate validation'
+complete -c gallery-dl -r -F -s 'c' -l 'config' -d 'Additional configuration files'
+complete -c gallery-dl -r -F -l 'config-yaml' -d '==SUPPRESS=='
+complete -c gallery-dl -x -s 'o' -l 'option' -d 'Additional "<key>=<value>" option values'
+complete -c gallery-dl -l 'ignore-config' -d 'Do not read the default configuration files'
+complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with'
+complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username'
+complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data'
+complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded files in the archive file and skip downloading any file already in it'
+complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped'
+complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped'
+complete -c gallery-dl -x -l 'range' -d 'Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"'
+complete -c gallery-dl -x -l 'chapter-range' -d 'Like "--range", but applies to manga-chapters and other delegated URLs'
+complete -c gallery-dl -x -l 'filter' -d 'Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"'
+complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies to manga-chapters and other delegated URLs'
+complete -c gallery-dl -l 'zip' -d 'Store downloaded files in a ZIP archive'
+complete -c gallery-dl -l 'ugoira-conv' -d 'Convert Pixiv Ugoira to WebM (requires FFmpeg)'
+complete -c gallery-dl -l 'ugoira-conv-lossless' -d 'Convert Pixiv Ugoira to WebM in VP9 lossless mode'
+complete -c gallery-dl -l 'write-metadata' -d 'Write metadata to separate JSON files'
+complete -c gallery-dl -l 'write-info-json' -d 'Write gallery metadata to an info.json file'
+complete -c gallery-dl -l 'write-infojson' -d '==SUPPRESS=='
+complete -c gallery-dl -l 'write-tags' -d 'Write image tags to separate text files'
+complete -c gallery-dl -l 'mtime-from-date' -d 'Set file modification times according to "date" metadata'
+complete -c gallery-dl -x -l 'exec' -d 'Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"'
+complete -c gallery-dl -x -l 'exec-after' -d 'Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"'
+complete -c gallery-dl -x -s 'P' -l 'postprocessor' -d 'Activate the specified post processor'
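The new completion script above follows fish's one-file-per-command convention; fish auto-loads completions named after the command from its completions directories. A sketch of a per-user install — these are the standard fish paths, nothing this diff itself configures:

```fish
# make the script available to fish for the current user
mkdir -p ~/.config/fish/completions
cp data/completion/gallery-dl.fish ~/.config/fish/completions/
```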
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 1671d2d..3e373fd 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-02-14" "1.20.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-03-14" "1.21.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 51b20cd..9651d18 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-02-14" "1.20.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-03-14" "1.21.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -419,8 +419,6 @@ Specifying a username and password is required for
.br
* \f[I]nijie\f[]
-.br
-* \f[I]seiga\f[]
and optional for
@@ -553,6 +551,28 @@ Note: All proxy URLs should include a scheme,
otherwise \f[I]http://\f[] is assumed.
+.SS extractor.*.source-address
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] with 1 \f[I]string\f[] and 1 \f[I]integer\f[] as elements
+
+.IP "Example:" 4
+.br
+* "192.168.178.20"
+.br
+* ["192.168.178.20", 8080]
+
+.IP "Description:" 4
+Client-side IP address to bind to.
+
+Can be either a simple \f[I]string\f[] with just the local IP address
+.br
+or a \f[I]list\f[] with IP and explicit port number as elements.
+.br
+
+
.SS extractor.*.user-agent
.IP "Type:" 6
\f[I]string\f[]
@@ -1874,13 +1894,13 @@ port than the default.
\f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
-\f[I]["images", "attachments", "postfile", "content"]\f[]
+\f[I]["images", "image_large", "attachments", "postfile", "content"]\f[]
.IP "Description:" 4
Determines the type and order of files to be downloaded.
Available types are
-\f[I]postfile\f[], \f[I]images\f[], \f[I]attachments\f[], and \f[I]content\f[].
+\f[I]postfile\f[], \f[I]images\f[], \f[I]image_large\f[], \f[I]attachments\f[], and \f[I]content\f[].
.SS extractor.photobucket.subalbums
@@ -2228,6 +2248,17 @@ Download video embeds from external sites.
Download videos.
+.SS extractor.skeb.sent-requests
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download sent requests.
+
+
.SS extractor.skeb.thumbnails
.IP "Type:" 6
\f[I]bool\f[]
@@ -2331,6 +2362,34 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[]
You can use \f[I]"all"\f[] instead of listing all types separately.
+.SS extractor.twibooru.api-key
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Your \f[I]Twibooru API Key\f[],
+to use your account's browsing settings and filters.
+
+
+.SS extractor.twibooru.filter
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]2\f[] (\f[I]Everything\f[] filter)
+
+.IP "Description:" 4
+The content filter ID to use.
+
+Setting an explicit filter ID overrides any default filters and can be used
+to access 18+ content without \f[I]API Key\f[].
+
+See \f[I]Filters\f[] for details.
+
+
.SS extractor.twitter.cards
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -2511,18 +2570,6 @@ Control video download behavior.
* \f[I]false\f[]: Skip video Tweets
-.SS extractor.twitter.warnings
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Emit \f[I]logging messages\f[]
-for non-fatal errors reported by Twitter's API.
-
-
.SS extractor.unsplash.format
.IP "Type:" 6
\f[I]string\f[]
@@ -2912,6 +2959,20 @@ Connection timeout during file downloads.
Certificate validation during file downloads.
+.SS downloader.*.proxy
+.IP "Type:" 6
+\f[I]string\f[] or \f[I]object\f[]
+
+.IP "Default:" 9
+\f[I]extractor.*.proxy\f[]
+
+.IP "Description:" 4
+Proxy server used for file downloads.
+.br
+Disable the use of a proxy by explicitly setting this option to \f[I]null\f[].
+.br
+
+
.SS downloader.http.adjust-extensions
.IP "Type:" 6
\f[I]bool\f[]
@@ -3450,6 +3511,41 @@ Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
+.SS metadata.mtime
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Set modification times for generated metadata files
+according to the accompanying downloaded file.
+
+Enabling this option will only have an effect
+*if* there is actual \f[I]mtime\f[] metadata available, that is
+
+.br
+* after a file download (\f[I]"event": "file"\f[] (default), \f[I]"event": "after"\f[])
+.br
+* when running *after* an \f[I]mtime\f[] post processor for the same \f[I]event\f[]
+
+For example, a \f[I]metadata\f[] post processor for \f[I]"event": "post"\f[] will
+*not* be able to set its file's modification time unless an \f[I]mtime\f[]
+post processor with \f[I]"event": "post"\f[] runs *before* it.
+
+
+.SS mtime.event
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"file"\f[]
+
+.IP "Description:" 4
+See \f[I]metadata.event\f[]
+
+
.SS mtime.key
.IP "Type:" 6
\f[I]string\f[]
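The ordering constraint between the `metadata` and `mtime` post processors described above is easier to see as configuration. A sketch, assuming post processors run in the order listed, with option names as documented above:

```json
{
    "extractor": {
        "postprocessors": [
            {"name": "mtime", "event": "post"},
            {"name": "metadata", "event": "post", "mtime": true}
        ]
    }
}
```

Here the `mtime` post processor runs first for the `post` event, so the `metadata` post processor can copy the resulting modification time onto the metadata file it generates.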
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 7e3e5cf..009ede8 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.20.5
+Version: 1.21.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -72,14 +72,14 @@ easily installed or upgraded using pip_:
.. code:: bash
- $ python3 -m pip install -U gallery-dl
+ python3 -m pip install -U gallery-dl
Installing the latest dev version directly from GitHub can be done with
pip_ as well:
.. code:: bash
- $ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
+ python3 -m pip install -U -I --no-deps --no-cache-dir https://github.com/mikf/gallery-dl/archive/master.tar.gz
Note: Windows users should use :code:`py -3` instead of :code:`python3`.
@@ -89,7 +89,7 @@ To ensure these packages are up-to-date, run
.. code:: bash
- $ python3 -m pip install --upgrade pip setuptools wheel
+ python3 -m pip install --upgrade pip setuptools wheel
Standalone Executable
@@ -98,8 +98,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -112,7 +112,7 @@ Linux users that are using a distro that is supported by Snapd_ can install *gal
.. code:: bash
- $ snap install gallery-dl
+ snap install gallery-dl
Chocolatey
@@ -122,7 +122,7 @@ Windows users that have Chocolatey_ installed can install *gallery-dl* from the
.. code:: powershell
- $ choco install gallery-dl
+ choco install gallery-dl
Scoop
@@ -132,7 +132,7 @@ Scoop
.. code:: powershell
- $ scoop install gallery-dl
+ scoop install gallery-dl
Usage
@@ -143,7 +143,7 @@ from:
.. code:: bash
- $ gallery-dl [OPTION]... URL...
+ gallery-dl [OPTION]... URL...
See also :code:`gallery-dl --help`.
@@ -155,21 +155,21 @@ Download images; in this case from danbooru via tag search for 'bonocho':
.. code:: bash
- $ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
+ gallery-dl "https://danbooru.donmai.us/posts?tags=bonocho"
-Get the direct URL of an image from a site that requires authentication:
+Get the direct URL of an image from a site supporting authentication with username & password:
.. code:: bash
- $ gallery-dl -g -u "<username>" -p "<password>" "https://seiga.nicovideo.jp/seiga/im3211703"
+ gallery-dl -g -u "<username>" -p "<password>" "https://twitter.com/i/web/status/604341487988576256"
Filter manga chapters by language and chapter number:
.. code:: bash
- $ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/"
+ gallery-dl --chapter-filter "lang == 'fr' and 10 <= chapter < 20" "https://mangadex.org/title/2354/"
| Search a remote resource for URLs and download images from them:
@@ -177,7 +177,7 @@ Filter manga chapters by language and chapter number:
.. code:: bash
- $ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
+ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
If a site's address is nonstandard for its extractor, you can prefix the URL with the
@@ -185,7 +185,7 @@ extractor's name to force the use of a specific extractor:
.. code:: bash
- $ gallery-dl "tumblr:https://sometumblrblog.example"
+ gallery-dl "tumblr:https://sometumblrblog.example"
Configuration
@@ -233,7 +233,7 @@ Username & Password
Some extractors require you to provide valid login credentials in the form of
a username & password pair. This is necessary for
-``nijie`` and ``seiga``
+``nijie``
and optional for
``aryion``,
``danbooru``,
@@ -259,7 +259,7 @@ You can set the necessary information in your configuration file
{
"extractor": {
- "seiga": {
+ "twitter": {
"username": "<username>",
"password": "<password>"
}
@@ -272,8 +272,8 @@ or you can provide them directly via the
.. code:: bash
- $ gallery-dl -u <username> -p <password> URL
- $ gallery-dl -o username=<username> -o password=<password> URL
+ gallery-dl -u <username> -p <password> URL
+ gallery-dl -o username=<username> -o password=<password> URL
Cookies
@@ -317,7 +317,7 @@ the :code:`--cookies` command-line option:
.. code:: bash
- $ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
+ gallery-dl --cookies "$HOME/path/to/cookies.txt" URL
OAuth
@@ -335,7 +335,7 @@ To link your account to *gallery-dl*, start by invoking it with
.. code:: bash
- $ gallery-dl oauth:flickr
+ gallery-dl oauth:flickr
You will be sent to the site's authorization page and asked to grant read
access to *gallery-dl*. Authorize it and you will be shown one or more
@@ -346,8 +346,8 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. code:: bash
- $ gallery-dl oauth:mastodon:pawoo.net
- $ gallery-dl oauth:mastodon:https://mastodon.social/
+ gallery-dl oauth:mastodon:pawoo.net
+ gallery-dl oauth:mastodon:https://mastodon.social/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index edcc5e2..4139a4d 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -6,6 +6,7 @@ setup.cfg
setup.py
data/completion/_gallery-dl
data/completion/gallery-dl
+data/completion/gallery-dl.fish
data/man/gallery-dl.1
data/man/gallery-dl.conf.5
docs/gallery-dl-example.conf
@@ -102,6 +103,7 @@ gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
+gallery_dl/extractor/kissgoddess.py
gallery_dl/extractor/kohlchan.py
gallery_dl/extractor/komikcast.py
gallery_dl/extractor/lightroom.py
@@ -118,6 +120,7 @@ gallery_dl/extractor/mangapark.py
gallery_dl/extractor/mangasee.py
gallery_dl/extractor/mangoxo.py
gallery_dl/extractor/mastodon.py
+gallery_dl/extractor/mememuseum.py
gallery_dl/extractor/message.py
gallery_dl/extractor/moebooru.py
gallery_dl/extractor/myhentaigallery.py
@@ -166,9 +169,11 @@ gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/subscribestar.py
gallery_dl/extractor/tapas.py
gallery_dl/extractor/test.py
+gallery_dl/extractor/toyhouse.py
gallery_dl/extractor/tsumino.py
gallery_dl/extractor/tumblr.py
gallery_dl/extractor/tumblrgallery.py
+gallery_dl/extractor/twibooru.py
gallery_dl/extractor/twitter.py
gallery_dl/extractor/unsplash.py
gallery_dl/extractor/vanillarock.py
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index d858075..1168d83 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2020 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -27,6 +27,12 @@ class DownloaderBase():
self.partdir = util.expand_path(self.partdir)
os.makedirs(self.partdir, exist_ok=True)
+ proxies = self.config("proxy", util.SENTINEL)
+ if proxies is util.SENTINEL:
+ self.proxies = job.extractor._proxies
+ else:
+ self.proxies = util.build_proxy_map(proxies, self.log)
+
def config(self, key, default=None):
"""Interpolate downloader config value for 'key'"""
return config.interpolate(("downloader", self.scheme), key, default)
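This hunk is what enables per-downloader proxies: a downloader only builds its own proxy map when `downloader.*.proxy` is set, and otherwise inherits the extractor's. One use the changelog calls out — routing data extraction through a proxy while letting file downloads connect directly — would look roughly like this (the address is a placeholder):

```json
{
    "extractor": {
        "proxy": "http://127.0.0.1:8080"
    },
    "downloader": {
        "proxy": null
    }
}
```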
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 91ce731..b878f5f 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -121,7 +121,8 @@ class HttpDownloader(DownloaderBase):
try:
response = self.session.request(
"GET", url, stream=True, headers=headers,
- timeout=self.timeout, verify=self.verify)
+ timeout=self.timeout, verify=self.verify,
+ proxies=self.proxies)
except (ConnectionError, Timeout) as exc:
msg = str(exc)
continue
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 462bbf8..2badccf 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -25,6 +25,7 @@ class YoutubeDLDownloader(DownloaderBase):
"retries": retries+1 if retries >= 0 else float("inf"),
"socket_timeout": self.config("timeout", extractor._timeout),
"nocheckcertificate": not self.config("verify", extractor._verify),
+ "proxy": self.proxies.get("http") if self.proxies else None,
}
self.ytdl_instance = None
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b52561e..1bec48e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -67,6 +67,7 @@ modules = [
"keenspot",
"kemonoparty",
"khinsider",
+ "kissgoddess",
"kohlchan",
"komikcast",
"lightroom",
@@ -81,6 +82,7 @@ modules = [
"mangapark",
"mangasee",
"mangoxo",
+ "mememuseum",
"myhentaigallery",
"myportfolio",
"naver",
@@ -123,9 +125,11 @@ modules = [
"speakerdeck",
"subscribestar",
"tapas",
+ "toyhouse",
"tsumino",
"tumblr",
"tumblrgallery",
+ "twibooru",
"twitter",
"unsplash",
"vanillarock",
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index a42ec53..12d98b1 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -41,9 +41,9 @@ class BooruExtractor(BaseExtractor):
page_html = self._extended_tags(post)
if notes:
self._notes(post, page_html)
- self._prepare(post)
- post.update(data)
text.nameext_from_url(url, post)
+ post.update(data)
+ self._prepare(post)
yield Message.Directory, post
yield Message.Url, url, post
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5a2d3a3..e3559f9 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -55,6 +55,7 @@ class Extractor():
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
+ self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
self._interval = util.build_duration_func(
self.config("sleep-request", self.request_interval),
self.request_interval_min,
@@ -65,7 +66,6 @@ class Extractor():
self._init_session()
self._init_cookies()
- self._init_proxies()
@classmethod
def from_url(cls, url):
@@ -104,10 +104,12 @@ class Extractor():
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
- if retries is None:
- retries = self._retries
if session is None:
session = self.session
+ if retries is None:
+ retries = self._retries
+ if "proxies" not in kwargs:
+ kwargs["proxies"] = self._proxies
if "timeout" not in kwargs:
kwargs["timeout"] = self._timeout
if "verify" not in kwargs:
@@ -289,20 +291,6 @@ class Extractor():
session.mount("https://", adapter)
session.mount("http://", adapter)
- def _init_proxies(self):
- """Update the session's proxy map"""
- proxies = self.config("proxy")
- if proxies:
- if isinstance(proxies, str):
- proxies = {"http": proxies, "https": proxies}
- if isinstance(proxies, dict):
- for scheme, proxy in proxies.items():
- if "://" not in proxy:
- proxies[scheme] = "http://" + proxy.lstrip("/")
- self.session.proxies = proxies
- else:
- self.log.warning("invalid proxy specifier: %s", proxies)
-
def _init_cookies(self):
"""Populate the session's cookiejar"""
self._cookiefile = None
@@ -371,20 +359,25 @@ class Extractor():
for cookie in self._cookiejar:
if cookie.name in names and (
not domain or cookie.domain == domain):
+
if cookie.expires:
diff = int(cookie.expires - now)
+
if diff <= 0:
self.log.warning(
"Cookie '%s' has expired", cookie.name)
+ continue
+
elif diff <= 86400:
hours = diff // 3600
self.log.warning(
"Cookie '%s' will expire in less than %s hour%s",
cookie.name, hours + 1, "s" if hours else "")
- else:
- names.discard(cookie.name)
- if not names:
- return True
+ continue
+
+ names.discard(cookie.name)
+ if not names:
+ return True
return False
def _prepare_ddosguard_cookies(self):
@@ -616,8 +609,7 @@ class BaseExtractor(Extractor):
if index:
self.category, self.root = self.instances[index-1]
if not self.root:
- url = text.ensure_http_scheme(match.group(0))
- self.root = url[:url.index("/", 8)]
+ self.root = text.root_from_url(match.group(0))
else:
self.root = group
self.category = group.partition("://")[2]
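The deleted `_init_proxies()` above was folded into `util.build_proxy_map()`, whose implementation is not part of this diff. A rough sketch of what it presumably does, reconstructed from the removed code and the call sites (`util.build_proxy_map(proxies, self.log)`); the real function may differ:

```python
def build_proxy_map(proxies, log):
    """Normalize a proxy specifier into a requests-style proxy dict.

    Sketch only: reconstructed from the removed _init_proxies() logic.
    """
    if not proxies:
        return None
    if isinstance(proxies, str):
        # a single URL is used for both http and https
        proxies = {"http": proxies, "https": proxies}
    if isinstance(proxies, dict):
        for scheme, proxy in proxies.items():
            if "://" not in proxy:
                # bare "host:port" entries are assumed to be HTTP proxies
                proxies[scheme] = "http://" + proxy.lstrip("/")
        return proxies
    log.warning("invalid proxy specifier: %s", proxies)
    return None
```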
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 94fec16..fda7220 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -1004,6 +1004,7 @@ class DeviantartOAuthAPI():
self.extractor = extractor
self.log = extractor.log
self.headers = {"dA-minor-version": "20200519"}
+ self._warn_429 = True
self.delay = extractor.config("wait-min", 0)
self.delay_min = max(2, self.delay)
@@ -1260,6 +1261,16 @@ class DeviantartOAuthAPI():
if self.delay < 30:
self.delay += 1
self.log.warning("%s. Using %ds delay.", msg, self.delay)
+
+ if self._warn_429 and self.delay >= 3:
+ self._warn_429 = False
+ if self.client_id == self.CLIENT_ID:
+ self.log.info(
+ "Register your own OAuth application and use its "
+ "credentials to prevent this error: "
+ "https://github.com/mikf/gallery-dl/blob/master/do"
+ "cs/configuration.rst#extractordeviantartclient-id"
+ "--client-secret")
else:
self.log.error(msg)
return data
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index ef79808..11436cb 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -51,19 +51,16 @@ class FanboxExtractor(Extractor):
url = text.ensure_http_scheme(url)
body = self.request(url, headers=headers).json()["body"]
for item in body["items"]:
- yield self._process_post(item)
+ yield self._get_post_data(item["id"])
url = body["nextUrl"]
- def _get_post_data_from_id(self, post_id):
+ def _get_post_data(self, post_id):
"""Fetch and process post data"""
headers = {"Origin": self.root}
url = "https://api.fanbox.cc/post.info?postId="+post_id
post = self.request(url, headers=headers).json()["body"]
- return self._process_post(post)
-
- def _process_post(self, post):
content_body = post.pop("body", None)
if content_body:
if "html" in content_body:
@@ -279,7 +276,7 @@ class FanboxPostExtractor(FanboxExtractor):
self.post_id = match.group(3)
def posts(self):
- return (self._get_post_data_from_id(self.post_id),)
+ return (self._get_post_data(self.post_id),)
class FanboxRedirectExtractor(Extractor):
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 89a965f..c05ec39 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -8,6 +8,7 @@
from .common import Extractor, Message
from .. import text
+import json
class FantiaExtractor(Extractor):
@@ -29,7 +30,9 @@ class FantiaExtractor(Extractor):
for post_id in self.posts():
full_response, post = self._get_post_data(post_id)
yield Message.Directory, post
+ post["num"] = 0
for url, url_data in self._get_urls_from_post(full_response, post):
+ post["num"] += 1
fname = url_data["content_filename"] or url
text.nameext_from_url(fname, url_data)
url_data["file_url"] = url
@@ -90,14 +93,39 @@ class FantiaExtractor(Extractor):
post["content_title"] = content["title"]
post["content_filename"] = content.get("filename", "")
post["content_id"] = content["id"]
+
+ if "comment" in content:
+ post["content_comment"] = content["comment"]
+
if "post_content_photos" in content:
for photo in content["post_content_photos"]:
post["file_id"] = photo["id"]
yield photo["url"]["original"], post
+
if "download_uri" in content:
post["file_id"] = content["id"]
yield self.root+"/"+content["download_uri"], post
+ if content["category"] == "blog" and "comment" in content:
+ comment_json = json.loads(content["comment"])
+ ops = comment_json.get("ops", ())
+
+ # collect blogpost text first
+ blog_text = ""
+ for op in ops:
+ insert = op.get("insert")
+ if isinstance(insert, str):
+ blog_text += insert
+ post["blogpost_text"] = blog_text
+
+ # collect images
+ for op in ops:
+ insert = op.get("insert")
+ if isinstance(insert, dict) and "fantiaImage" in insert:
+ img = insert["fantiaImage"]
+ post["file_id"] = img["id"]
+ yield "https://fantia.jp" + img["original_url"], post
+
class FantiaCreatorExtractor(FantiaExtractor):
"""Extractor for a Fantia creator's works"""
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index 7dd047c..b4f433b 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -57,6 +57,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
self.root = text.ensure_http_scheme(root)
url = "{}/story/{}/".format(self.root, self.slug)
GalleryExtractor.__init__(self, match, url)
+ self.session.headers["Referer"] = url
def metadata(self, page):
title = text.extract(page, "<title>", "</title>")[0]
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 9370840..7cd67d6 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,45 +10,40 @@
from .common import Extractor, Message
from .. import text, exception
+import re
class ImagebamExtractor(Extractor):
"""Base class for imagebam extractors"""
category = "imagebam"
root = "https://www.imagebam.com"
- cookies = None
def __init__(self, match):
Extractor.__init__(self, match)
- self.key = match.group(1)
- if self.cookies:
- self.session.cookies = self.cookies
-
- def get_image_data(self, data):
- page_url = "{}/image/{}".format(self.root, data["image_key"])
- page = self.request(page_url).text
- image_url, pos = text.extract(page, '<img src="https://images', '"')
-
- if not image_url:
- # cache cookies
- ImagebamExtractor.cookies = self.session.cookies
- # repeat request to get past "Continue to your image" pages
- page = self.request(page_url).text
- image_url, pos = text.extract(
- page, '<img src="https://images', '"')
+ self.path = match.group(1)
+ self.session.cookies.set("nsfw_inter", "1", domain="www.imagebam.com")
+ def _parse_image_page(self, path):
+ page = self.request(self.root + path).text
+ url, pos = text.extract(page, '<img src="https://images', '"')
filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
- data["url"] = "https://images" + image_url
+
+ data = {
+ "url" : "https://images" + url,
+ "image_key": path.rpartition("/")[2],
+ }
data["filename"], _, data["extension"] = filename.rpartition(".")
+ return data
class ImagebamGalleryExtractor(ImagebamExtractor):
- """Extractor for image galleries from imagebam.com"""
+ """Extractor for imagebam galleries"""
subcategory = "gallery"
directory_fmt = ("{category}", "{title} {gallery_key}")
filename_fmt = "{num:>03} {filename}.{extension}"
archive_fmt = "{gallery_key}_{image_key}"
- pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
+ pattern = (r"(?:https?://)?(?:www\.)?imagebam\.com"
+ r"(/(?:gallery/|view/G)[a-zA-Z0-9]+)")
test = (
("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
"url": "76d976788ae2757ac81694736b07b72356f5c4c8",
@@ -63,50 +58,56 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
"exception": exception.HttpError,
}),
+ # /view/ path (#2378)
+ ("https://www.imagebam.com/view/GA3MT1", {
+ "url": "35018ce1e00a2d2825a33d3cd37857edaf804919",
+ "keyword": "3a9f98178f73694c527890c0d7ca9a92b46987ba",
+ }),
)
def items(self):
- url = "{}/gallery/{}".format(self.root, self.key)
- page = self.request(url).text
+ page = self.request(self.root + self.path).text
- data = self.get_metadata(page)
- keys = self.get_image_keys(page)
- keys.reverse()
- data["count"] = len(keys)
- data["gallery_key"] = self.key
+ images = self.images(page)
+ images.reverse()
+
+ data = self.metadata(page)
+ data["count"] = len(images)
+ data["gallery_key"] = self.path.rpartition("/")[2]
yield Message.Directory, data
- for data["num"], data["image_key"] in enumerate(keys, 1):
- self.get_image_data(data)
- yield Message.Url, data["url"], data
+ for data["num"], path in enumerate(images, 1):
+ image = self._parse_image_page(path)
+ image.update(data)
+ yield Message.Url, image["url"], image
@staticmethod
- def get_metadata(page):
- """Return gallery metadata"""
- title = text.extract(page, 'id="gallery-name">', '<')[0]
- return {"title": text.unescape(title.strip())}
-
- def get_image_keys(self, page):
- """Return a list of all image keys"""
- keys = []
+ def metadata(page):
+ return {"title": text.unescape(text.extract(
+ page, 'id="gallery-name">', '<')[0].strip())}
+
+ def images(self, page):
+ findall = re.compile(r'<a href="https://www\.imagebam\.com'
+ r'(/(?:image/|view/M)[a-zA-Z0-9]+)').findall
+
+ paths = []
while True:
- keys.extend(text.extract_iter(
- page, '<a href="https://www.imagebam.com/image/', '"'))
+ paths += findall(page)
pos = page.find('rel="next" aria-label="Next')
if pos > 0:
url = text.rextract(page, 'href="', '"', pos)[0]
if url:
page = self.request(url).text
continue
- return keys
+ return paths
class ImagebamImageExtractor(ImagebamExtractor):
- """Extractor for single images from imagebam.com"""
+ """Extractor for single imagebam images"""
subcategory = "image"
archive_fmt = "{image_key}"
pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
- r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
+ r"(/(?:image/|view/M|(?:[0-9a-f]{2}/){3})[a-zA-Z0-9]+)")
test = (
("https://www.imagebam.com/image/94d56c502511890", {
"url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
@@ -118,10 +119,19 @@ class ImagebamImageExtractor(ImagebamExtractor):
("https://www.imagebam.com/image/0850951366904951", {
"url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
}),
+ # /view/ path (#2378)
+ ("https://www.imagebam.com/view/ME8JOQP", {
+ "url": "4dca72bbe61a0360185cf4ab2bed8265b49565b8",
+ "keyword": "15a494c02fd30846b41b42a26117aedde30e4ceb",
+ "content": "f81008666b17a42d8834c4749b910e1dc10a6e83",
+ }),
)
def items(self):
- data = {"image_key": self.key}
- self.get_image_data(data)
- yield Message.Directory, data
- yield Message.Url, data["url"], data
+ path = self.path
+ if path[3] == "/":
+ path = ("/view/" if path[10] == "M" else "/image/") + path[10:]
+
+ image = self._parse_image_page(path)
+ yield Message.Directory, image
+ yield Message.Url, image["url"], image
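In the image extractor's new `items()`, the `path[3] == "/"` test singles out the legacy hex-pair paths that the pattern also accepts. A small demonstration of the normalization, with a hypothetical path:

```python
# legacy direct-image paths look like "/xx/yy/zz/KEY"
path = "/1d/8c/44/94d56c502511890"

if path[3] == "/":       # only the "/xx/yy/zz/" form has a slash at index 3
    key = path[10:]      # drop the three two-character directories
    # keys starting with "M" belong to the newer /view/ pages
    path = ("/view/" if key[0] == "M" else "/image/") + key

print(path)  # -> /image/94d56c502511890
```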
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index b898e3b..9537263 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,7 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?(kemono|coomer)\.party"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.party"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
@@ -23,15 +23,15 @@ class KemonopartyExtractor(Extractor):
category = "kemonoparty"
root = "https://kemono.party"
directory_fmt = ("{category}", "{service}", "{user}")
- filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
+ filename_fmt = "{id}_{title}_{num:>02}_{filename[:180]}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
cookiedomain = ".kemono.party"
def __init__(self, match):
if match.group(1) == "coomer":
self.category = "coomerparty"
- self.root = "https://coomer.party"
self.cookiedomain = ".coomer.party"
+ self.root = text.root_from_url(match.group(0))
Extractor.__init__(self, match)
def items(self):
@@ -291,6 +291,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
+ ("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
new file mode 100644
index 0000000..85ec806
--- /dev/null
+++ b/gallery_dl/extractor/kissgoddess.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://kissgoddess.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, exception
+
+
+class KissgoddessGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries on kissgoddess.com"""
+ category = "kissgoddess"
+ root = "https://kissgoddess.com"
+ pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/album/(\d+)"
+ test = ("https://kissgoddess.com/album/18285.html", {
+ "pattern": r"https://pic\.kissgoddess\.com"
+ r"/gallery/16473/18285/s/\d+\.jpg",
+ "count": 8,
+ "keyword": {
+ "gallery_id": 18285,
+ "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
+ },
+ })
+
+ def __init__(self, match):
+ self.gallery_id = match.group(1)
+ url = "{}/album/{}.html".format(self.root, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ return {
+ "gallery_id": text.parse_int(self.gallery_id),
+ "title" : text.extract(
+ page, '<title>', "<")[0].rpartition(" | ")[0],
+ }
+
+ def images(self, page):
+ pnum = 1
+
+ while page:
+ for url in text.extract_iter(page, "<img src='", "'"):
+ yield url, None
+
+ pnum += 1
+ url = "{}/album/{}_{}.html".format(
+ self.root, self.gallery_id, pnum)
+ try:
+ page = self.request(url).text
+ except exception.HttpError:
+ return
+
+
+class KissgoddessModelExtractor(Extractor):
+ """Extractor for all galleries of a model on kissgoddess.com"""
+ category = "kissgoddess"
+ subcategory = "model"
+ root = "https://kissgoddess.com"
+ pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/people/([^./?#]+)"
+ test = ("https://kissgoddess.com/people/aya-hazuki.html", {
+ "pattern": KissgoddessGalleryExtractor.pattern,
+ "count": ">= 7",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.model = match.group(1)
+
+ def items(self):
+ url = "{}/people/{}.html".format(self.root, self.model)
+ page = self.request(url).text
+
+ data = {"_extractor": KissgoddessGalleryExtractor}
+ for path in text.extract_iter(page, 'thumb"><a href="/album/', '"'):
+ url = self.root + "/album/" + path
+ yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index c63fa51..43377bd 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -44,7 +44,7 @@ class LolisafelbumExtractor(LolisafeExtractor):
}),
# mp4 (#2239)
("https://bunkr.is/a/ptRHaCn2", {
- "pattern": r"https://cdn\.bunkr\.is/_-RnHoW69L\.mp4",
+ "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
("https://bunkr.to/a/Lktg9Keq"),
@@ -73,9 +73,8 @@ class LolisafelbumExtractor(LolisafeExtractor):
data["name"], sep, data["id"] = data["filename"].rpartition("-")
if data["extension"] == "mp4":
- data["_http_validate"] = self._check_rewrite
- else:
- data["_http_validate"] = None
+ url = url.replace(
+ "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
yield Message.Url, url, data
def fetch_album(self, album_id):
@@ -87,13 +86,3 @@ class LolisafelbumExtractor(LolisafeExtractor):
"album_name": text.unescape(data["title"]),
"count" : data["count"],
}
-
- @staticmethod
- def _check_rewrite(response):
- if response.history and response.headers.get(
- "Content-Type").startswith("text/html"):
- # consume content to reuse connection
- response.content
- # rewrite to download URL
- return response.url.replace("/v/", "/d/", 1)
- return True
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 152da4f..7194757 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -53,7 +53,10 @@ class MangadexExtractor(Extractor):
cattributes = chapter["attributes"]
mattributes = manga["attributes"]
- lang = cattributes["translatedLanguage"].partition("-")[0]
+
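+        # 'translatedLanguage' can be missing or empty; only strip
+        # the region code when a language is actually set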
+ lang = cattributes.get("translatedLanguage")
+ if lang:
+ lang = lang.partition("-")[0]
if cattributes["chapter"]:
chnum, sep, minor = cattributes["chapter"].partition(".")
diff --git a/gallery_dl/extractor/mememuseum.py b/gallery_dl/extractor/mememuseum.py
new file mode 100644
index 0000000..1de0d76
--- /dev/null
+++ b/gallery_dl/extractor/mememuseum.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://meme.museum/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class MememuseumExtractor(Extractor):
+ """Base class for meme.museum extractors"""
+ basecategory = "booru"
+ category = "mememuseum"
+ filename_fmt = "{category}_{id}_{md5}.{extension}"
+ archive_fmt = "{id}"
+ root = "https://meme.museum"
+
+ def items(self):
+ data = self.metadata()
+
+ for post in self.posts():
+ url = post["file_url"]
+ for key in ("id", "width", "height"):
+ post[key] = text.parse_int(post[key])
+ post["tags"] = text.unquote(post["tags"])
+ post.update(data)
+ yield Message.Directory, post
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ def metadata(self):
+ """Return general metadata"""
+ return ()
+
+ def posts(self):
+ """Return an iterable containing data of all relevant posts"""
+ return ()
+
+
+class MememuseumTagExtractor(MememuseumExtractor):
+ """Extractor for images from meme.museum by search-tags"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = r"(?:https?://)?meme\.museum/post/list/([^/?#]+)"
+ test = ("https://meme.museum/post/list/animated/1", {
+ "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20",
+ "count": ">= 30"
+ })
+ per_page = 25
+
+ def __init__(self, match):
+ MememuseumExtractor.__init__(self, match)
+ self.tags = text.unquote(match.group(1))
+
+ def metadata(self):
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ pnum = 1
+ while True:
+ url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum)
+ extr = text.extract_from(self.request(url).text)
+
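+            # each post is parsed from its thumbnail markup; the 'title'
+            # attribute packs "tags // <width>x<height> // <filesize>"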
+ while True:
+ mime = extr("data-mime='", "'")
+ if not mime:
+ break
+
+ pid = extr("data-post-id='", "'")
+ tags, dimensions, size = extr("title='", "'").split(" // ")
+ md5 = extr("/_thumbs/", "/")
+ width, _, height = dimensions.partition("x")
+
+ yield {
+ "file_url": "{}/_images/{}/{}%20-%20{}.{}".format(
+ self.root, md5, pid, text.quote(tags),
+ mime.rpartition("/")[2]),
+ "id": pid, "md5": md5, "tags": tags,
+ "width": width, "height": height,
+ "size": text.parse_bytes(size[:-1]),
+ }
+
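+            # stop when the page has no 'Next' link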
+ if not extr(">Next<", ">"):
+ return
+ pnum += 1
+
+
+class MememuseumPostExtractor(MememuseumExtractor):
+ """Extractor for single images from meme.museum"""
+ subcategory = "post"
+ pattern = r"(?:https?://)?meme\.museum/post/view/(\d+)"
+ test = ("https://meme.museum/post/view/10243", {
+ "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc4997"
+ r"1f78/10243%20-%20g%20beard%20open_source%20richard_stallm"
+ r"an%20stallman%20tagme%20text\.jpg",
+ "keyword": "3c8009251480cf17248c08b2b194dc0c4d59580e",
+ "content": "45565f3f141fc960a8ae1168b80e718a494c52d2",
+ })
+
+ def __init__(self, match):
+ MememuseumExtractor.__init__(self, match)
+ self.post_id = match.group(1)
+
+ def posts(self):
+ url = "{}/post/view/{}".format(self.root, self.post_id)
+ extr = text.extract_from(self.request(url).text)
+
+ return ({
+ "id" : self.post_id,
+ "tags" : extr(": ", "<"),
+ "md5" : extr("/_thumbs/", "/"),
+ "file_url": self.root + extr("id='main_image' src='", "'"),
+ "width" : extr("data-width=", " ").strip("'\""),
+ "height" : extr("data-height=", " ").strip("'\""),
+ "size" : 0,
+ },)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 54e2040..6d0e94b 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -103,7 +103,7 @@ class NewgroundsExtractor(Extractor):
}
def extract_post(self, post_url):
-
+ url = post_url
if "/art/view/" in post_url:
extract_data = self._extract_image_data
elif "/audio/listen/" in post_url:
@@ -111,18 +111,19 @@ class NewgroundsExtractor(Extractor):
else:
extract_data = self._extract_media_data
if self.flash:
- post_url += "/format/flash"
+ url += "/format/flash"
- response = self.request(post_url, fatal=False)
+ response = self.request(url, fatal=False)
if response.status_code >= 400:
return {}
page = response.text
extr = text.extract_from(page)
data = extract_data(extr, post_url)
- data["_comment"] = extr('id="author_comments"', '</div>')
+ data["_comment"] = extr(
+ 'id="author_comments"', '</div>').partition(">")[2]
data["comment"] = text.unescape(text.remove_html(
- data["_comment"].partition(">")[2], "", ""))
+ data["_comment"], "", ""))
data["favorites"] = text.parse_int(extr(
'id="faves_load">', '<').replace(",", ""))
data["score"] = text.parse_float(extr('id="score_number">', '<'))
@@ -134,6 +135,7 @@ class NewgroundsExtractor(Extractor):
data["tags"].sort()
data["user"] = self.user or data["artist"][0]
+ data["post_url"] = post_url
return data
@staticmethod
@@ -171,6 +173,7 @@ class NewgroundsExtractor(Extractor):
def _extract_media_data(self, extr, url):
index = url.split("/")[5]
title = extr('"og:title" content="', '"')
+ descr = extr('"og:description" content="', '"')
src = extr('{"url":"', '"')
if src:
@@ -209,7 +212,7 @@ class NewgroundsExtractor(Extractor):
"title" : text.unescape(title),
"url" : src,
"date" : date,
- "description": text.unescape(extr(
+ "description": text.unescape(descr or extr(
'itemprop="description" content="', '"')),
"rating" : extr('class="rated-', '"'),
"index" : text.parse_int(index),
@@ -319,6 +322,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
"artist" : ["kickinthehead", "danpaladin", "tomfulp"],
"comment" : "re:My fan trailer for Alien Hominid HD!",
"date" : "dt:2013-02-01 09:50:49",
+ "description": "Fan trailer for Alien Hominid HD!",
"favorites" : int,
"filename" : "564957_alternate_31",
"index" : 595355,
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 6812f35..428f772 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2021 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -399,7 +399,7 @@ class OAuthPixiv(OAuthBase):
if "error" in data:
print(data)
- if data["error"] == "invalid_request":
+ if data["error"] in ("invalid_request", "invalid_grant"):
print("'code' expired, try again")
return
@@ -417,6 +417,10 @@ class OAuthPixiv(OAuthBase):
2) Login
3) Select the last network monitor entry ('callback?state=...')
4) Copy its 'code' query parameter, paste it below, and press Enter
+
+- This 'code' will expire 30 seconds after logging in.
+- Copy-pasting more than just the 'code' value works as well,
+  e.g. the entire URL or several query parameters.
""")
code = input("code: ")
return code.rpartition("=")[2].strip()
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 051f1ef..35a015f 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -71,6 +71,15 @@ class PatreonExtractor(Extractor):
name = image.get("file_name") or self._filename(url) or url
yield "image", url, name
+ def _image_large(self, post):
+ image = post.get("image")
+ if image:
+ url = image.get("large_url")
+ if url:
+ name = image.get("file_name") or self._filename(url) or url
+ return (("image_large", url, name),)
+ return ()
+
def _attachments(self, post):
for attachment in post["attachments"]:
url = self.request(
@@ -212,10 +221,11 @@ class PatreonExtractor(Extractor):
def _build_file_generators(self, filetypes):
if filetypes is None:
- return (self._images, self._attachments,
- self._postfile, self._content)
+ return (self._images, self._image_large,
+ self._attachments, self._postfile, self._content)
genmap = {
"images" : self._images,
+ "image_large": self._image_large,
"attachments": self._attachments,
"postfile" : self._postfile,
"content" : self._content,
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index bf38a77..22c9487 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -1,16 +1,15 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://seiga.nicovideo.jp/"""
+"""Extractors for https://seiga.nicovideo.jp/"""
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import cache
class SeigaExtractor(Extractor):
@@ -25,7 +24,9 @@ class SeigaExtractor(Extractor):
self.start_image = 0
def items(self):
- self.login()
+ if not self._check_cookies(("user_session",)):
+ raise exception.StopExtraction("'user_session' cookie required")
+
images = iter(self.get_images())
data = next(images)
@@ -45,28 +46,6 @@ class SeigaExtractor(Extractor):
url, method="HEAD", allow_redirects=False, notfound="image")
return response.headers["Location"].replace("/o/", "/priv/", 1)
- def login(self):
- """Login and set necessary cookies"""
- if not self._check_cookies(("user_session",)):
- username, password = self._get_auth_info()
- self._update_cookies(self._login_impl(username, password))
-
- @cache(maxage=7*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- if not username or not password:
- raise exception.AuthenticationError(
- "Username and password required")
-
- self.log.info("Logging in as %s", username)
- url = "https://account.nicovideo.jp/api/v1/login"
- data = {"mail_tel": username, "password": password}
-
- self.request(url, method="POST", data=data)
- if "user_session" not in self.session.cookies:
- raise exception.AuthenticationError()
- del self.session.cookies["nicosid"]
- return self.session.cookies
-
class SeigaUserExtractor(SeigaExtractor):
"""Extractor for images of a user from seiga.nicovideo.jp"""
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 2c806ad..965391c 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -22,10 +22,11 @@ class SkebExtractor(Extractor):
Extractor.__init__(self, match)
self.user_name = match.group(1)
self.thumbnails = self.config("thumbnails", False)
+ self.sent_requests = self.config("sent-requests", False)
def items(self):
- for post_num in self.posts():
- response, post = self._get_post_data(post_num)
+ for user_name, post_num in self.posts():
+ response, post = self._get_post_data(user_name, post_num)
yield Message.Directory, post
for data in self._get_urls_from_post(response, post):
url = data["file_url"]
@@ -38,24 +39,33 @@ class SkebExtractor(Extractor):
url = "{}/api/users/{}/works".format(self.root, self.user_name)
params = {"role": "creator", "sort": "date", "offset": 0}
headers = {"Referer": self.root, "Authorization": "Bearer null"}
+ do_requests = self.sent_requests
while True:
posts = self.request(url, params=params, headers=headers).json()
for post in posts:
post_num = post["path"].rpartition("/")[2]
+ user_name = post["path"].split("/")[1][1:]
if post["private"]:
- self.log.debug("Skipping %s (private)", post_num)
+ self.log.debug("Skipping @%s/%s (private)",
+ user_name, post_num)
continue
- yield post_num
+ yield user_name, post_num
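+            # when 'sent-requests' is enabled, paginate a second time with
+            # role 'client' to also yield the requests this user sent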
if len(posts) < 30:
- return
+ if do_requests:
+ params["offset"] = 0
+ params['role'] = "client"
+ do_requests = False
+ continue
+ else:
+ return
params["offset"] += 30
- def _get_post_data(self, post_num):
+ def _get_post_data(self, user_name, post_num):
url = "{}/api/users/{}/works/{}".format(
- self.root, self.user_name, post_num)
+ self.root, user_name, post_num)
headers = {"Referer": self.root, "Authorization": "Bearer null"}
resp = self.request(url, headers=headers).json()
creator = resp["creator"]
@@ -130,7 +140,7 @@ class SkebPostExtractor(SkebExtractor):
self.post_num = match.group(2)
def posts(self):
- return (self.post_num,)
+ return ((self.user_name, self.post_num),)
class SkebUserExtractor(SkebExtractor):
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 91386e8..557c9fb 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2021 Mike Fährmann, Leonardo Taccari
+# Copyright 2016-2022 Mike Fährmann, Leonardo Taccari
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -8,11 +8,12 @@
"""Extractors for https://www.slideshare.net/"""
-from .common import Extractor, Message
+from .common import GalleryExtractor
from .. import text
+import json
-class SlidesharePresentationExtractor(Extractor):
+class SlidesharePresentationExtractor(GalleryExtractor):
"""Extractor for images from a presentation on slideshare.net"""
category = "slideshare"
subcategory = "presentation"
@@ -24,13 +25,36 @@ class SlidesharePresentationExtractor(Extractor):
test = (
(("https://www.slideshare.net"
"/Slideshare/get-started-with-slide-share"), {
- "url": "23685fb9b94b32c77a547d45dc3a82fe7579ea18",
- "content": "2e90a01c6ca225579ebf8f98ab46f97a28a5e45c",
+ "pattern": r"https://image\.slidesharecdn\.com/getstartedwithslide"
+ r"share-150520173821-lva1-app6892/95/get-started-with-s"
+ r"lide-share-\d+-1024\.jpg\?cb=\d+",
+ "count": 19,
+ "content": "2b6a191eab60b3978fdacfecf2da302dd45bc108",
+ "keyword": {
+ "comments": "0",
+ "description": "Get Started with SlideShare - "
+ "A Beginngers Guide for Creators",
+ "likes": r"re:\d{3,}",
+ "presentation": "get-started-with-slide-share",
+ "published": "dt:2015-05-20 00:00:00",
+ "title": "Getting Started With SlideShare",
+ "user": "Slideshare",
+ "views": r"re:\d{7,}",
+ },
}),
- # long title
+ # long title and description
(("https://www.slideshare.net/pragmaticsolutions/warum-sie-nicht-ihren"
"-mitarbeitenden-ndern-sollten-sondern-ihr-managementsystem"), {
"url": "cf70ca99f57f61affab47ebf8583eb564b21e3a7",
+ "keyword": {
+ "title": "Warum Sie nicht Ihren Mitarbeitenden ändern "
+ "sollten, sondern Ihr Managementsystem",
+ "description": "Mitarbeitende verhalten sich mehrheitlich so, "
+ "wie das System es ihnen vorgibt. Welche Voraus"
+ "setzungen es braucht, damit Ihre Mitarbeitende"
+ "n ihr ganzes Herzblut einsetzen, bespricht Fre"
+ "di Schmidli in diesem Referat.",
+ },
}),
# mobile URL
(("https://www.slideshare.net"
@@ -40,48 +64,50 @@ class SlidesharePresentationExtractor(Extractor):
)
def __init__(self, match):
- Extractor.__init__(self, match)
self.user, self.presentation = match.groups()
+ url = "https://www.slideshare.net/{}/{}".format(
+ self.user, self.presentation)
+ GalleryExtractor.__init__(self, match, url)
- def items(self):
- page = self.request("https://www.slideshare.net/" + self.user +
- "/" + self.presentation).text
- data = self.get_job_metadata(page)
- imgs = self.get_image_urls(page)
- data["count"] = len(imgs)
- yield Message.Directory, data
- for data["num"], url in enumerate(imgs, 1):
- yield Message.Url, url, text.nameext_from_url(url, data)
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ descr = extr('<meta name="description" content="', '"')
+ title = extr('<span class="j-title-breadcrumb">', '</span>')
+ published = extr('<div class="metadata-item">', '</div>')
+ comments = extr('content="UserComments:', '"')
+ likes = extr('content="UserLikes:', '"')
+ views = extr('content="UserPageVisits:', '"')
- def get_job_metadata(self, page):
- """Collect metadata for extractor-job"""
- descr, pos = text.extract(
- page, '<meta name="description" content="', '"')
- category, pos = text.extract(
- page, '<div class="metadata-item">', '</div>', pos)
- views, pos = text.extract(
- page, '<div class="metadata-item">', '</div>', pos)
- published, pos = text.extract(
- page, '<div class="metadata-item">', '</div>', pos)
- title, pos = text.extract(
- page, '<span class="j-title-breadcrumb">', '</span>', pos)
- alt_descr, pos = text.extract(
- page, '<p class="slideshow-description notranslate">', '</p>', pos)
-
- if descr.endswith("…") and alt_descr:
- descr = text.remove_html(alt_descr).strip()
+ if descr.endswith("…"):
+ alt_descr = extr(
+ 'id="slideshow-description-text" class="notranslate">', '</p>')
+ if alt_descr:
+ descr = text.remove_html(alt_descr).strip()
return {
"user": self.user,
"presentation": self.presentation,
"title": text.unescape(title.strip()),
"description": text.unescape(descr),
- "views": text.parse_int(views.rpartition(
- " views")[0].replace(",", "")),
- "published": published.strip(),
+ "views": views,
+ "likes": likes,
+ "comments": comments,
+ "published": text.parse_datetime(
+ published.strip(), "%b. %d, %Y"),
}
@staticmethod
- def get_image_urls(page):
- """Extract and return a list of all image-urls"""
- return list(text.extract_iter(page, 'data-full="', '"'))
+ def images(page):
+ data = json.loads(text.extract(
+ page, "xtend(true, slideshare_object.slideshow_config, ", ");")[0])
+
+        # using 'stripped_title' here is technically wrong, but it works all
+        # the same; slideshare doesn't seem to care what characters go there
+ begin = "https://image.slidesharecdn.com/{}/95/{}-".format(
+ data["ppt_location"], data["stripped_title"])
+ end = "-1024.jpg?cb=" + str(data["timestamp"])
+
+ return [
+ (begin + str(n) + end, None)
+ for n in range(1, data["slide_count"]+1)
+ ]
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 69e3854..b57013a 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -105,7 +105,7 @@ class SubscribestarExtractor(Extractor):
att, 'data-upload-id="', '"')[0]),
"name": text.unescape(text.extract(
att, 'doc_preview-title">', '<')[0] or ""),
- "url" : text.extract(att, 'href="', '"')[0],
+ "url" : text.unescape(text.extract(att, 'href="', '"')[0]),
"type": "attachment",
})
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
new file mode 100644
index 0000000..c6be38d
--- /dev/null
+++ b/gallery_dl/extractor/toyhouse.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://toyhou.se/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?toyhou\.se"
+
+
+class ToyhouseExtractor(Extractor):
+ """Base class for toyhouse extractors"""
+ category = "toyhouse"
+ root = "https://toyhou.se"
+ directory_fmt = ("{category}", "{user|artists!S}")
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+ self.offset = 0
+
+ def items(self):
+ metadata = self.metadata()
+
+ for post in util.advance(self.posts(), self.offset):
+ if metadata:
+ post.update(metadata)
+ text.nameext_from_url(post["url"], post)
+ post["id"], _, post["hash"] = post["filename"].partition("_")
+ yield Message.Directory, post
+ yield Message.Url, post["url"], post
+
+ def posts(self):
+ return ()
+
+ def metadata(self):
+ return None
+
+ def skip(self, num):
+ self.offset += num
+ return num
+
+ def _parse_post(self, post, needle='<a href="'):
+ extr = text.extract_from(post)
+ return {
+ "url": extr(needle, '"'),
+ "date": text.parse_datetime(extr(
+ 'Credits\n</h2>\n<div class="mb-1">', '<'),
+ "%d %b %Y, %I:%M:%S %p"),
+ "artists": [
+ text.remove_html(artist)
+ for artist in extr(
+ '<div class="artist-credit">', '</div>\n</div>').split(
+ '<div class="artist-credit">')
+ ],
+ "characters": text.split_html(extr(
+ '<div class="image-characters', '</div>\n</div>'))[2:],
+ }
+
+ def _pagination(self, path):
+ url = self.root + path
+ params = {"page": 1}
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for post in text.extract_iter(
+ page, '<li class="gallery-item">', '</li>'):
+ cnt += 1
+ yield self._parse_post(post)
+
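+            # an empty first page might be a content warning;
+            # accept it and request the page again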
+ if cnt == 0 and params["page"] == 1:
+ token, pos = text.extract(
+ page, '<input name="_token" type="hidden" value="', '"')
+ if not token:
+ return
+ data = {
+ "_token": token,
+ "user" : text.extract(page, 'value="', '"', pos)[0],
+ }
+ self.request(self.root + "/~account/warnings/accept",
+ method="POST", data=data, allow_redirects=False)
+ continue
+
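+            # a full page lists 18 posts; fewer means this was the last one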
+ if cnt < 18:
+ return
+ params["page"] += 1
+
+
+class ToyhouseArtExtractor(ToyhouseExtractor):
+ """Extractor for artworks of a toyhouse user"""
+ subcategory = "art"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/art"
+
+ test = (
+ ("https://www.toyhou.se/d-floe/art", {
+ "range": "1-30",
+ "count": 30,
+ "pattern": r"https://f\d+\.toyhou\.se/file/f\d+-toyhou-se"
+ r"/images/\d+_\w+\.\w+$",
+ "keyword": {
+ "artists": list,
+ "characters": list,
+ "date": "type:datetime",
+ "hash": r"re:\w+",
+ "id": r"re:\d+",
+ "url": str,
+ "user": "d-floe",
+ },
+ }),
+ # protected by Content Warning
+ ("https://www.toyhou.se/kroksoc/art", {
+ "count": ">= 19",
+ }),
+ )
+
+ def posts(self):
+ return self._pagination("/{}/art".format(self.user))
+
+ def metadata(self):
+ return {"user": self.user}
+
+
+class ToyhouseImageExtractor(ToyhouseExtractor):
+ """Extractor for individual toyhouse images"""
+ subcategory = "image"
+ pattern = (r"(?:https?://)?(?:"
+ r"(?:www\.)?toyhou\.se/~images|"
+ r"f\d+\.toyhou\.se/file/[^/?#]+/(?:image|watermark)s"
+ r")/(\d+)")
+ test = (
+ ("https://toyhou.se/~images/40587320", {
+ "content": "058ec8427977ab432c4cc5be5a6dd39ce18713ef",
+ "keyword": {
+ "artists": ["d-floe"],
+ "characters": ["Sumi"],
+ "date": "dt:2021-10-08 01:32:47",
+ "extension": "png",
+ "filename": "40587320_TT1NaBUr3FLkS1p",
+ "hash": "TT1NaBUr3FLkS1p",
+ "id": "40587320",
+ "url": "https://f2.toyhou.se/file/f2-toyhou-se/images"
+ "/40587320_TT1NaBUr3FLkS1p.png",
+ },
+ }),
+ # direct link, multiple artists
+ (("https://f2.toyhou.se/file/f2-toyhou-se"
+ "/watermarks/36817425_bqhGcwcnU.png?1625561467"), {
+ "keyword": {
+ "artists": [
+ "http://aminoapps.com/p/92sf3z",
+ "kroksoc (Color)"],
+ "characters": ["❀Reiichi❀"],
+ "date": "dt:2021-07-03 20:02:02",
+ "hash": "bqhGcwcnU",
+ "id": "36817425",
+ },
+ }),
+ ("https://f2.toyhou.se/file/f2-toyhou-se"
+ "/images/40587320_TT1NaBUr3FLkS1p.png"),
+ )
+
+ def posts(self):
+ url = "{}/~images/{}".format(self.root, self.user)
+ return (self._parse_post(self.request(url).text, '<img src="'),)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 358bc95..fbe641d 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -400,6 +400,15 @@ class TumblrAPI(oauth.OAuth1API):
t = (datetime.now() + timedelta(seconds=float(reset))).time()
self.log.error("Daily API rate limit exceeded")
+
+ api_key = self.api_key or self.session.auth.consumer_key
+ if api_key == self.API_KEY:
+ self.log.info("Register your own OAuth application and "
+ "use its credentials to prevent this error: "
+ "https://github.com/mikf/gallery-dl/blob/mas"
+ "ter/docs/configuration.rst#extractortumblra"
+ "pi-key--api-secret")
+
raise exception.StopExtraction(
"Aborting - Rate limit will reset at %s",
"{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second))
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
new file mode 100644
index 0000000..ec8ab35
--- /dev/null
+++ b/gallery_dl/extractor/twibooru.py
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://twibooru.org/"""
+
+from .booru import BooruExtractor
+from .. import text, exception
+import operator
+
+BASE_PATTERN = r"(?:https?://)?twibooru\.org"
+
+
+class TwibooruExtractor(BooruExtractor):
+ """Base class for twibooru extractors"""
+ category = "twibooru"
+ basecategory = "philomena"
+ filename_fmt = "{id}_{filename}.{extension}"
+ archive_fmt = "{id}"
+ request_interval = 6.05
+ per_page = 50
+ root = "https://twibooru.org"
+
+ def __init__(self, match):
+ BooruExtractor.__init__(self, match)
+ self.api = TwibooruAPI(self)
+
+ _file_url = operator.itemgetter("view_url")
+
+ @staticmethod
+ def _prepare(post):
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ name, sep, rest = post["name"].rpartition(".")
+ post["filename"] = name if sep else rest
+
+
+class TwibooruPostExtractor(TwibooruExtractor):
+ """Extractor for single twibooru posts"""
+ subcategory = "post"
+ request_interval = 1.0
+ pattern = BASE_PATTERN + r"/(\d+)"
+ test = ("https://twibooru.org/1", {
+ "pattern": r"https://cdn.twibooru.org/img/2020/7/8/1/full.png",
+ "content": "aac4d1dba611883ac701aaa8f0b2b322590517ae",
+ "keyword": {
+ "animated": False,
+ "aspect_ratio": 1.0,
+ "comment_count": int,
+ "created_at": "2020-07-08T22:26:55.743Z",
+ "date": "dt:2020-07-08 22:26:55",
+ "description": "Why have I done this?",
+ "downvotes": 0,
+ "duration": 0.0,
+ "faves": int,
+ "first_seen_at": "2020-07-08T22:26:55.743Z",
+ "format": "png",
+ "height": 576,
+ "hidden_from_users": False,
+ "id": 1,
+ "intensities": dict,
+ "locations": [],
+ "media_type": "image",
+ "mime_type": "image/png",
+ "name": "1676547__safe_artist-colon-scraggleman_oc_oc-colon-"
+ "floor+bored_oc+only_bags+under+eyes_bust_earth+pony_"
+ "female_goggles_helmet_mare_meme_neet_neet+home+g.png",
+ "orig_sha512_hash": "re:8b4c00d2[0-9a-f]{120}",
+ "processed": True,
+ "representations": dict,
+ "score": int,
+ "sha512_hash": "8b4c00d2eff52d51ad9647e14738944ab306fd1d8e1bf6"
+ "34fbb181b32f44070aa588938e26c4eb072b1eb61489aa"
+ "f3062fb644a76c79f936b97723a2c3e0e5d3",
+ "size": 70910,
+ "source_url": "",
+ "tag_ids": list,
+ "tags": list,
+ "thumbnails_generated": True,
+ "updated_at": "2022-02-03T15:49:07.110Z",
+ "upvotes": int,
+ "view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png",
+ "width": 576,
+ "wilson_score": float,
+ },
+ })
+
+ def __init__(self, match):
+ TwibooruExtractor.__init__(self, match)
+ self.post_id = match.group(1)
+
+ def posts(self):
+ return (self.api.post(self.post_id),)
+
+
+class TwibooruSearchExtractor(TwibooruExtractor):
+ """Extractor for twibooru search results"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
+ test = (
+ ("https://twibooru.org/search?q=cute", {
+ "range": "40-60",
+ "count": 21,
+ }),
+ ("https://twibooru.org/tags/cute", {
+ "range": "1-20",
+ "count": 20,
+ }),
+ )
+
+ def __init__(self, match):
+ TwibooruExtractor.__init__(self, match)
+ query, tag = match.groups()
+ if tag:
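+            # decode the escape sequences used in /tags/... URL slugs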
+ q = tag.replace("+", " ")
+ for old, new in (
+ ("-colon-" , ":"),
+ ("-dash-" , "-"),
+ ("-dot-" , "."),
+ ("-plus-" , "+"),
+ ("-fwslash-", "/"),
+ ("-bwslash-", "\\"),
+ ):
+ if old in q:
+ q = q.replace(old, new)
+ self.params = {"q": text.unquote(text.unquote(q))}
+ else:
+ self.params = text.parse_query(query)
+
+ def metadata(self):
+ return {"search_tags": self.params.get("q", "")}
+
+ def posts(self):
+ return self.api.search(self.params)
+
+
+class TwibooruGalleryExtractor(TwibooruExtractor):
+ """Extractor for twibooru galleries"""
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "galleries",
+ "{gallery[id]} {gallery[title]}")
+ pattern = BASE_PATTERN + r"/galleries/(\d+)"
+ test = ("https://twibooru.org/galleries/1", {
+ "range": "1-20",
+ "keyword": {
+ "gallery": {
+ "description": "Best nation pone and "
+ "russian related pics.",
+ "id": 1,
+ "spoiler_warning": "Russia",
+ "thumbnail_id": 694923,
+ "title": "Marussiaverse",
+ },
+ },
+ })
+
+ def __init__(self, match):
+ TwibooruExtractor.__init__(self, match)
+ self.gallery_id = match.group(1)
+
+ def metadata(self):
+ return {"gallery": self.api.gallery(self.gallery_id)}
+
+ def posts(self):
+ gallery_id = "gallery_id:" + self.gallery_id
+ params = {"sd": "desc", "sf": gallery_id, "q" : gallery_id}
+ return self.api.search(params)
+
+
+class TwibooruAPI():
+ """Interface for the Twibooru API
+
+ https://twibooru.org/pages/api
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = "https://twibooru.org/api"
+
+ def gallery(self, gallery_id):
+ endpoint = "/v3/galleries/" + gallery_id
+ return self._call(endpoint)["gallery"]
+
+ def post(self, post_id):
+ endpoint = "/v3/posts/" + post_id
+ return self._call(endpoint)["post"]
+
+ def search(self, params):
+ endpoint = "/v3/search/posts"
+ return self._pagination(endpoint, params)
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+
+ while True:
+ response = self.extractor.request(url, params=params, fatal=None)
+
+ if response.status_code < 400:
+ return response.json()
+
+ if response.status_code == 429:
+ until = text.parse_datetime(
+ response.headers["X-RL-Reset"], "%Y-%m-%d %H:%M:%S %Z")
+ # wait an extra minute, just to be safe
+ self.extractor.wait(until=until, adjust=60.0)
+ continue
+
+ # error
+ self.extractor.log.debug(response.content)
+ raise exception.StopExtraction(
+ "%s %s", response.status_code, response.reason)
+
+ def _pagination(self, endpoint, params):
+ extr = self.extractor
+
+ api_key = extr.config("api-key")
+ if api_key:
+ params["key"] = api_key
+
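+        # fall back to filter 2 when neither a filter nor an API key is set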
+ filter_id = extr.config("filter")
+ if filter_id:
+ params["filter_id"] = filter_id
+ elif not api_key:
+ params["filter_id"] = "2"
+
+ params["page"] = 1
+ params["per_page"] = per_page = extr.per_page
+
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["posts"]
+
+ if len(data["posts"]) < per_page:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 46b06c2..6d51834 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -649,6 +649,10 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/1460044411165888515", {
"count": 0,
}),
+ # "Misleading" content
+ ("https://twitter.com/i/web/status/1486373748911575046", {
+ "count": 4,
+ }),
)
def __init__(self, match):
@@ -765,7 +769,7 @@ class TwitterAPI():
"__fs_dont_mention_me_view_api_enabled": False,
}
- self._log_warnings = extractor.config("warnings")
+ self._nsfw_warning = True
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
self._user = None
@@ -789,7 +793,7 @@ class TwitterAPI():
self.headers["x-guest-token"] = guest_token
def tweet_detail(self, tweet_id):
- endpoint = "/graphql/aD0-HB47XIOxiBl5kTkX5Q/TweetDetail"
+ endpoint = "/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail"
variables = {
"focalTweetId": tweet_id,
"with_rux_injections": False,
@@ -801,7 +805,7 @@ class TwitterAPI():
endpoint, variables, ("threaded_conversation_with_injections",))
def user_tweets(self, screen_name):
- endpoint = "/graphql/LNhjy8t3XpIrBYM-ms7sPQ/UserTweets"
+ endpoint = "/graphql/WZT7sCTrLvSOaWOXLDsWbQ/UserTweets"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -810,7 +814,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_tweets_and_replies(self, screen_name):
- endpoint = "/graphql/Vg5aF036K40ST3FWvnvRGA/UserTweetsAndReplies"
+ endpoint = "/graphql/t4wEKVulW4Mbv1P0kgxTEw/UserTweetsAndReplies"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -819,7 +823,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_media(self, screen_name):
- endpoint = "/graphql/Hl6C7ac051l_QBe3HjGz_A/UserMedia"
+ endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -827,7 +831,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_likes(self, screen_name):
- endpoint = "/graphql/smISlRVSnz-GaU_XpU_akw/Likes"
+ endpoint = "/graphql/9MSTt44HoGjVFSg_u3rHDw/Likes"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -835,7 +839,7 @@ class TwitterAPI():
return self._pagination_tweets(endpoint, variables)
def user_bookmarks(self):
- endpoint = "/graphql/yKNebSjZKbo2tOd-Qdc7Xg/Bookmarks"
+ endpoint = "/graphql/uKP9v_I31k0_VSBmlpq2Xg/Bookmarks"
variables = {
"count": 100,
}
@@ -843,7 +847,7 @@ class TwitterAPI():
endpoint, variables, ("bookmark_timeline", "timeline"))
def list_latest_tweets_timeline(self, list_id):
- endpoint = "/graphql/RxUL5UHi4Msxt_P9O1729w/ListLatestTweetsTimeline"
+ endpoint = "/graphql/z3l-EHlx-fyg8OvGO4JN8A/ListLatestTweetsTimeline"
variables = {
"listId": list_id,
"count": 100,
@@ -889,7 +893,7 @@ class TwitterAPI():
raise exception.NotFoundError("list")
def list_members(self, list_id):
- endpoint = "/graphql/kk9RQtSa2sc-4_9figZVBw/ListMembers"
+ endpoint = "/graphql/snESM0DPs3c7M1SBm4rvVw/ListMembers"
variables = {
"listId": list_id,
"count": 100,
@@ -899,7 +903,7 @@ class TwitterAPI():
endpoint, variables, ("list", "members_timeline", "timeline"))
def user_following(self, screen_name):
- endpoint = "/graphql/kz464_e4MAOXc3bGOA9kow/Following"
+ endpoint = "/graphql/mIwX8GogcobVlRwlgpHNYA/Following"
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
@@ -961,20 +965,9 @@ class TwitterAPI():
if csrf_token:
self.headers["x-csrf-token"] = csrf_token
- data = response.json()
- if "errors" in data:
- try:
- errors = ", ".join(e["message"] for e in data["errors"])
- except Exception:
- errors = data["errors"]
- else:
- errors = ""
-
if response.status_code < 400:
# success
- if errors and self._log_warnings:
- self.extractor.log.warning(errors)
- return data
+ return response.json()
if response.status_code == 429:
# rate limit exceeded
@@ -984,6 +977,14 @@ class TwitterAPI():
continue
# error
+ try:
+ data = response.json()
+ errors = ", ".join(e["message"] for e in data["errors"])
+ except ValueError:
+ errors = response.text
+ except Exception:
+ errors = data.get("errors", "")
+
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, errors)
@@ -1151,6 +1152,10 @@ class TwitterAPI():
tweets.extend(entry["content"]["items"])
elif esw("conversationthread-"):
tweets.extend(entry["content"]["items"])
+ elif esw("tombstone-"):
+ self._report_tombstone(
+ entry,
+ entry["content"]["itemContent"]["tombstoneInfo"])
elif esw("cursor-bottom-"):
cursor = entry["content"]
if not cursor.get("stopOnEmptyResponse", True):
@@ -1162,6 +1167,11 @@ class TwitterAPI():
try:
tweet = ((entry.get("content") or entry["item"])
["itemContent"]["tweet_results"]["result"])
+ if "tombstone" in tweet:
+ self._report_tombstone(entry, tweet["tombstone"])
+ continue
+ if "tweet" in tweet:
+ tweet = tweet["tweet"]
legacy = tweet["legacy"]
except KeyError:
extr.log.debug(
@@ -1248,3 +1258,11 @@ class TwitterAPI():
if stop or not cursor or not entry:
return
variables["cursor"] = cursor
+
+ def _report_tombstone(self, entry, tombstone):
+ text = (tombstone.get("richText") or tombstone["text"])["text"]
+ if text.startswith("Age-restricted") and self._nsfw_warning:
+ self.extractor.log.warning(text)
+ self._nsfw_warning = False
+ self.extractor.log.debug(
+ "Skipping %s (%s)", entry["entryId"].rpartition("-")[2], text)
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index 8f3ef9a..b3a1652 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -64,6 +64,9 @@ class YoutubeDLExtractor(Extractor):
"nocheckcertificate" : not self._verify,
}
+ if self._proxies:
+ user_opts["proxy"] = self._proxies.get("http")
+
username, password = self._get_auth_info()
if username:
user_opts["username"], user_opts["password"] = username, password
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 15db67f..c85bb88 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,10 +10,8 @@
import os
import re
-import time
import shutil
import functools
-from email.utils import mktime_tz, parsedate_tz
from . import util, formatter, exception
WINDOWS = util.WINDOWS
@@ -327,10 +325,4 @@ class PathFormat():
mtime = self.kwdict.get("_mtime")
if mtime:
- # Set file modification time
- try:
- if isinstance(mtime, str):
- mtime = mktime_tz(parsedate_tz(mtime))
- os.utime(self.realpath, (time.time(), mtime))
- except Exception:
- pass
+ util.set_mtime(self.realpath, mtime)
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index fe65c88..e776888 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -59,6 +59,8 @@ class MetadataPP(PostProcessor):
events = events.split(",")
job.register_hooks({event: self.run for event in events}, options)
+ self.mtime = options.get("mtime")
+
def run(self, pathfmt):
directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
@@ -71,6 +73,11 @@ class MetadataPP(PostProcessor):
with open(path, "w", encoding="utf-8") as fp:
self.write(fp, pathfmt.kwdict)
+ if self.mtime:
+ mtime = pathfmt.kwdict.get("_mtime")
+ if mtime:
+ util.set_mtime(path, mtime)
+
def _directory(self, pathfmt):
return pathfmt.realdirectory
diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py
index d2f1915..098984a 100644
--- a/gallery_dl/postprocessor/mtime.py
+++ b/gallery_dl/postprocessor/mtime.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,7 +17,13 @@ class MtimePP(PostProcessor):
def __init__(self, job, options):
PostProcessor.__init__(self, job)
self.key = options.get("key", "date")
- job.register_hooks({"file": self.run}, options)
+
+ events = options.get("event")
+ if events is None:
+ events = ("file",)
+ elif isinstance(events, str):
+ events = events.split(",")
+ job.register_hooks({event: self.run for event in events}, options)
def run(self, pathfmt):
mtime = pathfmt.kwdict.get(self.key)
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index ac4bbcb..97ef3ac 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -46,6 +46,13 @@ def ensure_http_scheme(url, scheme="https://"):
return url
+def root_from_url(url, scheme="https://"):
+ """Extract scheme and domain from a URL"""
+ if not url.startswith(("https://", "http://")):
+ return scheme + url[:url.index("/")]
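+    # searching from index 8 skips the '//' of both 'http://' and 'https://'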
+ return url[:url.index("/", 8)]
+
+
def filename_from_url(url):
"""Extract the last part of an URL to use as a filename"""
try:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index bccae2d..92d1620 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2021 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -12,6 +12,7 @@ import re
import os
import sys
import json
+import time
import random
import sqlite3
import binascii
@@ -20,6 +21,7 @@ import functools
import itertools
import urllib.parse
from http.cookiejar import Cookie
+from email.utils import mktime_tz, parsedate_tz
from . import text, exception
@@ -272,6 +274,15 @@ def remove_directory(path):
pass
+def set_mtime(path, mtime):
+ try:
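+        # date strings (e.g. HTTP 'Last-Modified') are parsed to a timestamp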
+ if isinstance(mtime, str):
+ mtime = mktime_tz(parsedate_tz(mtime))
+ os.utime(path, (time.time(), mtime))
+ except Exception:
+ pass
+
+
def load_cookiestxt(fp):
"""Parse a Netscape cookies.txt file and return a list of its Cookies"""
cookies = []
@@ -413,6 +424,7 @@ GLOBALS = {
"parse_int": text.parse_int,
"urlsplit" : urllib.parse.urlsplit,
"datetime" : datetime.datetime,
+ "timedelta": datetime.timedelta,
"abort" : raises(exception.StopExtraction),
"terminate": raises(exception.TerminateExtraction),
"re" : re,
@@ -510,6 +522,26 @@ def build_extractor_filter(categories, negate=True, special=None):
return lambda extr: any(t(extr) for t in tests)
+def build_proxy_map(proxies, log=None):
+ """Generate a proxy map"""
+ if not proxies:
+ return None
+
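+    # a single proxy string is used for both HTTP and HTTPS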
+ if isinstance(proxies, str):
+ if "://" not in proxies:
+ proxies = "http://" + proxies.lstrip("/")
+ return {"http": proxies, "https": proxies}
+
+ if isinstance(proxies, dict):
+ for scheme, proxy in proxies.items():
+ if "://" not in proxy:
+ proxies[scheme] = "http://" + proxy.lstrip("/")
+ return proxies
+
+ if log:
+ log.warning("invalid proxy specifier: %s", proxies)
+
+
def build_predicate(predicates):
if not predicates:
return lambda url, kwdict: True
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 4bc9b57..54c81aa 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.20.5"
+__version__ = "1.21.0"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index e6953eb..45b9826 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -46,8 +46,6 @@ def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
if opts.get("format") is None:
opts["format"] = config("format")
- if opts.get("proxy") is None:
- opts["proxy"] = obj.session.proxies.get("http")
if opts.get("nopart") is None:
opts["nopart"] = not config("part", True)
if opts.get("updatetime") is None:
diff --git a/setup.py b/setup.py
index 1a5c315..bf1d927 100644
--- a/setup.py
+++ b/setup.py
@@ -35,6 +35,7 @@ FILES = [
for (path, files) in [
("share/bash-completion/completions", ["data/completion/gallery-dl"]),
("share/zsh/site-functions" , ["data/completion/_gallery-dl"]),
+ ("share/fish/vendor_completions.d" , ["data/completion/gallery-dl.fish"]),
("share/man/man1" , ["data/man/gallery-dl.1"]),
("share/man/man5" , ["data/man/gallery-dl.conf.5"]),
]
diff --git a/test/test_cookies.py b/test/test_cookies.py
index d103d02..0657456 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2017-2020 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -12,6 +12,7 @@ import sys
import unittest
from unittest import mock
+import time
import logging
import tempfile
from os.path import join
@@ -88,7 +89,7 @@ class TestCookiedict(unittest.TestCase):
self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values()))
def test_domain(self):
- for category in ["exhentai", "idolcomplex", "nijie", "seiga"]:
+ for category in ["exhentai", "idolcomplex", "nijie"]:
extr = _get_extractor(category)
cookies = extr.session.cookies
for key in self.cdict:
@@ -107,7 +108,6 @@ class TestCookieLogin(unittest.TestCase):
"exhentai" : ("ipb_member_id", "ipb_pass_hash"),
"idolcomplex": ("login", "pass_hash"),
"nijie" : ("nemail", "nlogin"),
- "seiga" : ("user_session",),
}
for category, cookienames in extr_cookies.items():
cookies = {name: "value" for name in cookienames}
@@ -118,6 +118,86 @@ class TestCookieLogin(unittest.TestCase):
mock_login.assert_not_called()
+class TestCookieUtils(unittest.TestCase):
+
+ def test_check_cookies(self):
+ extr = extractor.find("test:")
+ self.assertFalse(extr._cookiejar, "empty")
+ self.assertFalse(extr.cookiedomain, "empty")
+
+        # always returns False when checking for an empty cookie list
+ self.assertFalse(extr._check_cookies(()))
+
+ self.assertFalse(extr._check_cookies(("a",)))
+ self.assertFalse(extr._check_cookies(("a", "b")))
+ self.assertFalse(extr._check_cookies(("a", "b", "c")))
+
+ extr._cookiejar.set("a", "1")
+ self.assertTrue(extr._check_cookies(("a",)))
+ self.assertFalse(extr._check_cookies(("a", "b")))
+ self.assertFalse(extr._check_cookies(("a", "b", "c")))
+
+ extr._cookiejar.set("b", "2")
+ self.assertTrue(extr._check_cookies(("a",)))
+ self.assertTrue(extr._check_cookies(("a", "b")))
+ self.assertFalse(extr._check_cookies(("a", "b", "c")))
+
+ def test_check_cookies_domain(self):
+ extr = extractor.find("test:")
+ self.assertFalse(extr._cookiejar, "empty")
+ extr.cookiedomain = ".example.org"
+
+ self.assertFalse(extr._check_cookies(("a",)))
+ self.assertFalse(extr._check_cookies(("a", "b")))
+
+ extr._cookiejar.set("a", "1")
+ self.assertFalse(extr._check_cookies(("a",)))
+
+ extr._cookiejar.set("a", "1", domain=extr.cookiedomain)
+ self.assertTrue(extr._check_cookies(("a",)))
+
+ extr._cookiejar.set("a", "1", domain="www" + extr.cookiedomain)
+ self.assertEqual(len(extr._cookiejar), 3)
+ self.assertTrue(extr._check_cookies(("a",)))
+
+ extr._cookiejar.set("b", "2", domain=extr.cookiedomain)
+ extr._cookiejar.set("c", "3", domain=extr.cookiedomain)
+ self.assertTrue(extr._check_cookies(("a", "b", "c")))
+
+ def test_check_cookies_expires(self):
+ extr = extractor.find("test:")
+ self.assertFalse(extr._cookiejar, "empty")
+ self.assertFalse(extr.cookiedomain, "empty")
+
+ now = int(time.time())
+ log = logging.getLogger("test")
+
+ extr._cookiejar.set("a", "1", expires=now-100)
+ with mock.patch.object(log, "warning") as mw:
+ self.assertFalse(extr._check_cookies(("a",)))
+ self.assertEqual(mw.call_count, 1)
+ self.assertEqual(mw.call_args[0], ("Cookie '%s' has expired", "a"))
+
+ extr._cookiejar.set("a", "1", expires=now+100)
+ with mock.patch.object(log, "warning") as mw:
+ self.assertFalse(extr._check_cookies(("a",)))
+ self.assertEqual(mw.call_count, 1)
+ self.assertEqual(mw.call_args[0], (
+ "Cookie '%s' will expire in less than %s hour%s", "a", 1, ""))
+
+ extr._cookiejar.set("a", "1", expires=now+100+7200)
+ with mock.patch.object(log, "warning") as mw:
+ self.assertFalse(extr._check_cookies(("a",)))
+ self.assertEqual(mw.call_count, 1)
+ self.assertEqual(mw.call_args[0], (
+ "Cookie '%s' will expire in less than %s hour%s", "a", 3, "s"))
+
+ extr._cookiejar.set("a", "1", expires=now+100+24*3600)
+ with mock.patch.object(log, "warning") as mw:
+ self.assertTrue(extr._check_cookies(("a",)))
+ self.assertEqual(mw.call_count, 0)
+
+
def _get_extractor(category):
for extr in extractor.extractors():
if extr.category == category and hasattr(extr, "_login_impl"):
diff --git a/test/test_text.py b/test/test_text.py
index 3ab9e73..ffed726 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -102,6 +102,18 @@ class TestText(unittest.TestCase):
for value in INVALID_ALT:
self.assertEqual(f(value), value)
+ def test_root_from_url(self, f=text.root_from_url):
+ result = "https://example.org"
+ self.assertEqual(f("https://example.org/") , result)
+ self.assertEqual(f("https://example.org/path"), result)
+ self.assertEqual(f("example.org/") , result)
+ self.assertEqual(f("example.org/path/") , result)
+
+ result = "http://example.org"
+ self.assertEqual(f("http://example.org/") , result)
+ self.assertEqual(f("http://example.org/path/"), result)
+ self.assertEqual(f("example.org/", "http://") , result)
+
def test_filename_from_url(self, f=text.filename_from_url):
result = "filename.ext"