about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG.md 23
-rw-r--r-- PKG-INFO 6
-rw-r--r-- README.rst 4
-rw-r--r-- data/completion/_gallery-dl 5
-rw-r--r-- data/completion/gallery-dl 4
-rw-r--r-- data/completion/gallery-dl.fish 5
-rw-r--r-- data/man/gallery-dl.1 17
-rw-r--r-- data/man/gallery-dl.conf.5 34
-rw-r--r-- docs/gallery-dl.conf 4
-rw-r--r-- gallery_dl.egg-info/PKG-INFO 6
-rw-r--r-- gallery_dl.egg-info/SOURCES.txt 3
-rw-r--r-- gallery_dl/__init__.py 4
-rw-r--r-- gallery_dl/config.py 17
-rw-r--r-- gallery_dl/cookies.py 77
-rw-r--r-- gallery_dl/extractor/420chan.py 76
-rw-r--r-- gallery_dl/extractor/4chanarchives.py 139
-rw-r--r-- gallery_dl/extractor/__init__.py 3
-rw-r--r-- gallery_dl/extractor/common.py 45
-rw-r--r-- gallery_dl/extractor/exhentai.py 21
-rw-r--r-- gallery_dl/extractor/foolfuuka.py 28
-rw-r--r-- gallery_dl/extractor/foolslide.py 13
-rw-r--r-- gallery_dl/extractor/imagechest.py 18
-rw-r--r-- gallery_dl/extractor/imgur.py 80
-rw-r--r-- gallery_dl/extractor/mangadex.py 11
-rw-r--r-- gallery_dl/extractor/myportfolio.py 6
-rw-r--r-- gallery_dl/extractor/nana.py 118
-rw-r--r-- gallery_dl/extractor/oauth.py 2
-rw-r--r-- gallery_dl/extractor/pinterest.py 34
-rw-r--r-- gallery_dl/extractor/sankakucomplex.py 4
-rw-r--r-- gallery_dl/extractor/urlshortener.py 2
-rw-r--r-- gallery_dl/option.py 36
-rw-r--r-- gallery_dl/version.py 2
-rw-r--r-- test/test_ytdl.py 23
33 files changed, 476 insertions, 394 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a76a0dd..85c732d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
# Changelog
+## 1.25.4 - 2023-05-07
+### Additions
+- [4chanarchives] add `thread` and `board` extractors ([#4012](https://github.com/mikf/gallery-dl/issues/4012))
+- [foolfuuka] add `archive.palanq.win`
+- [imgur] add `favorite-folder` extractor ([#4016](https://github.com/mikf/gallery-dl/issues/4016))
+- [mangadex] add `status` and `tags` metadata ([#4031](https://github.com/mikf/gallery-dl/issues/4031))
+- allow selecting a domain with `--cookies-from-browser`
+- add `--cookies-export` command-line option
+- add `-C` as short option for `--cookies`
+- include exception type in config error messages
+### Fixes
+- [exhentai] update sadpanda check
+- [imagechest] load all images when a "Load More" button is present ([#4028](https://github.com/mikf/gallery-dl/issues/4028))
+- [imgur] fix bug causing some images/albums from user profiles and favorites to be ignored
+- [pinterest] update endpoint for related board pins
+- [pinterest] fix `pin.it` extractor
+- [ytdl] fix yt-dlp `--xff/--geo-bypass` tests ([#3989](https://github.com/mikf/gallery-dl/issues/3989))
+### Removals
+- [420chan] remove module
+- [foolfuuka] remove `archive.alice.al` and `tokyochronos.net`
+- [foolslide] remove `sensescans.com`
+- [nana] remove module
+
## 1.25.3 - 2023-04-30
### Additions
- [imagefap] extract `description` and `categories` metadata ([#3905](https://github.com/mikf/gallery-dl/issues/3905))
diff --git a/PKG-INFO b/PKG-INFO
index f4807ab..8b90cda 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.25.3
+Version: 1.25.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 1f4b692..36f3ffa 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index a82db8a..94d2f06 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -16,8 +16,6 @@ _arguments -C -S \
--source-address'[Client-side IP address to bind to]':'<ip>' \
--user-agent'[User-Agent request header]':'<ua>' \
--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
---cookies'[File to load additional cookies from]':'<file>':_files \
---cookies-from-browser'[Name of the browser to load cookies from, with optional keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)]':'<browser[+keyring][:profile][::container]>' \
{-q,--quiet}'[Activate quiet mode]' \
{-v,--verbose}'[Print various debugging information]' \
{-g,--get-urls}'[Print URLs instead of downloading]' \
@@ -56,6 +54,9 @@ _arguments -C -S \
{-u,--username}'[Username to login with]':'<user>' \
{-p,--password}'[Password belonging to the given username]':'<pass>' \
--netrc'[Enable .netrc authentication data]' \
+{-C,--cookies}'[File to load additional cookies from]':'<file>':_files \
+--cookies-export'[Export session cookies to FILE]':'<file>':_files \
+--cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)]':'<browser[/domain][+keyring][:profile][::container]>' \
--download-archive'[Record all downloaded or skipped files in FILE and skip downloading any file already in it]':'<file>':_files \
{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 1750af8..02fc847 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -5,12 +5,12 @@ _gallery_dl()
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
- if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|--download-archive)$ ]]; then
+ if [[ "${prev}" =~ ^(-i|--input-file|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|-C|--cookies|--cookies-export|--download-archive)$ ]]; then
COMPREPLY=( $(compgen -f -- "${cur}") )
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --input-file --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --input-file --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index d764543..00e7b24 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -10,8 +10,6 @@ complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy'
complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to'
complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header'
complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)'
-complete -c gallery-dl -r -F -l 'cookies' -d 'File to load additional cookies from'
-complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)'
complete -c gallery-dl -s 'q' -l 'quiet' -d 'Activate quiet mode'
complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging information'
complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading'
@@ -50,6 +48,9 @@ complete -c gallery-dl -l 'ignore-config' -d '==SUPPRESS=='
complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with'
complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username'
complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data'
+complete -c gallery-dl -r -F -s 'C' -l 'cookies' -d 'File to load additional cookies from'
+complete -c gallery-dl -r -F -l 'cookies-export' -d 'Export session cookies to FILE'
+complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)'
complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded or skipped files in FILE and skip downloading any file already in it'
complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped'
complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index b5ad7f2..f018c63 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-04-30" "1.25.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-05-07" "1.25.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -50,12 +50,6 @@ User-Agent request header
.B "\-\-clear\-cache" \f[I]MODULE\f[]
Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)
.TP
-.B "\-\-cookies" \f[I]FILE\f[]
-File to load additional cookies from
-.TP
-.B "\-\-cookies\-from\-browser" \f[I]BROWSER[+KEYRING][:PROFILE][::CONTAINER]\f[]
-Name of the browser to load cookies from, with optional keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container)
-.TP
.B "\-q, \-\-quiet"
Activate quiet mode
.TP
@@ -167,6 +161,15 @@ Password belonging to the given username
.B "\-\-netrc"
Enable .netrc authentication data
.TP
+.B "\-C, \-\-cookies" \f[I]FILE\f[]
+File to load additional cookies from
+.TP
+.B "\-\-cookies\-export" \f[I]FILE\f[]
+Export session cookies to FILE
+.TP
+.B "\-\-cookies\-from\-browser" \f[I]BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]\f[]
+Name of the browser to load cookies from, with optional domain prefixed with '/', keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container)
+.TP
.B "\-\-download\-archive" \f[I]FILE\f[]
Record all downloaded or skipped files in FILE and skip downloading any file already in it
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 3bd9f17..8008451 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-04-30" "1.25.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-05-07" "1.25.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -548,25 +548,35 @@ Source to read additional cookies from. This can be
* The optional third entry is the keyring to retrieve passwords for decrypting cookies from
.br
* The optional fourth entry is a (Firefox) container name (\f[I]"none"\f[] for only cookies with no container)
+.br
+* The optional fifth entry is the domain to extract cookies for. Prefix it with a dot \f[I].\f[] to include cookies for subdomains. Has no effect when also specifying a container.
.. code:: json
["firefox"]
["firefox", null, null, "Personal"]
-["chromium", "Private", "kwallet"]
+["chromium", "Private", "kwallet", null, ".twitter.com"]
.SS extractor.*.cookies-update
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]Path\f[]
.IP "Default:" 9
\f[I]true\f[]
.IP "Description:" 4
-If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] of a cookies.txt
-file and it can be opened and parsed without errors,
-update its contents with cookies received during data extraction.
+Export session cookies in cookies.txt format.
+
+.br
+* If this is a \f[I]Path\f[], write cookies to the given file path.
+
+.br
+* If this is \f[I]true\f[] and \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[]
+of a valid cookies.txt file, update its contents.
.SS extractor.*.proxy
@@ -2409,18 +2419,6 @@ Fetch media from renoted notes.
Fetch media from replies to other notes.
-.SS extractor.nana.favkey
-.IP "Type:" 6
-\f[I]string\f[]
-
-.IP "Default:" 9
-\f[I]null\f[]
-
-.IP "Description:" 4
-Your \f[I]Nana Favorite Key\f[],
-used to access your favorite archives.
-
-
.SS extractor.newgrounds.flash
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 09d9e80..92451fd 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -201,10 +201,6 @@
"format": "original",
"include": "art"
},
- "nana":
- {
- "favkey": null
- },
"nijie":
{
"username": null,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 73bcd92..3554f49 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.25.3
+Version: 1.25.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 2e66ea6..8be6871 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -44,8 +44,8 @@ gallery_dl/extractor/2chan.py
gallery_dl/extractor/2chen.py
gallery_dl/extractor/35photo.py
gallery_dl/extractor/3dbooru.py
-gallery_dl/extractor/420chan.py
gallery_dl/extractor/4chan.py
+gallery_dl/extractor/4chanarchives.py
gallery_dl/extractor/500px.py
gallery_dl/extractor/8chan.py
gallery_dl/extractor/8muses.py
@@ -140,7 +140,6 @@ gallery_dl/extractor/misskey.py
gallery_dl/extractor/moebooru.py
gallery_dl/extractor/myhentaigallery.py
gallery_dl/extractor/myportfolio.py
-gallery_dl/extractor/nana.py
gallery_dl/extractor/naver.py
gallery_dl/extractor/naverwebtoon.py
gallery_dl/extractor/newgrounds.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index a430f13..1450e8f 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -70,12 +70,14 @@ def main():
if args.cookies_from_browser:
browser, _, profile = args.cookies_from_browser.partition(":")
browser, _, keyring = browser.partition("+")
+ browser, _, domain = browser.partition("/")
if profile.startswith(":"):
container = profile[1:]
profile = None
else:
profile, _, container = profile.partition("::")
- config.set((), "cookies", (browser, profile, keyring, container))
+ config.set((), "cookies", (
+ browser, profile, keyring, container, domain))
if args.options_pp:
config.set((), "postprocessor-options", args.options_pp)
for opts in args.options:
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index d014293..0b2aca8 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -102,7 +102,8 @@ def load(files=None, strict=False, load=util.json_loads):
log.error(exc)
sys.exit(1)
except Exception as exc:
- log.warning("Could not parse '%s': %s", path, exc)
+ log.error("%s when loading '%s': %s",
+ exc.__class__.__name__, path, exc)
if strict:
sys.exit(2)
else:
@@ -118,7 +119,7 @@ def clear():
_config.clear()
-def get(path, key, default=None, *, conf=_config):
+def get(path, key, default=None, conf=_config):
"""Get the value of property 'key' or a default value"""
try:
for p in path:
@@ -128,7 +129,7 @@ def get(path, key, default=None, *, conf=_config):
return default
-def interpolate(path, key, default=None, *, conf=_config):
+def interpolate(path, key, default=None, conf=_config):
"""Interpolate the value of 'key'"""
if key in conf:
return conf[key]
@@ -142,7 +143,7 @@ def interpolate(path, key, default=None, *, conf=_config):
return default
-def interpolate_common(common, paths, key, default=None, *, conf=_config):
+def interpolate_common(common, paths, key, default=None, conf=_config):
"""Interpolate the value of 'key'
using multiple 'paths' along a 'common' ancestor
"""
@@ -174,7 +175,7 @@ def interpolate_common(common, paths, key, default=None, *, conf=_config):
return default
-def accumulate(path, key, *, conf=_config):
+def accumulate(path, key, conf=_config):
"""Accumulate the values of 'key' along 'path'"""
result = []
try:
@@ -193,7 +194,7 @@ def accumulate(path, key, *, conf=_config):
return result
-def set(path, key, value, *, conf=_config):
+def set(path, key, value, conf=_config):
"""Set the value of property 'key' for this session"""
for p in path:
try:
@@ -203,7 +204,7 @@ def set(path, key, value, *, conf=_config):
conf[key] = value
-def setdefault(path, key, value, *, conf=_config):
+def setdefault(path, key, value, conf=_config):
"""Set the value of property 'key' if it doesn't exist"""
for p in path:
try:
@@ -213,7 +214,7 @@ def setdefault(path, key, value, *, conf=_config):
return conf.setdefault(key, value)
-def unset(path, key, *, conf=_config):
+def unset(path, key, conf=_config):
"""Unset the value of property 'key'"""
try:
for p in path:
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 3d715a7..32ba323 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -20,7 +20,6 @@ import struct
import subprocess
import sys
import tempfile
-from datetime import datetime, timedelta, timezone
from hashlib import pbkdf2_hmac
from http.cookiejar import Cookie
from . import aes, text, util
@@ -34,19 +33,19 @@ logger = logging.getLogger("cookies")
def load_cookies(cookiejar, browser_specification):
- browser_name, profile, keyring, container = \
+ browser_name, profile, keyring, container, domain = \
_parse_browser_specification(*browser_specification)
if browser_name == "firefox":
- load_cookies_firefox(cookiejar, profile, container)
+ load_cookies_firefox(cookiejar, profile, container, domain)
elif browser_name == "safari":
- load_cookies_safari(cookiejar, profile)
+ load_cookies_safari(cookiejar, profile, domain)
elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
- load_cookies_chrome(cookiejar, browser_name, profile, keyring)
+ load_cookies_chrome(cookiejar, browser_name, profile, keyring, domain)
else:
raise ValueError("unknown browser '{}'".format(browser_name))
-def load_cookies_firefox(cookiejar, profile=None, container=None):
+def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None):
path, container_id = _firefox_cookies_database(profile, container)
with DatabaseCopy(path) as db:
@@ -60,6 +59,13 @@ def load_cookies_firefox(cookiejar, profile=None, container=None):
sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?"
uid = "%userContextId={}".format(container_id)
parameters = (uid, uid + "&%")
+ elif domain:
+ if domain[0] == ".":
+ sql += " WHERE host == ? OR host LIKE ?"
+ parameters = (domain[1:], "%" + domain)
+ else:
+ sql += " WHERE host == ? OR host == ?"
+ parameters = (domain, "." + domain)
set_cookie = cookiejar.set_cookie
for name, value, domain, path, secure, expires in db.execute(
@@ -69,9 +75,10 @@ def load_cookies_firefox(cookiejar, profile=None, container=None):
domain, bool(domain), domain.startswith("."),
path, bool(path), secure, expires, False, None, None, {},
))
+ logger.info("Extracted %s cookies from Firefox", len(cookiejar))
-def load_cookies_safari(cookiejar, profile=None):
+def load_cookies_safari(cookiejar, profile=None, domain=None):
"""Ref.: https://github.com/libyal/dtformats/blob
/main/documentation/Safari%20Cookies.asciidoc
- This data appears to be out of date
@@ -87,7 +94,8 @@ def load_cookies_safari(cookiejar, profile=None):
_safari_parse_cookies_page(p.read_bytes(page_size), cookiejar)
-def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
+def load_cookies_chrome(cookiejar, browser_name, profile=None,
+ keyring=None, domain=None):
config = _get_chromium_based_browser_settings(browser_name)
path = _chrome_cookies_database(profile, config)
logger.debug("Extracting cookies from %s", path)
@@ -95,19 +103,31 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
with DatabaseCopy(path) as db:
db.text_factory = bytes
decryptor = get_cookie_decryptor(
- config["directory"], config["keyring"], keyring=keyring)
+ config["directory"], config["keyring"], keyring)
+
+ if domain:
+ if domain[0] == ".":
+ condition = " WHERE host_key == ? OR host_key LIKE ?"
+ parameters = (domain[1:], "%" + domain)
+ else:
+ condition = " WHERE host_key == ? OR host_key == ?"
+ parameters = (domain, "." + domain)
+ else:
+ condition = ""
+ parameters = ()
try:
rows = db.execute(
"SELECT host_key, name, value, encrypted_value, path, "
- "expires_utc, is_secure FROM cookies")
+ "expires_utc, is_secure FROM cookies" + condition, parameters)
except sqlite3.OperationalError:
rows = db.execute(
"SELECT host_key, name, value, encrypted_value, path, "
- "expires_utc, secure FROM cookies")
+ "expires_utc, secure FROM cookies" + condition, parameters)
set_cookie = cookiejar.set_cookie
- failed_cookies = unencrypted_cookies = 0
+ failed_cookies = 0
+ unencrypted_cookies = 0
for domain, name, value, enc_value, path, expires, secure in rows:
@@ -136,8 +156,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
failed_message = ""
logger.info("Extracted %s cookies from %s%s",
- len(cookiejar), browser_name, failed_message)
- counts = decryptor.cookie_counts.copy()
+ len(cookiejar), browser_name.capitalize(), failed_message)
+ counts = decryptor.cookie_counts
counts["unencrypted"] = unencrypted_cookies
logger.debug("cookie version breakdown: %s", counts)
@@ -224,7 +244,7 @@ def _safari_parse_cookies_header(data):
return page_sizes, p.cursor
-def _safari_parse_cookies_page(data, jar):
+def _safari_parse_cookies_page(data, cookiejar, domain=None):
p = DataParser(data)
p.expect_bytes(b"\x00\x00\x01\x00", "page signature")
number_of_cookies = p.read_uint()
@@ -238,12 +258,12 @@ def _safari_parse_cookies_page(data, jar):
for i, record_offset in enumerate(record_offsets):
p.skip_to(record_offset, "space between records")
record_length = _safari_parse_cookies_record(
- data[record_offset:], jar)
+ data[record_offset:], cookiejar, domain)
p.read_bytes(record_length)
p.skip_to_end("space in between pages")
-def _safari_parse_cookies_record(data, cookiejar):
+def _safari_parse_cookies_record(data, cookiejar, host=None):
p = DataParser(data)
record_size = p.read_uint()
p.skip(4, "unknown record field 1")
@@ -262,6 +282,14 @@ def _safari_parse_cookies_record(data, cookiejar):
p.skip_to(domain_offset)
domain = p.read_cstring()
+ if host:
+ if host[0] == ".":
+ if host[1:] != domain and not domain.endswith(host):
+ return record_size
+ else:
+ if host != domain and ("." + host) != domain:
+ return record_size
+
p.skip_to(name_offset)
name = p.read_cstring()
@@ -412,18 +440,17 @@ class ChromeCookieDecryptor:
raise NotImplementedError("Must be implemented by sub classes")
-def get_cookie_decryptor(browser_root, browser_keyring_name, *, keyring=None):
+def get_cookie_decryptor(browser_root, browser_keyring_name, keyring=None):
if sys.platform in ("win32", "cygwin"):
return WindowsChromeCookieDecryptor(browser_root)
elif sys.platform == "darwin":
return MacChromeCookieDecryptor(browser_keyring_name)
else:
- return LinuxChromeCookieDecryptor(
- browser_keyring_name, keyring=keyring)
+ return LinuxChromeCookieDecryptor(browser_keyring_name, keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
- def __init__(self, browser_keyring_name, *, keyring=None):
+ def __init__(self, browser_keyring_name, keyring=None):
self._v10_key = self.derive_key(b"peanuts")
password = _get_linux_keyring_password(browser_keyring_name, keyring)
self._v11_key = None if password is None else self.derive_key(password)
@@ -893,8 +920,8 @@ def _get_linux_desktop_environment(env):
def _mac_absolute_time_to_posix(timestamp):
- return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) +
- timedelta(seconds=timestamp)).timestamp())
+ # 978307200 is timestamp of 2001-01-01 00:00:00
+ return 978307200 + int(timestamp)
def pbkdf2_sha1(password, salt, iterations, key_length):
@@ -979,7 +1006,7 @@ def _is_path(value):
def _parse_browser_specification(
- browser, profile=None, keyring=None, container=None):
+ browser, profile=None, keyring=None, container=None, domain=None):
browser = browser.lower()
if browser not in SUPPORTED_BROWSERS:
raise ValueError("unsupported browser '{}'".format(browser))
@@ -987,4 +1014,4 @@ def _parse_browser_specification(
raise ValueError("unsupported keyring '{}'".format(keyring))
if profile and _is_path(profile):
profile = os.path.expanduser(profile)
- return browser, profile, keyring, container
+ return browser, profile, keyring, container, domain
diff --git a/gallery_dl/extractor/420chan.py b/gallery_dl/extractor/420chan.py
deleted file mode 100644
index fd0172e..0000000
--- a/gallery_dl/extractor/420chan.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2021 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://420chan.org/"""
-
-from .common import Extractor, Message
-
-
-class _420chanThreadExtractor(Extractor):
- """Extractor for 420chan threads"""
- category = "420chan"
- subcategory = "thread"
- directory_fmt = ("{category}", "{board}", "{thread} {title}")
- archive_fmt = "{board}_{thread}_{filename}"
- pattern = r"(?:https?://)?boards\.420chan\.org/([^/?#]+)/thread/(\d+)"
- test = ("https://boards.420chan.org/ani/thread/33251/chow-chows", {
- "pattern": r"https://boards\.420chan\.org/ani/src/\d+\.jpg",
- "content": "b07c803b0da78de159709da923e54e883c100934",
- "count": 2,
- })
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
-
- def items(self):
- url = "https://api.420chan.org/{}/res/{}.json".format(
- self.board, self.thread)
- posts = self.request(url).json()["posts"]
-
- data = {
- "board" : self.board,
- "thread": self.thread,
- "title" : posts[0].get("sub") or posts[0]["com"][:50],
- }
-
- yield Message.Directory, data
- for post in posts:
- if "filename" in post:
- post.update(data)
- post["extension"] = post["ext"][1:]
- url = "https://boards.420chan.org/{}/src/{}{}".format(
- post["board"], post["filename"], post["ext"])
- yield Message.Url, url, post
-
-
-class _420chanBoardExtractor(Extractor):
- """Extractor for 420chan boards"""
- category = "420chan"
- subcategory = "board"
- pattern = r"(?:https?://)?boards\.420chan\.org/([^/?#]+)/\d*$"
- test = ("https://boards.420chan.org/po/", {
- "pattern": _420chanThreadExtractor.pattern,
- "count": ">= 100",
- })
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board = match.group(1)
-
- def items(self):
- url = "https://api.420chan.org/{}/threads.json".format(self.board)
- threads = self.request(url).json()
-
- for page in threads:
- for thread in page["threads"]:
- url = "https://boards.420chan.org/{}/thread/{}/".format(
- self.board, thread["no"])
- thread["page"] = page["page"]
- thread["_extractor"] = _420chanThreadExtractor
- yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/4chanarchives.py b/gallery_dl/extractor/4chanarchives.py
new file mode 100644
index 0000000..041e6a3
--- /dev/null
+++ b/gallery_dl/extractor/4chanarchives.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://4chanarchives.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class _4chanarchivesThreadExtractor(Extractor):
+ """Extractor for threads on 4chanarchives.com"""
+ category = "4chanarchives"
+ subcategory = "thread"
+ root = "https://4chanarchives.com"
+ directory_fmt = ("{category}", "{board}", "{thread} - {title}")
+ filename_fmt = "{no}-{filename}.{extension}"
+ archive_fmt = "{board}_{thread}_{no}"
+ pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)"
+ test = (
+ ("https://4chanarchives.com/board/c/thread/2707110", {
+ "pattern": r"https://i\.imgur\.com/(0wLGseE|qbByWDc)\.jpg",
+ "count": 2,
+ "keyword": {
+ "board": "c",
+ "com": str,
+ "name": "Anonymous",
+ "no": int,
+ "thread": "2707110",
+ "time": r"re:2016-07-1\d \d\d:\d\d:\d\d",
+ "title": "Ren Kagami from 'Oyako Neburi'",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "{}/board/{}/thread/{}".format(
+ self.root, self.board, self.thread)
+ page = self.request(url).text
+ data = self.metadata(page)
+ posts = self.posts(page)
+
+ if not data["title"]:
+ data["title"] = text.unescape(text.remove_html(
+ posts[0]["com"]))[:50]
+
+ for post in posts:
+ post.update(data)
+ yield Message.Directory, post
+ if "url" in post:
+ yield Message.Url, post["url"], post
+
+ def metadata(self, page):
+ return {
+ "board" : self.board,
+ "thread" : self.thread,
+ "title" : text.unescape(text.extr(
+ page, 'property="og:title" content="', '"')),
+ }
+
+ def posts(self, page):
+ """Build a list of all post objects"""
+ return [self.parse(html) for html in text.extract_iter(
+ page, 'id="pc', '</blockquote>')]
+
+ def parse(self, html):
+ """Build post object by extracting data from an HTML post"""
+ post = self._extract_post(html)
+ if ">File: <" in html:
+ self._extract_file(html, post)
+ post["extension"] = post["url"].rpartition(".")[2]
+ return post
+
+ @staticmethod
+ def _extract_post(html):
+ extr = text.extract_from(html)
+ return {
+ "no" : text.parse_int(extr('', '"')),
+ "name": extr('class="name">', '<'),
+ "time": extr('class="dateTime postNum" >', '<').rstrip(),
+ "com" : text.unescape(
+ html[html.find('<blockquote'):].partition(">")[2]),
+ }
+
+ @staticmethod
+ def _extract_file(html, post):
+ extr = text.extract_from(html, html.index(">File: <"))
+ post["url"] = extr('href="', '"')
+ post["filename"] = text.unquote(extr(">", "<").rpartition(".")[0])
+ post["fsize"] = extr("(", ", ")
+ post["w"] = text.parse_int(extr("", "x"))
+ post["h"] = text.parse_int(extr("", ")"))
+
+
+class _4chanarchivesBoardExtractor(Extractor):
+ """Extractor for boards on 4chanarchives.com"""
+ category = "4chanarchives"
+ subcategory = "board"
+ root = "https://4chanarchives.com"
+ pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)(?:/(\d+))?/?$"
+ test = (
+ ("https://4chanarchives.com/board/c/", {
+ "pattern": _4chanarchivesThreadExtractor.pattern,
+ "range": "1-40",
+ "count": 40,
+ }),
+ ("https://4chanarchives.com/board/c"),
+ ("https://4chanarchives.com/board/c/10"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.page = match.groups()
+
+ def items(self):
+ data = {"_extractor": _4chanarchivesThreadExtractor}
+ pnum = text.parse_int(self.page, 1)
+ needle = '''<span class="postNum desktop">
+ <span><a href="'''
+
+ while True:
+ url = "{}/board/{}/{}".format(self.root, self.board, pnum)
+ page = self.request(url).text
+
+ thread = None
+ for thread in text.extract_iter(page, needle, '"'):
+ yield Message.Queue, thread, data
+
+ if thread is None:
+ return
+ pnum += 1
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9841ca7..5475fea 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -14,8 +14,8 @@ modules = [
"2chen",
"35photo",
"3dbooru",
- "420chan",
"4chan",
+ "4chanarchives",
"500px",
"8chan",
"8muses",
@@ -97,7 +97,6 @@ modules = [
"misskey",
"myhentaigallery",
"myportfolio",
- "nana",
"naver",
"naverwebtoon",
"newgrounds",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 8024be9..09737ef 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -90,6 +90,21 @@ class Extractor():
def config(self, key, default=None):
return config.interpolate(self._cfgpath, key, default)
+ def config_deprecated(self, key, deprecated, default=None,
+ sentinel=util.SENTINEL, history=set()):
+ value = self.config(deprecated, sentinel)
+ if value is not sentinel:
+ if deprecated not in history:
+ history.add(deprecated)
+ self.log.warning("'%s' is deprecated. Use '%s' instead.",
+ deprecated, key)
+ default = value
+
+ value = self.config(key, sentinel)
+ if value is not sentinel:
+ return value
+ return default
+
def config_accumulate(self, key):
return config.accumulate(self._cfgpath, key)
@@ -106,7 +121,7 @@ class Extractor():
values[:0] = config.accumulate((self.subcategory,), key, conf=conf)
return values
- def request(self, url, *, method="GET", session=None,
+ def request(self, url, method="GET", session=None,
retries=None, retry_codes=None, encoding=None,
fatal=True, notfound=None, **kwargs):
if session is None:
@@ -180,7 +195,7 @@ class Extractor():
raise exception.HttpError(msg, response)
- def wait(self, *, seconds=None, until=None, adjust=1.0,
+ def wait(self, seconds=None, until=None, adjust=1.0,
reason="rate limit reset"):
now = time.time()
@@ -364,14 +379,24 @@ class Extractor():
def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file"""
- if self._cookiefile and self.config("cookies-update", True):
- try:
- with open(self._cookiefile, "w") as fp:
- util.cookiestxt_store(fp, self._cookiejar)
- except OSError as exc:
- self.log.warning("cookies: %s", exc)
+ export = self.config("cookies-update", True)
+ if not export:
+ return
+
+ if isinstance(export, str):
+ path = util.expand_path(export)
+ else:
+ path = self._cookiefile
+ if not path:
+ return
+
+ try:
+ with open(path, "w") as fp:
+ util.cookiestxt_store(fp, self._cookiejar)
+ except OSError as exc:
+ self.log.warning("cookies: %s", exc)
- def _update_cookies(self, cookies, *, domain=""):
+ def _update_cookies(self, cookies, domain=""):
"""Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict):
self._update_cookies_dict(cookies, domain or self.cookiedomain)
@@ -391,7 +416,7 @@ class Extractor():
for name, value in cookiedict.items():
setcookie(name, value, domain=domain)
- def _check_cookies(self, cookienames, *, domain=None):
+ def _check_cookies(self, cookienames, domain=None):
"""Check if all 'cookienames' are in the session's cookiejar"""
if not self._cookiejar:
return False
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index dccc74e..9cd7ae4 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2022 Mike Fährmann
+# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -21,8 +21,7 @@ class ExhentaiExtractor(Extractor):
"""Base class for exhentai extractors"""
category = "exhentai"
directory_fmt = ("{category}", "{gid} {title[:247]}")
- filename_fmt = (
- "{gid}_{num:>04}_{image_token}_{filename}.{extension}")
+ filename_fmt = "{gid}_{num:>04}_{image_token}_{filename}.{extension}"
archive_fmt = "{gid}_{num}"
cookienames = ("ipb_member_id", "ipb_pass_hash")
cookiedomain = ".exhentai.org"
@@ -56,10 +55,10 @@ class ExhentaiExtractor(Extractor):
if version != "ex":
self.session.cookies.set("nw", "1", domain=self.cookiedomain)
- def request(self, *args, **kwargs):
- response = Extractor.request(self, *args, **kwargs)
- if self._is_sadpanda(response):
- self.log.info("sadpanda.jpg")
+ def request(self, url, **kwargs):
+ response = Extractor.request(self, url, **kwargs)
+ if response.history and response.headers.get("Content-Length") == "0":
+ self.log.info("blank page")
raise exception.AuthorizationError()
return response
@@ -100,14 +99,6 @@ class ExhentaiExtractor(Extractor):
raise exception.AuthenticationError()
return {c: response.cookies[c] for c in self.cookienames}
- @staticmethod
- def _is_sadpanda(response):
- """Return True if the response object contains a sad panda"""
- return (
- response.headers.get("Content-Length") == "9615" and
- "sadpanda.jpg" in response.headers.get("Content-Disposition", "")
- )
-
class ExhentaiGalleryExtractor(ExhentaiExtractor):
"""Extractor for image galleries from exhentai.org"""
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 4f9a6bf..76fb69e 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2022 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -88,13 +88,9 @@ BASE_PATTERN = FoolfuukaExtractor.update({
"root": "https://boards.fireden.net",
"pattern": r"boards\.fireden\.net",
},
- "rozenarcana": {
- "root": "https://archive.alice.al",
- "pattern": r"(?:archive\.)?alice\.al",
- },
- "tokyochronos": {
- "root": "https://www.tokyochronos.net",
- "pattern": r"(?:www\.)?tokyochronos\.net",
+ "palanq": {
+ "root": "https://archive.palanq.win",
+ "pattern": r"archive\.palanq\.win",
},
"rbt": {
"root": "https://rbt.asia",
@@ -137,11 +133,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
("https://boards.fireden.net/sci/thread/11264294/", {
"url": "61cab625c95584a12a30049d054931d64f8d20aa",
}),
- ("https://archive.alice.al/c/thread/2849220/", {
- "url": "632e2c8de05de6b3847685f4bf1b4e5c6c9e0ed5",
- }),
- ("https://www.tokyochronos.net/a/thread/241664141/", {
- "url": "ae03852cf44e3dcfce5be70274cb1828e1dbb7d6",
+ ("https://archive.palanq.win/c/thread/4209598/", {
+ "url": "1f9b5570d228f1f2991c827a6631030bc0e5933c",
}),
("https://rbt.asia/g/thread/61487650/", {
"url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
@@ -187,8 +180,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
("https://arch.b4k.co/meta/"),
("https://desuarchive.org/a/"),
("https://boards.fireden.net/sci/"),
- ("https://archive.alice.al/c/"),
- ("https://www.tokyochronos.net/a/"),
+ ("https://archive.palanq.win/c/"),
("https://rbt.asia/g/"),
("https://thebarchive.com/b/"),
)
@@ -231,8 +223,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
("https://archiveofsins.com/_/search/text/test/"),
("https://desuarchive.org/_/search/text/test/"),
("https://boards.fireden.net/_/search/text/test/"),
- ("https://archive.alice.al/_/search/text/test/"),
- ("https://www.tokyochronos.net/_/search/text/test/"),
+ ("https://archive.palanq.win/_/search/text/test/"),
("https://rbt.asia/_/search/text/test/"),
("https://thebarchive.com/_/search/text/test/"),
)
@@ -297,8 +288,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
("https://arch.b4k.co/meta/gallery/"),
("https://desuarchive.org/a/gallery/5"),
("https://boards.fireden.net/sci/gallery/6"),
- ("https://archive.alice.al/c/gallery/7"),
- ("https://www.tokyochronos.net/a/gallery/7"),
+ ("https://archive.palanq.win/c/gallery"),
("https://rbt.asia/g/gallery/8"),
("https://thebarchive.com/b/gallery/9"),
)
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 4a38fb4..57d37b7 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -42,11 +42,6 @@ BASE_PATTERN = FoolslideExtractor.update({
"root": "https://read.powermanga.org",
"pattern": r"read(?:er)?\.powermanga\.org",
},
- "sensescans": {
- "root": "https://sensescans.com/reader",
- "pattern": r"(?:(?:www\.)?sensescans\.com/reader"
- r"|reader\.sensescans\.com)",
- },
})
@@ -64,11 +59,6 @@ class FoolslideChapterExtractor(FoolslideExtractor):
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
}),
- ("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", {
- "url": "bbd428dc578f5055e9f86ad635b510386cd317cd",
- "keyword": "083ef6f8831c84127fe4096fa340a249be9d1424",
- }),
- ("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"),
)
def items(self):
@@ -129,9 +119,6 @@ class FoolslideMangaExtractor(FoolslideExtractor):
"volume": int,
},
}),
- ("https://sensescans.com/reader/series/yotsubato/", {
- "count": ">= 3",
- }),
)
def items(self):
diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py
index 8b18d5e..086b95d 100644
--- a/gallery_dl/extractor/imagechest.py
+++ b/gallery_dl/extractor/imagechest.py
@@ -31,6 +31,12 @@ class ImagechestGalleryExtractor(GalleryExtractor):
"content": "076959e65be30249a2c651fbe6090dc30ba85193",
"count": 3
}),
+ # "Load More Files" button (#4028)
+ ("https://imgchest.com/p/9p4n3q2z7nq", {
+ "pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
+ "url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7",
+ "count": 52,
+ }),
)
def __init__(self, match):
@@ -49,6 +55,18 @@ class ImagechestGalleryExtractor(GalleryExtractor):
}
def images(self, page):
+ if " More Files</button>" in page:
+ url = "{}/p/{}/loadAll".format(self.root, self.gallery_id)
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer" : self.gallery_url,
+ }
+ csrf_token = text.extr(page, 'name="csrf-token" content="', '"')
+ data = {"_token": csrf_token}
+ page += self.request(
+ url, method="POST", headers=headers, data=data).text
+
return [
(url, None)
for url in text.extract_iter(page, 'data-url="', '"')
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index f8f1600..4c29d98 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -47,8 +47,13 @@ class ImgurExtractor(Extractor):
image_ex = ImgurImageExtractor
for item in items:
- item["_extractor"] = album_ex if item["is_album"] else image_ex
- yield Message.Queue, item["link"], item
+ if item["is_album"]:
+ url = "https://imgur.com/a/" + item["id"]
+ item["_extractor"] = album_ex
+ else:
+ url = "https://imgur.com/" + item["id"]
+ item["_extractor"] = image_ex
+ yield Message.Queue, url, item
class ImgurImageExtractor(ImgurExtractor):
@@ -272,7 +277,7 @@ class ImgurUserExtractor(ImgurExtractor):
("https://imgur.com/user/Miguenzo", {
"range": "1-100",
"count": 100,
- "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
}),
("https://imgur.com/user/Miguenzo/posts"),
("https://imgur.com/user/Miguenzo/submitted"),
@@ -285,17 +290,41 @@ class ImgurUserExtractor(ImgurExtractor):
class ImgurFavoriteExtractor(ImgurExtractor):
"""Extractor for a user's favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$"
test = ("https://imgur.com/user/Miguenzo/favorites", {
"range": "1-100",
"count": 100,
- "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
})
def items(self):
return self._items_queue(self.api.account_favorites(self.key))
+class ImgurFavoriteFolderExtractor(ImgurExtractor):
+ """Extractor for a user's favorites folder"""
+ subcategory = "favorite-folder"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)"
+ test = (
+ ("https://imgur.com/user/mikf1/favorites/folder/11896757/public", {
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
+ "count": 3,
+ }),
+ ("https://imgur.com/user/mikf1/favorites/folder/11896741/private", {
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
+ "count": 5,
+ }),
+ )
+
+ def __init__(self, match):
+ ImgurExtractor.__init__(self, match)
+ self.folder_id = match.group(2)
+
+ def items(self):
+ return self._items_queue(self.api.account_favorites_folder(
+ self.key, self.folder_id))
+
+
class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddits's imgur links"""
subcategory = "subreddit"
@@ -303,7 +332,7 @@ class ImgurSubredditExtractor(ImgurExtractor):
test = ("https://imgur.com/r/pics", {
"range": "1-100",
"count": 100,
- "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
})
def items(self):
@@ -317,7 +346,7 @@ class ImgurTagExtractor(ImgurExtractor):
test = ("https://imgur.com/t/animals", {
"range": "1-100",
"count": 100,
- "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
})
def items(self):
@@ -331,7 +360,7 @@ class ImgurSearchExtractor(ImgurExtractor):
test = ("https://imgur.com/search?q=cute+cat", {
"range": "1-100",
"count": 100,
- "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+",
+ "pattern": r"https://imgur\.com(/a)?/\w+$",
})
def items(self):
@@ -346,15 +375,18 @@ class ImgurAPI():
"""
def __init__(self, extractor):
self.extractor = extractor
- self.headers = {
- "Authorization": "Client-ID " + (
- extractor.config("client-id") or "546c25a59c58ad7"),
- }
+ self.client_id = extractor.config("client-id") or "546c25a59c58ad7"
+ self.headers = {"Authorization": "Client-ID " + self.client_id}
def account_favorites(self, account):
endpoint = "/3/account/{}/gallery_favorites".format(account)
return self._pagination(endpoint)
+ def account_favorites_folder(self, account, folder_id):
+ endpoint = "/3/account/{}/folders/{}/favorites".format(
+ account, folder_id)
+ return self._pagination_v2(endpoint)
+
def gallery_search(self, query):
endpoint = "/3/gallery/search"
params = {"q": query}
@@ -386,12 +418,12 @@ class ImgurAPI():
endpoint = "/post/v1/posts/" + gallery_hash
return self._call(endpoint)
- def _call(self, endpoint, params=None):
+ def _call(self, endpoint, params=None, headers=None):
while True:
try:
return self.extractor.request(
"https://api.imgur.com" + endpoint,
- params=params, headers=self.headers,
+ params=params, headers=(headers or self.headers),
).json()
except exception.HttpError as exc:
if exc.status not in (403, 429) or \
@@ -410,3 +442,23 @@ class ImgurAPI():
return
yield from data
num += 1
+
+ def _pagination_v2(self, endpoint, params=None, key=None):
+ if params is None:
+ params = {}
+ params["client_id"] = self.client_id
+ params["page"] = 0
+ params["sort"] = "newest"
+
+ headers = {
+ "Referer": "https://imgur.com/",
+ "Origin": "https://imgur.com",
+ }
+
+ while True:
+ data = self._call(endpoint, params, headers)["data"]
+ if not data:
+ return
+ yield from data
+
+ params["page"] += 1
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 409483b..12b8f39 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -85,6 +85,10 @@ class MangadexExtractor(Extractor):
data["group"] = [group["attributes"]["name"]
for group in relationships["scanlation_group"]]
+ data["status"] = mattributes["status"]
+ data["tags"] = [tag["attributes"]["name"]["en"]
+ for tag in mattributes["tags"]]
+
return data
@@ -94,13 +98,13 @@ class MangadexChapterExtractor(MangadexExtractor):
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
test = (
("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
- "keyword": "86fb262cf767dac6d965cd904ad499adba466404",
+ "keyword": "e86128a79ebe7201b648f1caa828496a2878dc8f",
# "content": "50383a4c15124682057b197d40261641a98db514",
}),
# oneshot
("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
"count": 64,
- "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb",
+ "keyword": "d11ed057a919854696853362be35fc0ba7dded4c",
}),
# MANGA Plus (#1154)
("https://mangadex.org/chapter/74149a55-e7c4-44ea-8a37-98e879c1096f", {
@@ -157,6 +161,9 @@ class MangadexMangaExtractor(MangadexExtractor):
"language": str,
"artist" : ["Arakawa Hiromu"],
"author" : ["Arakawa Hiromu"],
+ "status" : "completed",
+ "tags" : ["Oneshot", "Historical", "Action",
+ "Martial Arts", "Drama", "Tragedy"],
},
}),
("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index 7d23518..fd16f24 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2022 Mike Fährmann
+# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.myportfolio.com/"""
+"""Extractors for https://www.myportfolio.com/"""
from .common import Extractor, Message
from .. import text, exception
@@ -21,7 +21,7 @@ class MyportfolioGalleryExtractor(Extractor):
archive_fmt = "{user}_{filename}"
pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|"
r"(?:https?://)?([\w-]+\.myportfolio\.com))"
- r"(/[^/?&#]+)?")
+ r"(/[^/?#]+)?")
test = (
("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
"url": "acea0690c76db0e5cf267648cefd86e921bc3499",
diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py
deleted file mode 100644
index 24e676f..0000000
--- a/gallery_dl/extractor/nana.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://nana.my.id/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text, util, exception
-
-
-class NanaGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries from nana.my.id"""
- category = "nana"
- directory_fmt = ("{category}", "{title}")
- pattern = r"(?:https?://)?nana\.my\.id/reader/([^/?#]+)"
- test = (
- (("https://nana.my.id/reader/"
- "059f7de55a4297413bfbd432ce7d6e724dd42bae"), {
- "pattern": r"https://nana\.my\.id/reader/"
- r"\w+/image/page\?path=.*\.\w+",
- "keyword": {
- "title" : "Everybody Loves Shion",
- "artist": "fuzui",
- "tags" : list,
- "count" : 29,
- },
- }),
- (("https://nana.my.id/reader/"
- "77c8712b67013e427923573379f5bafcc0c72e46"), {
- "pattern": r"https://nana\.my\.id/reader/"
- r"\w+/image/page\?path=.*\.\w+",
- "keyword": {
- "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru",
- "artist": "Sueyuu",
- "tags" : ["Sueyuu"],
- "count" : 58,
- },
- }),
- )
-
- def __init__(self, match):
- self.gallery_id = match.group(1)
- url = "https://nana.my.id/reader/" + self.gallery_id
- GalleryExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- title = text.unescape(
- text.extr(page, '</a>&nbsp; ', '</div>'))
- artist = text.unescape(text.extr(
- page, '<title>', '</title>'))[len(title):-10]
- tags = text.extr(page, 'Reader.tags = "', '"')
-
- return {
- "gallery_id": self.gallery_id,
- "title" : title,
- "artist" : artist[4:] if artist.startswith(" by ") else "",
- "tags" : tags.split(", ") if tags else (),
- "lang" : "en",
- "language" : "English",
- }
-
- def images(self, page):
- data = util.json_loads(text.extr(page, "Reader.pages = ", ".pages"))
- return [
- ("https://nana.my.id" + image, None)
- for image in data["pages"]
- ]
-
-
-class NanaSearchExtractor(Extractor):
- """Extractor for nana search results"""
- category = "nana"
- subcategory = "search"
- pattern = r"(?:https?://)?nana\.my\.id(?:/?\?([^#]+))"
- test = (
- ('https://nana.my.id/?q=+"elf"&sort=desc', {
- "pattern": NanaGalleryExtractor.pattern,
- "range": "1-100",
- "count": 100,
- }),
- ("https://nana.my.id/?q=favorites%3A", {
- "pattern": NanaGalleryExtractor.pattern,
- "count": ">= 2",
- }),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
- self.params["p"] = text.parse_int(self.params.get("p"), 1)
- self.params["q"] = self.params.get("q") or ""
-
- def items(self):
- if "favorites:" in self.params["q"]:
- favkey = self.config("favkey")
- if not favkey:
- raise exception.AuthenticationError(
- "'Favorite key' not provided. "
- "Please see 'https://nana.my.id/tutorial'")
- self.session.cookies.set("favkey", favkey, domain="nana.my.id")
-
- data = {"_extractor": NanaGalleryExtractor}
- while True:
- try:
- page = self.request(
- "https://nana.my.id", params=self.params).text
- except exception.HttpError:
- return
-
- for gallery in text.extract_iter(
- page, '<div class="id3">', '</div>'):
- url = "https://nana.my.id" + text.extr(
- gallery, '<a href="', '"')
- yield Message.Queue, url, data
-
- self.params["p"] += 1
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 404f296..824757c 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -134,7 +134,7 @@ class OAuthBase(Extractor):
def _oauth2_authorization_code_grant(
self, client_id, client_secret, default_id, default_secret,
- auth_url, token_url, *, scope="read", duration="permanent",
+ auth_url, token_url, scope="read", duration="permanent",
key="refresh_token", auth=True, cache=None, instance=None):
"""Perform an OAuth2 authorization code grant"""
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 31ddbcc..92e0588 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -112,7 +112,7 @@ class PinterestExtractor(Extractor):
class PinterestPinExtractor(PinterestExtractor):
"""Extractor for images from a single pin from pinterest.com"""
subcategory = "pin"
- pattern = BASE_PATTERN + r"/pin/([^/?#&]+)(?!.*#related$)"
+ pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
test = (
("https://www.pinterest.com/pin/858146903966145189/", {
"url": "afb3c26719e3a530bb0e871c480882a801a4e8a5",
@@ -121,7 +121,7 @@ class PinterestPinExtractor(PinterestExtractor):
}),
# video pin (#1189)
("https://www.pinterest.com/pin/422564377542934214/", {
- "pattern": r"https://v\.pinimg\.com/videos/mc/hls/d7/22/ff"
+ "pattern": r"https://v\d*\.pinimg\.com/videos/mc/hls/d7/22/ff"
r"/d722ff00ab2352981b89974b37909de8.m3u8",
}),
("https://www.pinterest.com/pin/858146903966145188/", {
@@ -147,8 +147,8 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board"
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
- pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)"
- "/(?!_saved|_created|pins/)([^/?#&]+)/?$")
+ pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
+ "/(?!_saved|_created|pins/)([^/?#]+)/?$")
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
@@ -198,7 +198,7 @@ class PinterestBoardExtractor(PinterestExtractor):
class PinterestUserExtractor(PinterestExtractor):
"""Extractor for a user's boards"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)(?:/_saved)?/?$"
test = (
("https://www.pinterest.com/g1952849/", {
"pattern": PinterestBoardExtractor.pattern,
@@ -223,7 +223,7 @@ class PinterestAllpinsExtractor(PinterestExtractor):
"""Extractor for a user's 'All Pins' feed"""
subcategory = "allpins"
directory_fmt = ("{category}", "{user}")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/pins/?$"
test = ("https://www.pinterest.com/g1952849/pins/", {
"pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}",
@@ -245,10 +245,10 @@ class PinterestCreatedExtractor(PinterestExtractor):
"""Extractor for a user's created pins"""
subcategory = "created"
directory_fmt = ("{category}", "{user}")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/_created/?$"
test = ("https://www.pinterest.de/digitalmomblog/_created/", {
"pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
- r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
+ r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png)",
"count": 10,
"range": "1-10",
})
@@ -270,7 +270,7 @@ class PinterestSectionExtractor(PinterestExtractor):
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "{section[title]}")
archive_fmt = "{board[id]}_{id}"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/([^/?#&]+)"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)"
test = ("https://www.pinterest.com/g1952849/stuff/section", {
"count": 2,
})
@@ -321,7 +321,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor):
"""Extractor for related pins of another pin from pinterest.com"""
subcategory = "related-pin"
directory_fmt = ("{category}", "related {original_pin[id]}")
- pattern = BASE_PATTERN + r"/pin/([^/?#&]+).*#related$"
+ pattern = BASE_PATTERN + r"/pin/([^/?#]+).*#related$"
test = ("https://www.pinterest.com/pin/858146903966145189/#related", {
"range": "31-70",
"count": 40,
@@ -340,7 +340,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
subcategory = "related-board"
directory_fmt = ("{category}", "{board[owner][username]}",
"{board[name]}", "related")
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?#related$"
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$"
test = ("https://www.pinterest.com/g1952849/test-/#related", {
"range": "31-70",
"count": 40,
@@ -348,13 +348,13 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor):
})
def pins(self):
- return self.api.board_related(self.board["id"])
+ return self.api.board_content_recommendation(self.board["id"])
class PinterestPinitExtractor(PinterestExtractor):
"""Extractor for images from a pin.it URL"""
subcategory = "pinit"
- pattern = r"(?:https?://)?pin\.it/([^/?#&]+)"
+ pattern = r"(?:https?://)?pin\.it/([^/?#]+)"
test = (
("https://pin.it/Hvt8hgT", {
@@ -370,7 +370,7 @@ class PinterestPinitExtractor(PinterestExtractor):
self.shortened_id = match.group(1)
def items(self):
- url = "https://api.pinterest.com/url_shortener/{}/redirect".format(
+ url = "https://api.pinterest.com/url_shortener/{}/redirect/".format(
self.shortened_id)
response = self.request(url, method="HEAD", allow_redirects=False)
location = response.headers.get("Location")
@@ -458,10 +458,10 @@ class PinterestAPI():
options = {"section_id": section_id}
return self._pagination("BoardSectionPins", options)
- def board_related(self, board_id):
+ def board_content_recommendation(self, board_id):
"""Yield related pins of a specific board"""
- options = {"board_id": board_id, "add_vase": True}
- return self._pagination("BoardRelatedPixieFeed", options)
+ options = {"id": board_id, "type": "board", "add_vase": True}
+ return self._pagination("BoardContentRecommendation", options)
def user_pins(self, user):
"""Yield all pins from 'user'"""
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 5d83299..47e067b 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -112,7 +112,7 @@ class SankakucomplexTagExtractor(SankakucomplexExtractor):
"""Extractor for sankakucomplex blog articles by tag or author"""
subcategory = "tag"
pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
- r"/((?:tag|category|author)/[^/&?#]+)")
+ r"/((?:tag|category|author)/[^/?#]+)")
test = (
("https://www.sankakucomplex.com/tag/cosplay/", {
"range": "1-50",
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 1a39b5b..972b508 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -34,7 +34,7 @@ BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
"""Extractor for general-purpose URL shorteners"""
subcategory = "link"
- pattern = BASE_PATTERN + r"/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+)"
test = (
("https://bit.ly/3cWIUgq", {
"count": 1,
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index aad307f..08e6e70 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -148,20 +148,6 @@ def build_parser():
help="Delete cached login sessions, cookies, etc. for MODULE "
"(ALL to delete everything)",
)
- general.add_argument(
- "--cookies",
- dest="cookies", metavar="FILE", action=ConfigAction,
- help="File to load additional cookies from",
- )
- general.add_argument(
- "--cookies-from-browser",
- dest="cookies_from_browser",
- metavar="BROWSER[+KEYRING][:PROFILE][::CONTAINER]",
- help=("Name of the browser to load cookies from, "
- "with optional keyring name prefixed with '+', "
- "profile prefixed with ':', and "
- "container prefixed with '::' ('none' for no container)"),
- )
output = parser.add_argument_group("Output Options")
output.add_argument(
@@ -374,6 +360,28 @@ def build_parser():
help="Enable .netrc authentication data",
)
+ cookies = parser.add_argument_group("Cookie Options")
+ cookies.add_argument(
+ "-C", "--cookies",
+ dest="cookies", metavar="FILE", action=ConfigAction,
+ help="File to load additional cookies from",
+ )
+ cookies.add_argument(
+ "--cookies-export",
+ dest="cookies-update", metavar="FILE", action=ConfigAction,
+ help="Export session cookies to FILE",
+ )
+ cookies.add_argument(
+ "--cookies-from-browser",
+ dest="cookies_from_browser",
+ metavar="BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]",
+ help=("Name of the browser to load cookies from, with optional "
+ "domain prefixed with '/', "
+ "keyring name prefixed with '+', "
+ "profile prefixed with ':', and "
+ "container prefixed with '::' ('none' for no container)"),
+ )
+
selection = parser.add_argument_group("Selection Options")
selection.add_argument(
"--download-archive",
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d4ef532..4f9e49a 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.25.3"
+__version__ = "1.25.4"
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index 4c20f67..878ac85 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -235,12 +235,6 @@ class Test_CommandlineArguments(unittest.TestCase):
class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
module_name = "yt_dlp"
- @classmethod
- def setUpClass(cls):
- super().setUpClass()
- if cls.module.version.__version__ > "2023.03.04":
- cls.test_geo_bypass = cls._test_geo_bypass_xff
-
def test_retries_extractor(self):
inf = float("inf")
@@ -275,7 +269,22 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
"title:%(artist)s - %(title)s")],
})
- def _test_geo_bypass_xff(self):
+ def test_geo_bypass(self):
+ try:
+ ytdl.parse_command_line(self.module, ["--xff", "default"])
+ except Exception:
+ # before --xff (c16644642)
+ return Test_CommandlineArguments.test_geo_bypass(self)
+
+ self._(["--xff", "default"],
+ "geo_bypass", "default")
+ self._(["--xff", "never"],
+ "geo_bypass", "never")
+ self._(["--xff", "EN"],
+ "geo_bypass", "EN")
+ self._(["--xff", "198.51.100.14/24"],
+ "geo_bypass", "198.51.100.14/24")
+
self._("--geo-bypass",
"geo_bypass", "default")
self._("--no-geo-bypass",