33 files changed, 476 insertions, 394 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index a76a0dd..85c732d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## 1.25.4 - 2023-05-07 +### Additions +- [4chanarchives] add `thread` and `board` extractors ([#4012](https://github.com/mikf/gallery-dl/issues/4012)) +- [foolfuuka] add `archive.palanq.win` +- [imgur] add `favorite-folder` extractor ([#4016](https://github.com/mikf/gallery-dl/issues/4016)) +- [mangadex] add `status` and `tags` metadata ([#4031](https://github.com/mikf/gallery-dl/issues/4031)) +- allow selecting a domain with `--cookies-from-browser` +- add `--cookies-export` command-line option +- add `-C` as short option for `--cookies` +- include exception type in config error messages +### Fixes +- [exhentai] update sadpanda check +- [imagechest] load all images when a "Load More" button is present ([#4028](https://github.com/mikf/gallery-dl/issues/4028)) +- [imgur] fix bug causing some images/albums from user profiles and favorites to be ignored +- [pinterest] update endpoint for related board pins +- [pinterest] fix `pin.it` extractor +- [ytdl] fix yt-dlp `--xff/--geo-bypass` tests ([#3989](https://github.com/mikf/gallery-dl/issues/3989)) +### Removals +- [420chan] remove module +- [foolfuuka] remove `archive.alice.al` and `tokyochronos.net` +- [foolslide] remove `sensescans.com` +- [nana] remove module + ## 1.25.3 - 2023-04-30 ### Additions - [imagefap] extract `description` and `categories` metadata ([#3905](https://github.com/mikf/gallery-dl/issues/3905)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.25.3 +Version: 1.25.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__ Nightly Builds @@ -72,9 +72,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__ Nightly Builds diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index a82db8a..94d2f06 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -16,8 +16,6 @@ _arguments -C -S \ --source-address'[Client-side IP address to bind to]':'<ip>' \ --user-agent'[User-Agent request header]':'<ua>' \ --clear-cache'[Delete cached login sessions, cookies, etc. 
for MODULE (ALL to delete everything)]':'<module>' \ ---cookies'[File to load additional cookies from]':'<file>':_files \ ---cookies-from-browser'[Name of the browser to load cookies from, with optional keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)]':'<browser[+keyring][:profile][::container]>' \ {-q,--quiet}'[Activate quiet mode]' \ {-v,--verbose}'[Print various debugging information]' \ {-g,--get-urls}'[Print URLs instead of downloading]' \ @@ -56,6 +54,9 @@ _arguments -C -S \ {-u,--username}'[Username to login with]':'<user>' \ {-p,--password}'[Password belonging to the given username]':'<pass>' \ --netrc'[Enable .netrc authentication data]' \ +{-C,--cookies}'[File to load additional cookies from]':'<file>':_files \ +--cookies-export'[Export session cookies to FILE]':'<file>':_files \ +--cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)]':'<browser[/domain][+keyring][:profile][::container]>' \ --download-archive'[Record all downloaded or skipped files in FILE and skip downloading any file already in it]':'<file>':_files \ {-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \ {-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 1750af8..02fc847 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -5,12 +5,12 @@ _gallery_dl() cur="${COMP_WORDS[COMP_CWORD]}" prev="${COMP_WORDS[COMP_CWORD-1]}" - if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|--download-archive)$ ]]; then + if [[ "${prev}" =~ ^(-i|--input-file|--write-log|--write-unsupported|-c|--config|--config-yaml|--config-toml|-C|--cookies|--cookies-export|--download-archive)$ ]]; then COMPREPLY=( $(compgen -f -- "${cur}") ) elif [[ "${prev}" =~ ^()$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --input-file --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --input-file --filename --destination --directory --extractors --proxy --source-address --user-agent --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported 
--write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --chunk-size --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --option --config --config-yaml --config-toml --config-create --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor --postprocessor-option" -- "${cur}") ) fi } diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index d764543..00e7b24 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -10,8 +10,6 @@ complete -c gallery-dl -x -l 'proxy' -d 'Use the specified proxy' complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind to' complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header' complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)' -complete -c gallery-dl -r -F -l 'cookies' -d 'File to load additional cookies from' -complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)' complete -c gallery-dl -s 'q' -l 'quiet' -d 'Activate quiet mode' complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging information' complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading' @@ -50,6 +48,9 @@ complete -c gallery-dl -l 'ignore-config' -d '==SUPPRESS==' complete -c gallery-dl -x -s 'u' -l 'username' -d 'Username to login with' complete -c gallery-dl -x -s 'p' -l 'password' -d 'Password belonging to the given username' complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data' +complete -c gallery-dl -r -F -s 'C' -l 'cookies' -d 'File to load additional cookies from' +complete -c gallery-dl -r -F -l 'cookies-export' -d 'Export session cookies to FILE' +complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)' complete -c gallery-dl -r -F -l 'download-archive' -d 'Record all downloaded or skipped files in FILE and skip downloading any file already in it' complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped' complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index b5ad7f2..f018c63 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2023-04-30" "1.25.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2023-05-07" "1.25.4" "gallery-dl Manual" .\" disable hyphenation .nh @@ -50,12 +50,6 @@ User-Agent request header .B "\-\-clear\-cache" \f[I]MODULE\f[] Delete cached login sessions, cookies, etc. 
for MODULE (ALL to delete everything) .TP -.B "\-\-cookies" \f[I]FILE\f[] -File to load additional cookies from -.TP -.B "\-\-cookies\-from\-browser" \f[I]BROWSER[+KEYRING][:PROFILE][::CONTAINER]\f[] -Name of the browser to load cookies from, with optional keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container) -.TP .B "\-q, \-\-quiet" Activate quiet mode .TP @@ -167,6 +161,15 @@ Password belonging to the given username .B "\-\-netrc" Enable .netrc authentication data .TP +.B "\-C, \-\-cookies" \f[I]FILE\f[] +File to load additional cookies from +.TP +.B "\-\-cookies\-export" \f[I]FILE\f[] +Export session cookies to FILE +.TP +.B "\-\-cookies\-from\-browser" \f[I]BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]\f[] +Name of the browser to load cookies from, with optional domain prefixed with '/', keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container) +.TP .B "\-\-download\-archive" \f[I]FILE\f[] Record all downloaded or skipped files in FILE and skip downloading any file already in it .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 3bd9f17..8008451 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2023-04-30" "1.25.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2023-05-07" "1.25.4" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -548,25 +548,35 @@ Source to read additional cookies from. This can be * The optional third entry is the keyring to retrieve passwords for decrypting cookies from .br * The optional fourth entry is a (Firefox) container name (\f[I]"none"\f[] for only cookies with no container) +.br +* The optional fifth entry is the domain to extract cookies for. Prefix it with a dot \f[I].\f[] to include cookies for subdomains. Has no effect when also specifying a container. .. code:: json ["firefox"] ["firefox", null, null, "Personal"] -["chromium", "Private", "kwallet"] +["chromium", "Private", "kwallet", null, ".twitter.com"] .SS extractor.*.cookies-update .IP "Type:" 6 -\f[I]bool\f[] +.br +* \f[I]bool\f[] +.br +* \f[I]Path\f[] .IP "Default:" 9 \f[I]true\f[] .IP "Description:" 4 -If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] of a cookies.txt -file and it can be opened and parsed without errors, -update its contents with cookies received during data extraction. +Export session cookies in cookies.txt format. + +.br +* If this is a \f[I]Path\f[], write cookies to the given file path. + +.br +* If this is \f[I]true\f[] and \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] +of a valid cookies.txt file, update its contents. .SS extractor.*.proxy @@ -2409,18 +2419,6 @@ Fetch media from renoted notes. Fetch media from replies to other notes. -.SS extractor.nana.favkey -.IP "Type:" 6 -\f[I]string\f[] - -.IP "Default:" 9 -\f[I]null\f[] - -.IP "Description:" 4 -Your \f[I]Nana Favorite Key\f[], -used to access your favorite archives. 
- - .SS extractor.newgrounds.flash .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 09d9e80..92451fd 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -201,10 +201,6 @@ "format": "original", "include": "art" }, - "nana": - { - "favkey": null - }, "nijie": { "username": null, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 73bcd92..3554f49 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.25.3 +Version: 1.25.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -109,9 +109,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.3/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.25.4/gallery-dl.bin>`__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 2e66ea6..8be6871 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -44,8 +44,8 @@ gallery_dl/extractor/2chan.py gallery_dl/extractor/2chen.py gallery_dl/extractor/35photo.py gallery_dl/extractor/3dbooru.py -gallery_dl/extractor/420chan.py gallery_dl/extractor/4chan.py +gallery_dl/extractor/4chanarchives.py gallery_dl/extractor/500px.py gallery_dl/extractor/8chan.py gallery_dl/extractor/8muses.py @@ -140,7 +140,6 @@ gallery_dl/extractor/misskey.py gallery_dl/extractor/moebooru.py gallery_dl/extractor/myhentaigallery.py gallery_dl/extractor/myportfolio.py -gallery_dl/extractor/nana.py gallery_dl/extractor/naver.py gallery_dl/extractor/naverwebtoon.py gallery_dl/extractor/newgrounds.py diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index a430f13..1450e8f 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -70,12 +70,14 @@ def main(): if args.cookies_from_browser: browser, _, profile = args.cookies_from_browser.partition(":") browser, _, keyring = browser.partition("+") + browser, _, domain = browser.partition("/") if profile.startswith(":"): container = profile[1:] profile = None else: profile, _, container = profile.partition("::") - config.set((), "cookies", (browser, profile, keyring, container)) + config.set((), "cookies", ( + browser, profile, keyring, container, domain)) if args.options_pp: config.set((), "postprocessor-options", args.options_pp) for opts in args.options: diff --git a/gallery_dl/config.py b/gallery_dl/config.py index d014293..0b2aca8 100644 --- a/gallery_dl/config.py +++ b/gallery_dl/config.py @@ -102,7 +102,8 @@ def load(files=None, strict=False, load=util.json_loads): log.error(exc) sys.exit(1) except Exception as exc: - log.warning("Could not parse '%s': %s", path, exc) + log.error("%s when loading '%s': %s", + exc.__class__.__name__, path, exc) if strict: sys.exit(2) else: @@ -118,7 +119,7 @@ def clear(): _config.clear() -def get(path, key, default=None, *, conf=_config): +def get(path, 
key, default=None, conf=_config): """Get the value of property 'key' or a default value""" try: for p in path: @@ -128,7 +129,7 @@ def get(path, key, default=None, *, conf=_config): return default -def interpolate(path, key, default=None, *, conf=_config): +def interpolate(path, key, default=None, conf=_config): """Interpolate the value of 'key'""" if key in conf: return conf[key] @@ -142,7 +143,7 @@ def interpolate(path, key, default=None, *, conf=_config): return default -def interpolate_common(common, paths, key, default=None, *, conf=_config): +def interpolate_common(common, paths, key, default=None, conf=_config): """Interpolate the value of 'key' using multiple 'paths' along a 'common' ancestor """ @@ -174,7 +175,7 @@ def interpolate_common(common, paths, key, default=None, *, conf=_config): return default -def accumulate(path, key, *, conf=_config): +def accumulate(path, key, conf=_config): """Accumulate the values of 'key' along 'path'""" result = [] try: @@ -193,7 +194,7 @@ def accumulate(path, key, *, conf=_config): return result -def set(path, key, value, *, conf=_config): +def set(path, key, value, conf=_config): """Set the value of property 'key' for this session""" for p in path: try: @@ -203,7 +204,7 @@ def set(path, key, value, *, conf=_config): conf[key] = value -def setdefault(path, key, value, *, conf=_config): +def setdefault(path, key, value, conf=_config): """Set the value of property 'key' if it doesn't exist""" for p in path: try: @@ -213,7 +214,7 @@ def setdefault(path, key, value, *, conf=_config): return conf.setdefault(key, value) -def unset(path, key, *, conf=_config): +def unset(path, key, conf=_config): """Unset the value of property 'key'""" try: for p in path: diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py index 3d715a7..32ba323 100644 --- a/gallery_dl/cookies.py +++ b/gallery_dl/cookies.py @@ -20,7 +20,6 @@ import struct import subprocess import sys import tempfile -from datetime import datetime, timedelta, timezone from hashlib import pbkdf2_hmac from http.cookiejar import Cookie from . import aes, text, util @@ -34,19 +33,19 @@ logger = logging.getLogger("cookies") def load_cookies(cookiejar, browser_specification): - browser_name, profile, keyring, container = \ + browser_name, profile, keyring, container, domain = \ _parse_browser_specification(*browser_specification) if browser_name == "firefox": - load_cookies_firefox(cookiejar, profile, container) + load_cookies_firefox(cookiejar, profile, container, domain) elif browser_name == "safari": - load_cookies_safari(cookiejar, profile) + load_cookies_safari(cookiejar, profile, domain) elif browser_name in SUPPORTED_BROWSERS_CHROMIUM: - load_cookies_chrome(cookiejar, browser_name, profile, keyring) + load_cookies_chrome(cookiejar, browser_name, profile, keyring, domain) else: raise ValueError("unknown browser '{}'".format(browser_name)) -def load_cookies_firefox(cookiejar, profile=None, container=None): +def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None): path, container_id = _firefox_cookies_database(profile, container) with DatabaseCopy(path) as db: @@ -60,6 +59,13 @@ def load_cookies_firefox(cookiejar, profile=None, container=None): sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?" uid = "%userContextId={}".format(container_id) parameters = (uid, uid + "&%") + elif domain: + if domain[0] == ".": + sql += " WHERE host == ? OR host LIKE ?" + parameters = (domain[1:], "%" + domain) + else: + sql += " WHERE host == ? OR host == ?" 
+ parameters = (domain, "." + domain) set_cookie = cookiejar.set_cookie for name, value, domain, path, secure, expires in db.execute( @@ -69,9 +75,10 @@ def load_cookies_firefox(cookiejar, profile=None, container=None): domain, bool(domain), domain.startswith("."), path, bool(path), secure, expires, False, None, None, {}, )) + logger.info("Extracted %s cookies from Firefox", len(cookiejar)) -def load_cookies_safari(cookiejar, profile=None): +def load_cookies_safari(cookiejar, profile=None, domain=None): """Ref.: https://github.com/libyal/dtformats/blob /main/documentation/Safari%20Cookies.asciidoc - This data appears to be out of date @@ -87,7 +94,8 @@ def load_cookies_safari(cookiejar, profile=None): _safari_parse_cookies_page(p.read_bytes(page_size), cookiejar) -def load_cookies_chrome(cookiejar, browser_name, profile, keyring): +def load_cookies_chrome(cookiejar, browser_name, profile=None, + keyring=None, domain=None): config = _get_chromium_based_browser_settings(browser_name) path = _chrome_cookies_database(profile, config) logger.debug("Extracting cookies from %s", path) @@ -95,19 +103,31 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring): with DatabaseCopy(path) as db: db.text_factory = bytes decryptor = get_cookie_decryptor( - config["directory"], config["keyring"], keyring=keyring) + config["directory"], config["keyring"], keyring) + + if domain: + if domain[0] == ".": + condition = " WHERE host_key == ? OR host_key LIKE ?" + parameters = (domain[1:], "%" + domain) + else: + condition = " WHERE host_key == ? OR host_key == ?" + parameters = (domain, "." + domain) + else: + condition = "" + parameters = () try: rows = db.execute( "SELECT host_key, name, value, encrypted_value, path, " - "expires_utc, is_secure FROM cookies") + "expires_utc, is_secure FROM cookies" + condition, parameters) except sqlite3.OperationalError: rows = db.execute( "SELECT host_key, name, value, encrypted_value, path, " - "expires_utc, secure FROM cookies") + "expires_utc, secure FROM cookies" + condition, parameters) set_cookie = cookiejar.set_cookie - failed_cookies = unencrypted_cookies = 0 + failed_cookies = 0 + unencrypted_cookies = 0 for domain, name, value, enc_value, path, expires, secure in rows: @@ -136,8 +156,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring): failed_message = "" logger.info("Extracted %s cookies from %s%s", - len(cookiejar), browser_name, failed_message) - counts = decryptor.cookie_counts.copy() + len(cookiejar), browser_name.capitalize(), failed_message) + counts = decryptor.cookie_counts counts["unencrypted"] = unencrypted_cookies logger.debug("cookie version breakdown: %s", counts) @@ -224,7 +244,7 @@ def _safari_parse_cookies_header(data): return page_sizes, p.cursor -def _safari_parse_cookies_page(data, jar): +def _safari_parse_cookies_page(data, cookiejar, domain=None): p = DataParser(data) p.expect_bytes(b"\x00\x00\x01\x00", "page signature") number_of_cookies = p.read_uint() @@ -238,12 +258,12 @@ def _safari_parse_cookies_page(data, jar): for i, record_offset in enumerate(record_offsets): p.skip_to(record_offset, "space between records") record_length = _safari_parse_cookies_record( - data[record_offset:], jar) + data[record_offset:], cookiejar, domain) p.read_bytes(record_length) p.skip_to_end("space in between pages") -def _safari_parse_cookies_record(data, cookiejar): +def _safari_parse_cookies_record(data, cookiejar, host=None): p = DataParser(data) record_size = p.read_uint() p.skip(4, "unknown record field 1") @@ -262,6 
+282,14 @@ def _safari_parse_cookies_record(data, cookiejar): p.skip_to(domain_offset) domain = p.read_cstring() + if host: + if host[0] == ".": + if host[1:] != domain and not domain.endswith(host): + return record_size + else: + if host != domain and ("." + host) != domain: + return record_size + p.skip_to(name_offset) name = p.read_cstring() @@ -412,18 +440,17 @@ class ChromeCookieDecryptor: raise NotImplementedError("Must be implemented by sub classes") -def get_cookie_decryptor(browser_root, browser_keyring_name, *, keyring=None): +def get_cookie_decryptor(browser_root, browser_keyring_name, keyring=None): if sys.platform in ("win32", "cygwin"): return WindowsChromeCookieDecryptor(browser_root) elif sys.platform == "darwin": return MacChromeCookieDecryptor(browser_keyring_name) else: - return LinuxChromeCookieDecryptor( - browser_keyring_name, keyring=keyring) + return LinuxChromeCookieDecryptor(browser_keyring_name, keyring) class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): - def __init__(self, browser_keyring_name, *, keyring=None): + def __init__(self, browser_keyring_name, keyring=None): self._v10_key = self.derive_key(b"peanuts") password = _get_linux_keyring_password(browser_keyring_name, keyring) self._v11_key = None if password is None else self.derive_key(password) @@ -893,8 +920,8 @@ def _get_linux_desktop_environment(env): def _mac_absolute_time_to_posix(timestamp): - return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + - timedelta(seconds=timestamp)).timestamp()) + # 978307200 is timestamp of 2001-01-01 00:00:00 + return 978307200 + int(timestamp) def pbkdf2_sha1(password, salt, iterations, key_length): @@ -979,7 +1006,7 @@ def _is_path(value): def _parse_browser_specification( - browser, profile=None, keyring=None, container=None): + browser, profile=None, keyring=None, container=None, domain=None): browser = browser.lower() if browser not in SUPPORTED_BROWSERS: raise ValueError("unsupported browser '{}'".format(browser)) @@ -987,4 +1014,4 @@ def _parse_browser_specification( raise ValueError("unsupported keyring '{}'".format(keyring)) if profile and _is_path(profile): profile = os.path.expanduser(profile) - return browser, profile, keyring, container + return browser, profile, keyring, container, domain diff --git a/gallery_dl/extractor/420chan.py b/gallery_dl/extractor/420chan.py deleted file mode 100644 index fd0172e..0000000 --- a/gallery_dl/extractor/420chan.py +++ /dev/null @@ -1,76 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2021 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
- -"""Extractors for https://420chan.org/""" - -from .common import Extractor, Message - - -class _420chanThreadExtractor(Extractor): - """Extractor for 420chan threads""" - category = "420chan" - subcategory = "thread" - directory_fmt = ("{category}", "{board}", "{thread} {title}") - archive_fmt = "{board}_{thread}_{filename}" - pattern = r"(?:https?://)?boards\.420chan\.org/([^/?#]+)/thread/(\d+)" - test = ("https://boards.420chan.org/ani/thread/33251/chow-chows", { - "pattern": r"https://boards\.420chan\.org/ani/src/\d+\.jpg", - "content": "b07c803b0da78de159709da923e54e883c100934", - "count": 2, - }) - - def __init__(self, match): - Extractor.__init__(self, match) - self.board, self.thread = match.groups() - - def items(self): - url = "https://api.420chan.org/{}/res/{}.json".format( - self.board, self.thread) - posts = self.request(url).json()["posts"] - - data = { - "board" : self.board, - "thread": self.thread, - "title" : posts[0].get("sub") or posts[0]["com"][:50], - } - - yield Message.Directory, data - for post in posts: - if "filename" in post: - post.update(data) - post["extension"] = post["ext"][1:] - url = "https://boards.420chan.org/{}/src/{}{}".format( - post["board"], post["filename"], post["ext"]) - yield Message.Url, url, post - - -class _420chanBoardExtractor(Extractor): - """Extractor for 420chan boards""" - category = "420chan" - subcategory = "board" - pattern = r"(?:https?://)?boards\.420chan\.org/([^/?#]+)/\d*$" - test = ("https://boards.420chan.org/po/", { - "pattern": _420chanThreadExtractor.pattern, - "count": ">= 100", - }) - - def __init__(self, match): - Extractor.__init__(self, match) - self.board = match.group(1) - - def items(self): - url = "https://api.420chan.org/{}/threads.json".format(self.board) - threads = self.request(url).json() - - for page in threads: - for thread in page["threads"]: - url = "https://boards.420chan.org/{}/thread/{}/".format( - self.board, thread["no"]) - thread["page"] = page["page"] - thread["_extractor"] = _420chanThreadExtractor - yield Message.Queue, url, thread diff --git a/gallery_dl/extractor/4chanarchives.py b/gallery_dl/extractor/4chanarchives.py new file mode 100644 index 0000000..041e6a3 --- /dev/null +++ b/gallery_dl/extractor/4chanarchives.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- + +# Copyright 2023 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://4chanarchives.com/""" + +from .common import Extractor, Message +from .. 
import text + + +class _4chanarchivesThreadExtractor(Extractor): + """Extractor for threads on 4chanarchives.com""" + category = "4chanarchives" + subcategory = "thread" + root = "https://4chanarchives.com" + directory_fmt = ("{category}", "{board}", "{thread} - {title}") + filename_fmt = "{no}-{filename}.{extension}" + archive_fmt = "{board}_{thread}_{no}" + pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)" + test = ( + ("https://4chanarchives.com/board/c/thread/2707110", { + "pattern": r"https://i\.imgur\.com/(0wLGseE|qbByWDc)\.jpg", + "count": 2, + "keyword": { + "board": "c", + "com": str, + "name": "Anonymous", + "no": int, + "thread": "2707110", + "time": r"re:2016-07-1\d \d\d:\d\d:\d\d", + "title": "Ren Kagami from 'Oyako Neburi'", + }, + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.thread = match.groups() + + def items(self): + url = "{}/board/{}/thread/{}".format( + self.root, self.board, self.thread) + page = self.request(url).text + data = self.metadata(page) + posts = self.posts(page) + + if not data["title"]: + data["title"] = text.unescape(text.remove_html( + posts[0]["com"]))[:50] + + for post in posts: + post.update(data) + yield Message.Directory, post + if "url" in post: + yield Message.Url, post["url"], post + + def metadata(self, page): + return { + "board" : self.board, + "thread" : self.thread, + "title" : text.unescape(text.extr( + page, 'property="og:title" content="', '"')), + } + + def posts(self, page): + """Build a list of all post objects""" + return [self.parse(html) for html in text.extract_iter( + page, 'id="pc', '</blockquote>')] + + def parse(self, html): + """Build post object by extracting data from an HTML post""" + post = self._extract_post(html) + if ">File: <" in html: + self._extract_file(html, post) + post["extension"] = post["url"].rpartition(".")[2] + return post + + @staticmethod + def _extract_post(html): + extr = text.extract_from(html) + return { + "no" : text.parse_int(extr('', '"')), + "name": extr('class="name">', '<'), + "time": extr('class="dateTime postNum" >', '<').rstrip(), + "com" : text.unescape( + html[html.find('<blockquote'):].partition(">")[2]), + } + + @staticmethod + def _extract_file(html, post): + extr = text.extract_from(html, html.index(">File: <")) + post["url"] = extr('href="', '"') + post["filename"] = text.unquote(extr(">", "<").rpartition(".")[0]) + post["fsize"] = extr("(", ", ") + post["w"] = text.parse_int(extr("", "x")) + post["h"] = text.parse_int(extr("", ")")) + + +class _4chanarchivesBoardExtractor(Extractor): + """Extractor for boards on 4chanarchives.com""" + category = "4chanarchives" + subcategory = "board" + root = "https://4chanarchives.com" + pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)(?:/(\d+))?/?$" + test = ( + ("https://4chanarchives.com/board/c/", { + "pattern": _4chanarchivesThreadExtractor.pattern, + "range": "1-40", + "count": 40, + }), + ("https://4chanarchives.com/board/c"), + ("https://4chanarchives.com/board/c/10"), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.board, self.page = match.groups() + + def items(self): + data = {"_extractor": _4chanarchivesThreadExtractor} + pnum = text.parse_int(self.page, 1) + needle = '''<span class="postNum desktop"> + <span><a href="''' + + while True: + url = "{}/board/{}/{}".format(self.root, self.board, pnum) + page = self.request(url).text + + thread = None + for thread in text.extract_iter(page, needle, '"'): + yield Message.Queue, 
thread, data + + if thread is None: + return + pnum += 1 diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 9841ca7..5475fea 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -14,8 +14,8 @@ modules = [ "2chen", "35photo", "3dbooru", - "420chan", "4chan", + "4chanarchives", "500px", "8chan", "8muses", @@ -97,7 +97,6 @@ modules = [ "misskey", "myhentaigallery", "myportfolio", - "nana", "naver", "naverwebtoon", "newgrounds", diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 8024be9..09737ef 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -90,6 +90,21 @@ class Extractor(): def config(self, key, default=None): return config.interpolate(self._cfgpath, key, default) + def config_deprecated(self, key, deprecated, default=None, + sentinel=util.SENTINEL, history=set()): + value = self.config(deprecated, sentinel) + if value is not sentinel: + if deprecated not in history: + history.add(deprecated) + self.log.warning("'%s' is deprecated. Use '%s' instead.", + deprecated, key) + default = value + + value = self.config(key, sentinel) + if value is not sentinel: + return value + return default + def config_accumulate(self, key): return config.accumulate(self._cfgpath, key) @@ -106,7 +121,7 @@ class Extractor(): values[:0] = config.accumulate((self.subcategory,), key, conf=conf) return values - def request(self, url, *, method="GET", session=None, + def request(self, url, method="GET", session=None, retries=None, retry_codes=None, encoding=None, fatal=True, notfound=None, **kwargs): if session is None: @@ -180,7 +195,7 @@ class Extractor(): raise exception.HttpError(msg, response) - def wait(self, *, seconds=None, until=None, adjust=1.0, + def wait(self, seconds=None, until=None, adjust=1.0, reason="rate limit reset"): now = time.time() @@ -364,14 +379,24 @@ class Extractor(): def _store_cookies(self): """Store the session's cookiejar in a cookies.txt file""" - if self._cookiefile and self.config("cookies-update", True): - try: - with open(self._cookiefile, "w") as fp: - util.cookiestxt_store(fp, self._cookiejar) - except OSError as exc: - self.log.warning("cookies: %s", exc) + export = self.config("cookies-update", True) + if not export: + return + + if isinstance(export, str): + path = util.expand_path(export) + else: + path = self._cookiefile + if not path: + return + + try: + with open(path, "w") as fp: + util.cookiestxt_store(fp, self._cookiejar) + except OSError as exc: + self.log.warning("cookies: %s", exc) - def _update_cookies(self, cookies, *, domain=""): + def _update_cookies(self, cookies, domain=""): """Update the session's cookiejar with 'cookies'""" if isinstance(cookies, dict): self._update_cookies_dict(cookies, domain or self.cookiedomain) @@ -391,7 +416,7 @@ class Extractor(): for name, value in cookiedict.items(): setcookie(name, value, domain=domain) - def _check_cookies(self, cookienames, *, domain=None): + def _check_cookies(self, cookienames, domain=None): """Check if all 'cookienames' are in the session's cookiejar""" if not self._cookiejar: return False diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index dccc74e..9cd7ae4 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2022 Mike Fährmann +# Copyright 2014-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the 
terms of the GNU General Public License version 2 as @@ -21,8 +21,7 @@ class ExhentaiExtractor(Extractor): """Base class for exhentai extractors""" category = "exhentai" directory_fmt = ("{category}", "{gid} {title[:247]}") - filename_fmt = ( - "{gid}_{num:>04}_{image_token}_{filename}.{extension}") + filename_fmt = "{gid}_{num:>04}_{image_token}_{filename}.{extension}" archive_fmt = "{gid}_{num}" cookienames = ("ipb_member_id", "ipb_pass_hash") cookiedomain = ".exhentai.org" @@ -56,10 +55,10 @@ class ExhentaiExtractor(Extractor): if version != "ex": self.session.cookies.set("nw", "1", domain=self.cookiedomain) - def request(self, *args, **kwargs): - response = Extractor.request(self, *args, **kwargs) - if self._is_sadpanda(response): - self.log.info("sadpanda.jpg") + def request(self, url, **kwargs): + response = Extractor.request(self, url, **kwargs) + if response.history and response.headers.get("Content-Length") == "0": + self.log.info("blank page") raise exception.AuthorizationError() return response @@ -100,14 +99,6 @@ class ExhentaiExtractor(Extractor): raise exception.AuthenticationError() return {c: response.cookies[c] for c in self.cookienames} - @staticmethod - def _is_sadpanda(response): - """Return True if the response object contains a sad panda""" - return ( - response.headers.get("Content-Length") == "9615" and - "sadpanda.jpg" in response.headers.get("Content-Disposition", "") - ) - class ExhentaiGalleryExtractor(ExhentaiExtractor): """Extractor for image galleries from exhentai.org""" diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 4f9a6bf..76fb69e 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -88,13 +88,9 @@ BASE_PATTERN = FoolfuukaExtractor.update({ "root": "https://boards.fireden.net", "pattern": r"boards\.fireden\.net", }, - "rozenarcana": { - "root": "https://archive.alice.al", - "pattern": r"(?:archive\.)?alice\.al", - }, - "tokyochronos": { - "root": "https://www.tokyochronos.net", - "pattern": r"(?:www\.)?tokyochronos\.net", + "palanq": { + "root": "https://archive.palanq.win", + "pattern": r"archive\.palanq\.win", }, "rbt": { "root": "https://rbt.asia", @@ -137,11 +133,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): ("https://boards.fireden.net/sci/thread/11264294/", { "url": "61cab625c95584a12a30049d054931d64f8d20aa", }), - ("https://archive.alice.al/c/thread/2849220/", { - "url": "632e2c8de05de6b3847685f4bf1b4e5c6c9e0ed5", - }), - ("https://www.tokyochronos.net/a/thread/241664141/", { - "url": "ae03852cf44e3dcfce5be70274cb1828e1dbb7d6", + ("https://archive.palanq.win/c/thread/4209598/", { + "url": "1f9b5570d228f1f2991c827a6631030bc0e5933c", }), ("https://rbt.asia/g/thread/61487650/", { "url": "fadd274b25150a1bdf03a40c58db320fa3b617c4", @@ -187,8 +180,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor): ("https://arch.b4k.co/meta/"), ("https://desuarchive.org/a/"), ("https://boards.fireden.net/sci/"), - ("https://archive.alice.al/c/"), - ("https://www.tokyochronos.net/a/"), + ("https://archive.palanq.win/c/"), ("https://rbt.asia/g/"), ("https://thebarchive.com/b/"), ) @@ -231,8 +223,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): ("https://archiveofsins.com/_/search/text/test/"), 
("https://desuarchive.org/_/search/text/test/"), ("https://boards.fireden.net/_/search/text/test/"), - ("https://archive.alice.al/_/search/text/test/"), - ("https://www.tokyochronos.net/_/search/text/test/"), + ("https://archive.palanq.win/_/search/text/test/"), ("https://rbt.asia/_/search/text/test/"), ("https://thebarchive.com/_/search/text/test/"), ) @@ -297,8 +288,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor): ("https://arch.b4k.co/meta/gallery/"), ("https://desuarchive.org/a/gallery/5"), ("https://boards.fireden.net/sci/gallery/6"), - ("https://archive.alice.al/c/gallery/7"), - ("https://www.tokyochronos.net/a/gallery/7"), + ("https://archive.palanq.win/c/gallery"), ("https://rbt.asia/g/gallery/8"), ("https://thebarchive.com/b/gallery/9"), ) diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 4a38fb4..57d37b7 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -42,11 +42,6 @@ BASE_PATTERN = FoolslideExtractor.update({ "root": "https://read.powermanga.org", "pattern": r"read(?:er)?\.powermanga\.org", }, - "sensescans": { - "root": "https://sensescans.com/reader", - "pattern": r"(?:(?:www\.)?sensescans\.com/reader" - r"|reader\.sensescans\.com)", - }, }) @@ -64,11 +59,6 @@ class FoolslideChapterExtractor(FoolslideExtractor): "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384", "keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe", }), - ("https://sensescans.com/reader/read/ao_no_orchestra/en/0/26/", { - "url": "bbd428dc578f5055e9f86ad635b510386cd317cd", - "keyword": "083ef6f8831c84127fe4096fa340a249be9d1424", - }), - ("https://reader.sensescans.com/read/ao_no_orchestra/en/0/26/"), ) def items(self): @@ -129,9 +119,6 @@ class FoolslideMangaExtractor(FoolslideExtractor): "volume": int, }, }), - ("https://sensescans.com/reader/series/yotsubato/", { - "count": ">= 3", - }), ) def items(self): diff --git a/gallery_dl/extractor/imagechest.py b/gallery_dl/extractor/imagechest.py index 8b18d5e..086b95d 100644 --- a/gallery_dl/extractor/imagechest.py +++ b/gallery_dl/extractor/imagechest.py @@ -31,6 +31,12 @@ class ImagechestGalleryExtractor(GalleryExtractor): "content": "076959e65be30249a2c651fbe6090dc30ba85193", "count": 3 }), + # "Load More Files" button (#4028) + ("https://imgchest.com/p/9p4n3q2z7nq", { + "pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)", + "url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7", + "count": 52, + }), ) def __init__(self, match): @@ -49,6 +55,18 @@ class ImagechestGalleryExtractor(GalleryExtractor): } def images(self, page): + if " More Files</button>" in page: + url = "{}/p/{}/loadAll".format(self.root, self.gallery_id) + headers = { + "X-Requested-With": "XMLHttpRequest", + "Origin" : self.root, + "Referer" : self.gallery_url, + } + csrf_token = text.extr(page, 'name="csrf-token" content="', '"') + data = {"_token": csrf_token} + page += self.request( + url, method="POST", headers=headers, data=data).text + return [ (url, None) for url in text.extract_iter(page, 'data-url="', '"') diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index f8f1600..4c29d98 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -47,8 +47,13 @@ class ImgurExtractor(Extractor): image_ex = ImgurImageExtractor for item in items: - item["_extractor"] = album_ex if item["is_album"] else image_ex - yield Message.Queue, item["link"], item + if item["is_album"]: + url = "https://imgur.com/a/" + item["id"] + item["_extractor"] = album_ex + else: + 
url = "https://imgur.com/" + item["id"] + item["_extractor"] = image_ex + yield Message.Queue, url, item class ImgurImageExtractor(ImgurExtractor): @@ -272,7 +277,7 @@ class ImgurUserExtractor(ImgurExtractor): ("https://imgur.com/user/Miguenzo", { "range": "1-100", "count": 100, - "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + "pattern": r"https://imgur\.com(/a)?/\w+$", }), ("https://imgur.com/user/Miguenzo/posts"), ("https://imgur.com/user/Miguenzo/submitted"), @@ -285,17 +290,41 @@ class ImgurUserExtractor(ImgurExtractor): class ImgurFavoriteExtractor(ImgurExtractor): """Extractor for a user's favorites""" subcategory = "favorite" - pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$" test = ("https://imgur.com/user/Miguenzo/favorites", { "range": "1-100", "count": 100, - "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + "pattern": r"https://imgur\.com(/a)?/\w+$", }) def items(self): return self._items_queue(self.api.account_favorites(self.key)) +class ImgurFavoriteFolderExtractor(ImgurExtractor): + """Extractor for a user's favorites folder""" + subcategory = "favorite-folder" + pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)" + test = ( + ("https://imgur.com/user/mikf1/favorites/folder/11896757/public", { + "pattern": r"https://imgur\.com(/a)?/\w+$", + "count": 3, + }), + ("https://imgur.com/user/mikf1/favorites/folder/11896741/private", { + "pattern": r"https://imgur\.com(/a)?/\w+$", + "count": 5, + }), + ) + + def __init__(self, match): + ImgurExtractor.__init__(self, match) + self.folder_id = match.group(2) + + def items(self): + return self._items_queue(self.api.account_favorites_folder( + self.key, self.folder_id)) + + class ImgurSubredditExtractor(ImgurExtractor): """Extractor for a subreddits's imgur links""" subcategory = "subreddit" @@ -303,7 +332,7 @@ class ImgurSubredditExtractor(ImgurExtractor): test = ("https://imgur.com/r/pics", { "range": "1-100", "count": 100, - "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + "pattern": r"https://imgur\.com(/a)?/\w+$", }) def items(self): @@ -317,7 +346,7 @@ class ImgurTagExtractor(ImgurExtractor): test = ("https://imgur.com/t/animals", { "range": "1-100", "count": 100, - "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + "pattern": r"https://imgur\.com(/a)?/\w+$", }) def items(self): @@ -331,7 +360,7 @@ class ImgurSearchExtractor(ImgurExtractor): test = ("https://imgur.com/search?q=cute+cat", { "range": "1-100", "count": 100, - "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+", + "pattern": r"https://imgur\.com(/a)?/\w+$", }) def items(self): @@ -346,15 +375,18 @@ class ImgurAPI(): """ def __init__(self, extractor): self.extractor = extractor - self.headers = { - "Authorization": "Client-ID " + ( - extractor.config("client-id") or "546c25a59c58ad7"), - } + self.client_id = extractor.config("client-id") or "546c25a59c58ad7" + self.headers = {"Authorization": "Client-ID " + self.client_id} def account_favorites(self, account): endpoint = "/3/account/{}/gallery_favorites".format(account) return self._pagination(endpoint) + def account_favorites_folder(self, account, folder_id): + endpoint = "/3/account/{}/folders/{}/favorites".format( + account, folder_id) + return self._pagination_v2(endpoint) + def gallery_search(self, query): endpoint = "/3/gallery/search" params = {"q": query} @@ -386,12 +418,12 @@ class ImgurAPI(): endpoint = "/post/v1/posts/" + gallery_hash return self._call(endpoint) - def _call(self, 
endpoint, params=None): + def _call(self, endpoint, params=None, headers=None): while True: try: return self.extractor.request( "https://api.imgur.com" + endpoint, - params=params, headers=self.headers, + params=params, headers=(headers or self.headers), ).json() except exception.HttpError as exc: if exc.status not in (403, 429) or \ @@ -410,3 +442,23 @@ class ImgurAPI(): return yield from data num += 1 + + def _pagination_v2(self, endpoint, params=None, key=None): + if params is None: + params = {} + params["client_id"] = self.client_id + params["page"] = 0 + params["sort"] = "newest" + + headers = { + "Referer": "https://imgur.com/", + "Origin": "https://imgur.com", + } + + while True: + data = self._call(endpoint, params, headers)["data"] + if not data: + return + yield from data + + params["page"] += 1 diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 409483b..12b8f39 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -85,6 +85,10 @@ class MangadexExtractor(Extractor): data["group"] = [group["attributes"]["name"] for group in relationships["scanlation_group"]] + data["status"] = mattributes["status"] + data["tags"] = [tag["attributes"]["name"]["en"] + for tag in mattributes["tags"]] + return data @@ -94,13 +98,13 @@ class MangadexChapterExtractor(MangadexExtractor): pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)" test = ( ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", { - "keyword": "86fb262cf767dac6d965cd904ad499adba466404", + "keyword": "e86128a79ebe7201b648f1caa828496a2878dc8f", # "content": "50383a4c15124682057b197d40261641a98db514", }), # oneshot ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", { "count": 64, - "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb", + "keyword": "d11ed057a919854696853362be35fc0ba7dded4c", }), # MANGA Plus (#1154) ("https://mangadex.org/chapter/74149a55-e7c4-44ea-8a37-98e879c1096f", { @@ -157,6 +161,9 @@ class MangadexMangaExtractor(MangadexExtractor): "language": str, "artist" : ["Arakawa Hiromu"], "author" : ["Arakawa Hiromu"], + "status" : "completed", + "tags" : ["Oneshot", "Historical", "Action", + "Martial Arts", "Drama", "Tragedy"], }, }), ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", { diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py index 7d23518..fd16f24 100644 --- a/gallery_dl/extractor/myportfolio.py +++ b/gallery_dl/extractor/myportfolio.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2022 Mike Fährmann +# Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.myportfolio.com/""" +"""Extractors for https://www.myportfolio.com/""" from .common import Extractor, Message from .. 
import text, exception @@ -21,7 +21,7 @@ class MyportfolioGalleryExtractor(Extractor): archive_fmt = "{user}_{filename}" pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|" r"(?:https?://)?([\w-]+\.myportfolio\.com))" - r"(/[^/?&#]+)?") + r"(/[^/?#]+)?") test = ( ("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", { "url": "acea0690c76db0e5cf267648cefd86e921bc3499", diff --git a/gallery_dl/extractor/nana.py b/gallery_dl/extractor/nana.py deleted file mode 100644 index 24e676f..0000000 --- a/gallery_dl/extractor/nana.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://nana.my.id/""" - -from .common import GalleryExtractor, Extractor, Message -from .. import text, util, exception - - -class NanaGalleryExtractor(GalleryExtractor): - """Extractor for image galleries from nana.my.id""" - category = "nana" - directory_fmt = ("{category}", "{title}") - pattern = r"(?:https?://)?nana\.my\.id/reader/([^/?#]+)" - test = ( - (("https://nana.my.id/reader/" - "059f7de55a4297413bfbd432ce7d6e724dd42bae"), { - "pattern": r"https://nana\.my\.id/reader/" - r"\w+/image/page\?path=.*\.\w+", - "keyword": { - "title" : "Everybody Loves Shion", - "artist": "fuzui", - "tags" : list, - "count" : 29, - }, - }), - (("https://nana.my.id/reader/" - "77c8712b67013e427923573379f5bafcc0c72e46"), { - "pattern": r"https://nana\.my\.id/reader/" - r"\w+/image/page\?path=.*\.\w+", - "keyword": { - "title" : "Lovey-Dovey With an Otaku-Friendly Gyaru", - "artist": "Sueyuu", - "tags" : ["Sueyuu"], - "count" : 58, - }, - }), - ) - - def __init__(self, match): - self.gallery_id = match.group(1) - url = "https://nana.my.id/reader/" + self.gallery_id - GalleryExtractor.__init__(self, match, url) - - def metadata(self, page): - title = text.unescape( - text.extr(page, '</a> ', '</div>')) - artist = text.unescape(text.extr( - page, '<title>', '</title>'))[len(title):-10] - tags = text.extr(page, 'Reader.tags = "', '"') - - return { - "gallery_id": self.gallery_id, - "title" : title, - "artist" : artist[4:] if artist.startswith(" by ") else "", - "tags" : tags.split(", ") if tags else (), - "lang" : "en", - "language" : "English", - } - - def images(self, page): - data = util.json_loads(text.extr(page, "Reader.pages = ", ".pages")) - return [ - ("https://nana.my.id" + image, None) - for image in data["pages"] - ] - - -class NanaSearchExtractor(Extractor): - """Extractor for nana search results""" - category = "nana" - subcategory = "search" - pattern = r"(?:https?://)?nana\.my\.id(?:/?\?([^#]+))" - test = ( - ('https://nana.my.id/?q=+"elf"&sort=desc', { - "pattern": NanaGalleryExtractor.pattern, - "range": "1-100", - "count": 100, - }), - ("https://nana.my.id/?q=favorites%3A", { - "pattern": NanaGalleryExtractor.pattern, - "count": ">= 2", - }), - ) - - def __init__(self, match): - Extractor.__init__(self, match) - self.params = text.parse_query(match.group(1)) - self.params["p"] = text.parse_int(self.params.get("p"), 1) - self.params["q"] = self.params.get("q") or "" - - def items(self): - if "favorites:" in self.params["q"]: - favkey = self.config("favkey") - if not favkey: - raise exception.AuthenticationError( - "'Favorite key' not provided. 
" - "Please see 'https://nana.my.id/tutorial'") - self.session.cookies.set("favkey", favkey, domain="nana.my.id") - - data = {"_extractor": NanaGalleryExtractor} - while True: - try: - page = self.request( - "https://nana.my.id", params=self.params).text - except exception.HttpError: - return - - for gallery in text.extract_iter( - page, '<div class="id3">', '</div>'): - url = "https://nana.my.id" + text.extr( - gallery, '<a href="', '"') - yield Message.Queue, url, data - - self.params["p"] += 1 diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 404f296..824757c 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -134,7 +134,7 @@ class OAuthBase(Extractor): def _oauth2_authorization_code_grant( self, client_id, client_secret, default_id, default_secret, - auth_url, token_url, *, scope="read", duration="permanent", + auth_url, token_url, scope="read", duration="permanent", key="refresh_token", auth=True, cache=None, instance=None): """Perform an OAuth2 authorization code grant""" diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 31ddbcc..92e0588 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -112,7 +112,7 @@ class PinterestExtractor(Extractor): class PinterestPinExtractor(PinterestExtractor): """Extractor for images from a single pin from pinterest.com""" subcategory = "pin" - pattern = BASE_PATTERN + r"/pin/([^/?#&]+)(?!.*#related$)" + pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)" test = ( ("https://www.pinterest.com/pin/858146903966145189/", { "url": "afb3c26719e3a530bb0e871c480882a801a4e8a5", @@ -121,7 +121,7 @@ class PinterestPinExtractor(PinterestExtractor): }), # video pin (#1189) ("https://www.pinterest.com/pin/422564377542934214/", { - "pattern": r"https://v\.pinimg\.com/videos/mc/hls/d7/22/ff" + "pattern": r"https://v\d*\.pinimg\.com/videos/mc/hls/d7/22/ff" r"/d722ff00ab2352981b89974b37909de8.m3u8", }), ("https://www.pinterest.com/pin/858146903966145188/", { @@ -147,8 +147,8 @@ class PinterestBoardExtractor(PinterestExtractor): subcategory = "board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}") archive_fmt = "{board[id]}_{id}" - pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)" - "/(?!_saved|_created|pins/)([^/?#&]+)/?$") + pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)" + "/(?!_saved|_created|pins/)([^/?#]+)/?$") test = ( ("https://www.pinterest.com/g1952849/test-/", { "pattern": r"https://i\.pinimg\.com/originals/", @@ -198,7 +198,7 @@ class PinterestBoardExtractor(PinterestExtractor): class PinterestUserExtractor(PinterestExtractor): """Extractor for a user's boards""" subcategory = "user" - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)(?:/_saved)?/?$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)(?:/_saved)?/?$" test = ( ("https://www.pinterest.com/g1952849/", { "pattern": PinterestBoardExtractor.pattern, @@ -223,7 +223,7 @@ class PinterestAllpinsExtractor(PinterestExtractor): """Extractor for a user's 'All Pins' feed""" subcategory = "allpins" directory_fmt = ("{category}", "{user}") - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/pins/?$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/pins/?$" test = ("https://www.pinterest.com/g1952849/pins/", { "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w{3}", @@ -245,10 +245,10 @@ class PinterestCreatedExtractor(PinterestExtractor): """Extractor for a user's created pins""" subcategory = "created" 
directory_fmt = ("{category}", "{user}") - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/_created/?$" test = ("https://www.pinterest.de/digitalmomblog/_created/", { "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" - r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg", + r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.(jpg|png)", "count": 10, "range": "1-10", }) @@ -270,7 +270,7 @@ class PinterestSectionExtractor(PinterestExtractor): directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}", "{section[title]}") archive_fmt = "{board[id]}_{id}" - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/([^/?#&]+)" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/([^/?#]+)" test = ("https://www.pinterest.com/g1952849/stuff/section", { "count": 2, }) @@ -321,7 +321,7 @@ class PinterestRelatedPinExtractor(PinterestPinExtractor): """Extractor for related pins of another pin from pinterest.com""" subcategory = "related-pin" directory_fmt = ("{category}", "related {original_pin[id]}") - pattern = BASE_PATTERN + r"/pin/([^/?#&]+).*#related$" + pattern = BASE_PATTERN + r"/pin/([^/?#]+).*#related$" test = ("https://www.pinterest.com/pin/858146903966145189/#related", { "range": "31-70", "count": 40, @@ -340,7 +340,7 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor): subcategory = "related-board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}", "related") - pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/([^/?#&]+)/?#related$" + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#]+)/([^/?#]+)/?#related$" test = ("https://www.pinterest.com/g1952849/test-/#related", { "range": "31-70", "count": 40, @@ -348,13 +348,13 @@ class PinterestRelatedBoardExtractor(PinterestBoardExtractor): }) def pins(self): - return self.api.board_related(self.board["id"]) + return self.api.board_content_recommendation(self.board["id"]) class PinterestPinitExtractor(PinterestExtractor): """Extractor for images from a pin.it URL""" subcategory = "pinit" - pattern = r"(?:https?://)?pin\.it/([^/?#&]+)" + pattern = r"(?:https?://)?pin\.it/([^/?#]+)" test = ( ("https://pin.it/Hvt8hgT", { @@ -370,7 +370,7 @@ class PinterestPinitExtractor(PinterestExtractor): self.shortened_id = match.group(1) def items(self): - url = "https://api.pinterest.com/url_shortener/{}/redirect".format( + url = "https://api.pinterest.com/url_shortener/{}/redirect/".format( self.shortened_id) response = self.request(url, method="HEAD", allow_redirects=False) location = response.headers.get("Location") @@ -458,10 +458,10 @@ class PinterestAPI(): options = {"section_id": section_id} return self._pagination("BoardSectionPins", options) - def board_related(self, board_id): + def board_content_recommendation(self, board_id): """Yield related pins of a specific board""" - options = {"board_id": board_id, "add_vase": True} - return self._pagination("BoardRelatedPixieFeed", options) + options = {"id": board_id, "type": "board", "add_vase": True} + return self._pagination("BoardContentRecommendation", options) def user_pins(self, user): """Yield all pins from 'user'""" diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py index 5d83299..47e067b 100644 --- a/gallery_dl/extractor/sankakucomplex.py +++ b/gallery_dl/extractor/sankakucomplex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it 
diff --git a/gallery_dl/extractor/urlshortener.py b/gallery_dl/extractor/urlshortener.py
index 1a39b5b..972b508 100644
--- a/gallery_dl/extractor/urlshortener.py
+++ b/gallery_dl/extractor/urlshortener.py
@@ -34,7 +34,7 @@ BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
 class UrlshortenerLinkExtractor(UrlshortenerExtractor):
     """Extractor for general-purpose URL shorteners"""
     subcategory = "link"
-    pattern = BASE_PATTERN + r"/([^/?&#]+)"
+    pattern = BASE_PATTERN + r"/([^/?#]+)"
     test = (
         ("https://bit.ly/3cWIUgq", {
             "count": 1,
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index aad307f..08e6e70 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -148,20 +148,6 @@ def build_parser():
         help="Delete cached login sessions, cookies, etc. for MODULE "
              "(ALL to delete everything)",
     )
-    general.add_argument(
-        "--cookies",
-        dest="cookies", metavar="FILE", action=ConfigAction,
-        help="File to load additional cookies from",
-    )
-    general.add_argument(
-        "--cookies-from-browser",
-        dest="cookies_from_browser",
-        metavar="BROWSER[+KEYRING][:PROFILE][::CONTAINER]",
-        help=("Name of the browser to load cookies from, "
-              "with optional keyring name prefixed with '+', "
-              "profile prefixed with ':', and "
-              "container prefixed with '::' ('none' for no container)"),
-    )
 
     output = parser.add_argument_group("Output Options")
     output.add_argument(
@@ -374,6 +360,28 @@ def build_parser():
         help="Enable .netrc authentication data",
     )
 
+    cookies = parser.add_argument_group("Cookie Options")
+    cookies.add_argument(
+        "-C", "--cookies",
+        dest="cookies", metavar="FILE", action=ConfigAction,
+        help="File to load additional cookies from",
+    )
+    cookies.add_argument(
+        "--cookies-export",
+        dest="cookies-update", metavar="FILE", action=ConfigAction,
+        help="Export session cookies to FILE",
+    )
+    cookies.add_argument(
+        "--cookies-from-browser",
+        dest="cookies_from_browser",
+        metavar="BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]",
+        help=("Name of the browser to load cookies from, with optional "
+              "domain prefixed with '/', "
+              "keyring name prefixed with '+', "
+              "profile prefixed with ':', and "
+              "container prefixed with '::' ('none' for no container)"),
+    )
+
     selection = parser.add_argument_group("Selection Options")
     selection.add_argument(
         "--download-archive",
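
Note on the option.py hunks above: the cookie flags move into their own
"Cookie Options" group, -C becomes a short form of --cookies, --cookies-export
is new, and --cookies-from-browser gains an optional /DOMAIN part. The parser
for that spec string is not part of this diff; the sketch below only
illustrates how "BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]" could be
decomposed, using the prefix characters from the help text:

    import re

    # Hypothetical spec parser, not gallery-dl's implementation.
    SPEC = re.compile(
        r"(?P<browser>[^/+:]+)"
        r"(?:/(?P<domain>[^+:]+))?"
        r"(?:\+(?P<keyring>[^:]+))?"
        r"(?::(?!:)(?P<profile>[^:]+))?"
        r"(?:::(?P<container>.+))?"
    )

    m = SPEC.match("firefox/example.org+kwallet:work::none")
    print(m.groupdict())
    # {'browser': 'firefox', 'domain': 'example.org',
    #  'keyring': 'kwallet', 'profile': 'work', 'container': 'none'}
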
-__version__ = "1.25.3" +__version__ = "1.25.4" diff --git a/test/test_ytdl.py b/test/test_ytdl.py index 4c20f67..878ac85 100644 --- a/test/test_ytdl.py +++ b/test/test_ytdl.py @@ -235,12 +235,6 @@ class Test_CommandlineArguments(unittest.TestCase): class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments): module_name = "yt_dlp" - @classmethod - def setUpClass(cls): - super().setUpClass() - if cls.module.version.__version__ > "2023.03.04": - cls.test_geo_bypass = cls._test_geo_bypass_xff - def test_retries_extractor(self): inf = float("inf") @@ -275,7 +269,22 @@ class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments): "title:%(artist)s - %(title)s")], }) - def _test_geo_bypass_xff(self): + def test_geo_bypass(self): + try: + ytdl.parse_command_line(self.module, ["--xff", "default"]) + except Exception: + # before --xff (c16644642) + return Test_CommandlineArguments.test_geo_bypass(self) + + self._(["--xff", "default"], + "geo_bypass", "default") + self._(["--xff", "never"], + "geo_bypass", "never") + self._(["--xff", "EN"], + "geo_bypass", "EN") + self._(["--xff", "198.51.100.14/24"], + "geo_bypass", "198.51.100.14/24") + self._("--geo-bypass", "geo_bypass", "default") self._("--no-geo-bypass", |
