Diffstat:
-rw-r--r--  CHANGELOG.md                          |  35
-rw-r--r--  PKG-INFO                              |   7
-rw-r--r--  README.rst                            |   5
-rw-r--r--  data/completion/_gallery-dl           |   2
-rw-r--r--  data/completion/gallery-dl.fish       |   2
-rw-r--r--  data/man/gallery-dl.1                 |   6
-rw-r--r--  data/man/gallery-dl.conf.5            |  29
-rw-r--r--  docs/gallery-dl.conf                  |   6
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |   7
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt       |   6
-rw-r--r--  gallery_dl/__init__.py                |   7
-rw-r--r--  gallery_dl/cookies.py                 |  86
-rw-r--r--  gallery_dl/extractor/2chen.py         |  27
-rw-r--r--  gallery_dl/extractor/35photo.py       |   1
-rw-r--r--  gallery_dl/extractor/8chan.py         |   4
-rw-r--r--  gallery_dl/extractor/__init__.py      |   6
-rw-r--r--  gallery_dl/extractor/bunkr.py         |  74
-rw-r--r--  gallery_dl/extractor/danbooru.py      |   7
-rw-r--r--  gallery_dl/extractor/deviantart.py    |  62
-rw-r--r--  gallery_dl/extractor/erome.py         |  37
-rw-r--r--  gallery_dl/extractor/exhentai.py      |  10
-rw-r--r--  gallery_dl/extractor/fapello.py       | 152
-rw-r--r--  gallery_dl/extractor/foolslide.py     |  12
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py  |   3
-rw-r--r--  gallery_dl/extractor/imagehosts.py    |   2
-rw-r--r--  gallery_dl/extractor/imgth.py         |  83
-rw-r--r--  gallery_dl/extractor/imgur.py         |   8
-rw-r--r--  gallery_dl/extractor/instagram.py     |  10
-rw-r--r--  gallery_dl/extractor/khinsider.py     |   2
-rw-r--r--  gallery_dl/extractor/kissgoddess.py   |  82
-rw-r--r--  gallery_dl/extractor/kohlchan.py      |  78
-rw-r--r--  gallery_dl/extractor/komikcast.py     |  24
-rw-r--r--  gallery_dl/extractor/lolisafe.py      |   6
-rw-r--r--  gallery_dl/extractor/lynxchan.py      | 104
-rw-r--r--  gallery_dl/extractor/mangadex.py      |   2
-rw-r--r--  gallery_dl/extractor/mangafox.py      |  93
-rw-r--r--  gallery_dl/extractor/pixiv.py         |  22
-rw-r--r--  gallery_dl/extractor/reddit.py        |   2
-rw-r--r--  gallery_dl/extractor/redgifs.py       |   4
-rw-r--r--  gallery_dl/extractor/seiga.py         |   6
-rw-r--r--  gallery_dl/extractor/sexcom.py        |   4
-rw-r--r--  gallery_dl/extractor/slickpic.py      |   3
-rw-r--r--  gallery_dl/extractor/smugmug.py       |   2
-rw-r--r--  gallery_dl/extractor/soundgasm.py     |  93
-rw-r--r--  gallery_dl/extractor/twibooru.py      |   2
-rw-r--r--  gallery_dl/extractor/twitter.py       |  17
-rw-r--r--  gallery_dl/extractor/unsplash.py      |  18
-rw-r--r--  gallery_dl/extractor/warosu.py        |  62
-rw-r--r--  gallery_dl/extractor/webmshare.py     |  84
-rw-r--r--  gallery_dl/extractor/webtoons.py      |   1
-rw-r--r--  gallery_dl/extractor/zerochan.py      | 107
-rw-r--r--  gallery_dl/formatter.py               |  16
-rw-r--r--  gallery_dl/option.py                  |   8
-rw-r--r--  gallery_dl/path.py                    |  21
-rw-r--r--  gallery_dl/util.py                    |  24
-rw-r--r--  gallery_dl/version.py                 |   2
-rw-r--r--  test/test_util.py                     |  11

57 files changed, 1072 insertions(+), 524 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 76d65cd..700efb1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,40 @@
# Changelog
+## 1.24.2 - 2022-12-18
+### Additions
+- [2chen] support `.club` URLs ([#3406](https://github.com/mikf/gallery-dl/issues/3406))
+- [deviantart] extract sta.sh URLs from `text_content` ([#3366](https://github.com/mikf/gallery-dl/issues/3366))
+- [deviantart] add `/view` URL support ([#3367](https://github.com/mikf/gallery-dl/issues/3367))
+- [e621] implement `threshold` option to control pagination ([#3413](https://github.com/mikf/gallery-dl/issues/3413))
+- [fapello] add `post`, `user` and `path` extractors ([#3065](https://github.com/mikf/gallery-dl/issues/3065), [#3360](https://github.com/mikf/gallery-dl/issues/3360), [#3415](https://github.com/mikf/gallery-dl/issues/3415))
+- [imgur] add support for imgur.io URLs ([#3419](https://github.com/mikf/gallery-dl/issues/3419))
+- [lynxchan] add generic extractors for lynxchan imageboards ([#3389](https://github.com/mikf/gallery-dl/issues/3389), [#3394](https://github.com/mikf/gallery-dl/issues/3394))
+- [mangafox] extract more metadata ([#3167](https://github.com/mikf/gallery-dl/issues/3167))
+- [pixiv] extract `date_url` metadata ([#3405](https://github.com/mikf/gallery-dl/issues/3405))
+- [soundgasm] add `audio` and `user` extractors ([#3384](https://github.com/mikf/gallery-dl/issues/3384), [#3388](https://github.com/mikf/gallery-dl/issues/3388))
+- [webmshare] add `video` extractor ([#2410](https://github.com/mikf/gallery-dl/issues/2410))
+- support Firefox containers for `--cookies-from-browser` ([#3346](https://github.com/mikf/gallery-dl/issues/3346))
+### Fixes
+- [2chen] fix file URLs
+- [bunkr] update domain ([#3391](https://github.com/mikf/gallery-dl/issues/3391))
+- [exhentai] fix pagination
+- [imagetwist] fix extraction
+- [imgth] rewrite
+- [instagram] prevent post `date` overwriting file `date` ([#3392](https://github.com/mikf/gallery-dl/issues/3392))
+- [khinsider] fix metadata extraction
+- [komikcast] update domain and fix extraction
+- [reddit] increase `id-max` default value ([#3397](https://github.com/mikf/gallery-dl/issues/3397))
+- [seiga] raise error when redirected to login page ([#3401](https://github.com/mikf/gallery-dl/issues/3401))
+- [sexcom] fix video URLs ([#3408](https://github.com/mikf/gallery-dl/issues/3408), [#3414](https://github.com/mikf/gallery-dl/issues/3414))
+- [twitter] update `search` pagination ([#544](https://github.com/mikf/gallery-dl/issues/544))
+- [warosu] fix and update
+- [zerochan] update for layout v3
+- restore paths for archived files ([#3362](https://github.com/mikf/gallery-dl/issues/3362), [#3377](https://github.com/mikf/gallery-dl/issues/3377))
+- use `util.NONE` as `keyword-default` default value ([#3334](https://github.com/mikf/gallery-dl/issues/3334))
+### Removals
+- [foolslide] remove `kireicake`
+- [kissgoddess] remove module
+
## 1.24.1 - 2022-12-04
### Additions
- [artstation] add `pro-first` option ([#3273](https://github.com/mikf/gallery-dl/issues/3273))
diff --git a/PKG-INFO b/PKG-INFO
index c2bf496..68af9dd 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.24.1
+Version: 1.24.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -103,8 +103,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index fad6b30..b21d948 100644
--- a/README.rst
+++ b/README.rst
@@ -66,8 +66,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index eb5c0f4..13ee2ea 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -16,7 +16,7 @@ _arguments -C -S \
--user-agent'[User-Agent request header]':'<ua>' \
--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
--cookies'[File to load additional cookies from]':'<file>':_files \
---cookies-from-browser'[Name of the browser to load cookies from, with optional keyring name prefixed with "+" and profile prefixed with ":"]':'<browser[+keyring][:profile]>' \
+--cookies-from-browser'[Name of the browser to load cookies from, with optional keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)]':'<browser[+keyring][:profile][::container]>' \
{-q,--quiet}'[Activate quiet mode]' \
{-v,--verbose}'[Print various debugging information]' \
{-g,--get-urls}'[Print URLs instead of downloading]' \
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 87e625a..50ad132 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -10,7 +10,7 @@ complete -c gallery-dl -x -l 'source-address' -d 'Client-side IP address to bind
complete -c gallery-dl -x -l 'user-agent' -d 'User-Agent request header'
complete -c gallery-dl -x -l 'clear-cache' -d 'Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)'
complete -c gallery-dl -r -F -l 'cookies' -d 'File to load additional cookies from'
-complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional keyring name prefixed with "+" and profile prefixed with ":"'
+complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container)'
complete -c gallery-dl -s 'q' -l 'quiet' -d 'Activate quiet mode'
complete -c gallery-dl -s 'v' -l 'verbose' -d 'Print various debugging information'
complete -c gallery-dl -s 'g' -l 'get-urls' -d 'Print URLs instead of downloading'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index aac3757..d85b1c9 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-12-04" "1.24.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-12-18" "1.24.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -50,8 +50,8 @@ Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything
.B "\-\-cookies" \f[I]FILE\f[]
File to load additional cookies from
.TP
-.B "\-\-cookies\-from\-browser" \f[I]BROWSER[+KEYRING][:PROFILE]\f[]
-Name of the browser to load cookies from, with optional keyring name prefixed with '+' and profile prefixed with ':'
+.B "\-\-cookies\-from\-browser" \f[I]BROWSER[+KEYRING][:PROFILE][::CONTAINER]\f[]
+Name of the browser to load cookies from, with optional keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container)
.TP
.B "\-q, \-\-quiet"
Activate quiet mode
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 6565e96..36b2c84 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-12-04" "1.24.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-12-18" "1.24.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -519,7 +519,7 @@ Source to read additional cookies from. This can be
}
.br
-* A \f[I]list\f[] with up to 3 entries specifying a browser profile.
+* A \f[I]list\f[] with up to 4 entries specifying a browser profile.
.br
* The first entry is the browser name
@@ -527,10 +527,13 @@ Source to read additional cookies from. This can be
* The optional second entry is a profile name or an absolute path to a profile directory
.br
* The optional third entry is the keyring to retrieve passwords for decrypting cookies from
+.br
+* The optional fourth entry is a (Firefox) container name (\f[I]"none"\f[] for only cookies with no container)
.. code:: json
["firefox"]
+["firefox", null, null, "Personal"]
["chromium", "Private", "kwallet"]
@@ -1121,7 +1124,7 @@ Download embedded videos hosted on https://www.blogger.com/
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"auto"\f[]
+\f[I]null\f[]
.IP "Example:" 4
"cyberdrop.to"
@@ -1158,6 +1161,24 @@ Extract additional metadata (notes, artist commentary, parent, children)
Note: This requires 1 additional HTTP request for each post.
+.SS extractor.danbooru.threshold
+.IP "Type:" 6
+\f[I]string\f[] or \f[I]int\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+Stop paginating over API results if the length of a batch of returned
+posts is less than the specified number. Defaults to the per-page limit
+of the current instance, which is 320 for \f[I]e621\f[] and 200 for
+everything else.
+
+Note: Changing this setting is normally not necessary. When the value is
+greater than the per-page limit, gallery-dl will stop after the first
+batch. The value cannot be less than 1.
+
+
.SS extractor.danbooru.ugoira
.IP "Type:" 6
\f[I]bool\f[]
@@ -1961,7 +1982,7 @@ the first in the list gets chosen (usually mp3).
\f[I]string\f[]
.IP "Default:" 9
-\f[I]"auto"\f[]
+\f[I]null\f[]
.IP "Description:" 4
Specifies the domain used by a \f[I]lolisafe\f[] extractor
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 6b12721..98974e9 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -55,7 +55,7 @@
},
"cyberdrop":
{
- "domain": "auto"
+ "domain": null
},
"danbooru":
{
@@ -251,8 +251,8 @@
"date-min": 0,
"date-max": 253402210800,
"date-format": "%Y-%m-%dT%H:%M:%S",
- "id-min": "0",
- "id-max": "zik0zj",
+ "id-min": null,
+ "id-max": null,
"recursion": 0,
"videos": true
},
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3b3201e..03c1930 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.24.1
+Version: 1.24.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -103,8 +103,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.exe>`__
+ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.24.2/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index ffa0e95..556dc49 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -74,6 +74,7 @@ gallery_dl/extractor/fallenangels.py
gallery_dl/extractor/fanbox.py
gallery_dl/extractor/fantia.py
gallery_dl/extractor/fapachi.py
+gallery_dl/extractor/fapello.py
gallery_dl/extractor/flickr.py
gallery_dl/extractor/foolfuuka.py
gallery_dl/extractor/foolslide.py
@@ -112,14 +113,13 @@ gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
-gallery_dl/extractor/kissgoddess.py
-gallery_dl/extractor/kohlchan.py
gallery_dl/extractor/komikcast.py
gallery_dl/extractor/lightroom.py
gallery_dl/extractor/lineblog.py
gallery_dl/extractor/livedoor.py
gallery_dl/extractor/lolisafe.py
gallery_dl/extractor/luscious.py
+gallery_dl/extractor/lynxchan.py
gallery_dl/extractor/mangadex.py
gallery_dl/extractor/mangafox.py
gallery_dl/extractor/mangahere.py
@@ -176,6 +176,7 @@ gallery_dl/extractor/skeb.py
gallery_dl/extractor/slickpic.py
gallery_dl/extractor/slideshare.py
gallery_dl/extractor/smugmug.py
+gallery_dl/extractor/soundgasm.py
gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/subscribestar.py
gallery_dl/extractor/tapas.py
@@ -197,6 +198,7 @@ gallery_dl/extractor/wallhaven.py
gallery_dl/extractor/wallpapercave.py
gallery_dl/extractor/warosu.py
gallery_dl/extractor/weasyl.py
+gallery_dl/extractor/webmshare.py
gallery_dl/extractor/webtoons.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 3701d6f..611b2b9 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -66,7 +66,12 @@ def main():
if args.cookies_from_browser:
browser, _, profile = args.cookies_from_browser.partition(":")
browser, _, keyring = browser.partition("+")
- config.set((), "cookies", (browser, profile, keyring))
+ if profile.startswith(":"):
+ container = profile[1:]
+ profile = None
+ else:
+ profile, _, container = profile.partition("::")
+ config.set((), "cookies", (browser, profile, keyring, container))
for opts in args.options:
config.set(*opts)
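The hunk above splits the combined BROWSER[+KEYRING][:PROFILE][::CONTAINER]
argument with a chain of str.partition() calls. A standalone sketch of the
same parsing order (not the actual gallery-dl function, just its logic):

    def parse_cookies_arg(value):
        # "firefox+kwallet:default::Work" -> browser, profile, keyring, container
        browser, _, profile = value.partition(":")
        browser, _, keyring = browser.partition("+")
        if profile.startswith(":"):
            # "firefox::Personal" - container given without a profile
            container = profile[1:]
            profile = None
        else:
            profile, _, container = profile.partition("::")
        return browser, profile, keyring, container

    parse_cookies_arg("firefox::Personal")
    # -> ("firefox", None, "", "Personal")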
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 6f9a92d..ee00bf7 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -24,7 +24,7 @@ import tempfile
from datetime import datetime, timedelta, timezone
from hashlib import pbkdf2_hmac
from http.cookiejar import Cookie
-from . import aes
+from . import aes, text
SUPPORTED_BROWSERS_CHROMIUM = {
@@ -35,11 +35,10 @@ logger = logging.getLogger("cookies")
def load_cookies(cookiejar, browser_specification):
- browser_name, profile, keyring = \
+ browser_name, profile, keyring, container = \
_parse_browser_specification(*browser_specification)
-
if browser_name == "firefox":
- load_cookies_firefox(cookiejar, profile)
+ load_cookies_firefox(cookiejar, profile, container)
elif browser_name == "safari":
load_cookies_safari(cookiejar, profile)
elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
@@ -48,12 +47,24 @@ def load_cookies(cookiejar, browser_specification):
raise ValueError("unknown browser '{}'".format(browser_name))
-def load_cookies_firefox(cookiejar, profile=None):
- set_cookie = cookiejar.set_cookie
- with _firefox_cookies_database(profile) as db:
+def load_cookies_firefox(cookiejar, profile=None, container=None):
+ path, container_id = _firefox_cookies_database(profile, container)
+ with DatabaseCopy(path) as db:
+
+ sql = ("SELECT name, value, host, path, isSecure, expiry "
+ "FROM moz_cookies")
+ parameters = ()
+
+ if container_id is False:
+ sql += " WHERE NOT INSTR(originAttributes,'userContextId=')"
+ elif container_id:
+ sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?"
+ uid = "%userContextId={}".format(container_id)
+ parameters = (uid, uid + "&%")
+
+ set_cookie = cookiejar.set_cookie
for name, value, domain, path, secure, expires in db.execute(
- "SELECT name, value, host, path, isSecure, expiry "
- "FROM moz_cookies"):
+ sql, parameters):
set_cookie(Cookie(
0, name, value, None, False,
domain, bool(domain), domain.startswith("."),
@@ -79,9 +90,10 @@ def load_cookies_safari(cookiejar, profile=None):
def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
config = _get_chromium_based_browser_settings(browser_name)
+ path = _chrome_cookies_database(profile, config)
+ logger.debug("Extracting cookies from %s", path)
- with _chrome_cookies_database(profile, config) as db:
-
+ with DatabaseCopy(path) as db:
db.text_factory = bytes
decryptor = get_cookie_decryptor(
config["directory"], config["keyring"], keyring=keyring)
@@ -134,8 +146,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile, keyring):
# --------------------------------------------------------------------
# firefox
-def _firefox_cookies_database(profile=None):
- if profile is None:
+def _firefox_cookies_database(profile=None, container=None):
+ if not profile:
search_root = _firefox_browser_directory()
elif _is_path(profile):
search_root = profile
@@ -146,14 +158,45 @@ def _firefox_cookies_database(profile=None):
if path is None:
raise FileNotFoundError("Unable to find Firefox cookies database in "
"{}".format(search_root))
-
logger.debug("Extracting cookies from %s", path)
- return DatabaseCopy(path)
+
+ if container == "none":
+ container_id = False
+ logger.debug("Only loading cookies not belonging to any container")
+
+ elif container:
+ containers_path = os.path.join(
+ os.path.dirname(path), "containers.json")
+
+ try:
+ with open(containers_path) as containers:
+ identities = json.load(containers)["identities"]
+ except OSError:
+ logger.error("Unable to read Firefox container database at %s",
+ containers_path)
+ raise
+ except KeyError:
+ identities = ()
+
+ for context in identities:
+ if container == context.get("name") or container == text.extr(
+ context.get("l10nID", ""), "userContext", ".label"):
+ container_id = context["userContextId"]
+ break
+ else:
+ raise ValueError("Unable to find Firefox container {}".format(
+ container))
+ logger.debug("Only loading cookies from container '%s' (ID %s)",
+ container, container_id)
+ else:
+ container_id = None
+
+ return path, container_id
def _firefox_browser_directory():
if sys.platform in ("win32", "cygwin"):
- return os.path.expandvars(R"%APPDATA%\Mozilla\Firefox\Profiles")
+ return os.path.expandvars(r"%APPDATA%\Mozilla\Firefox\Profiles")
if sys.platform == "darwin":
return os.path.expanduser("~/Library/Application Support/Firefox")
return os.path.expanduser("~/.mozilla/firefox")
@@ -237,7 +280,7 @@ def _safari_parse_cookies_record(data, cookiejar):
cookiejar.set_cookie(Cookie(
0, name, value, None, False,
- domain, bool(domain), domain.startswith('.'),
+ domain, bool(domain), domain.startswith("."),
path, bool(path), is_secure, expiration_date, False,
None, None, {},
))
@@ -265,9 +308,7 @@ def _chrome_cookies_database(profile, config):
if path is None:
raise FileNotFoundError("Unable to find {} cookies database in "
"'{}'".format(config["browser"], search_root))
-
- logger.debug("Extracting cookies from %s", path)
- return DatabaseCopy(path)
+ return path
def _get_chromium_based_browser_settings(browser_name):
@@ -937,11 +978,12 @@ def _is_path(value):
return os.path.sep in value
-def _parse_browser_specification(browser, profile=None, keyring=None):
+def _parse_browser_specification(
+ browser, profile=None, keyring=None, container=None):
if browser not in SUPPORTED_BROWSERS:
raise ValueError("unsupported browser '{}'".format(browser))
if keyring and keyring not in SUPPORTED_KEYRINGS:
raise ValueError("unsupported keyring '{}'".format(keyring))
if profile and _is_path(profile):
profile = os.path.expanduser(profile)
- return browser, profile, keyring
+ return browser, profile, keyring, container
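Putting the two new pieces together: the container name is resolved to a
numeric userContextId via the profile's containers.json, and the cookie
query is then restricted by matching originAttributes. A simplified sketch,
assuming a known profile directory and skipping the temporary database
copy (DatabaseCopy) that the real code performs:

    import json
    import os.path
    import sqlite3

    def firefox_container_cookies(profile_dir, container):
        # containers.json maps container names to integer userContextId values
        with open(os.path.join(profile_dir, "containers.json")) as fp:
            identities = json.load(fp)["identities"]
        for context in identities:
            if context.get("name") == container:
                container_id = context["userContextId"]
                break
        else:
            raise ValueError("unknown container: " + container)

        # container cookies carry 'userContextId=<id>' in originAttributes
        uid = "%userContextId={}".format(container_id)
        db = sqlite3.connect(os.path.join(profile_dir, "cookies.sqlite"))
        return db.execute(
            "SELECT name, value, host, path, isSecure, expiry "
            "FROM moz_cookies "
            "WHERE originAttributes LIKE ? OR originAttributes LIKE ?",
            (uid, uid + "&%")).fetchall()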
diff --git a/gallery_dl/extractor/2chen.py b/gallery_dl/extractor/2chen.py
index 76a085a..d9674d8 100644
--- a/gallery_dl/extractor/2chen.py
+++ b/gallery_dl/extractor/2chen.py
@@ -17,18 +17,22 @@ class _2chenThreadExtractor(Extractor):
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{time} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{hash}_{time}"
- root = "https://2chen.moe"
- pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)/(\d+)"
+ pattern = r"(?:https?://)?2chen\.(?:moe|club)/([^/?#]+)/(\d+)"
test = (
("https://2chen.moe/tv/496715", {
+ "pattern": r"https://2chen\.su/assets/images/src/\w{40}\.\w+$",
"count": ">= 179",
}),
+ ("https://2chen.club/tv/1", {
+ "count": 5,
+ }),
# 404
("https://2chen.moe/jp/303786"),
)
def __init__(self, match):
Extractor.__init__(self, match)
+ self.root = text.root_from_url(match.group(0))
self.board, self.thread = match.groups()
def items(self):
@@ -36,13 +40,19 @@ class _2chenThreadExtractor(Extractor):
page = self.request(url, encoding="utf-8", notfound="thread").text
data = self.metadata(page)
yield Message.Directory, data
+
for post in self.posts(page):
- if not post["url"]:
+
+ url = post["url"]
+ if not url:
continue
+ if url[0] == "/":
+ url = self.root + url
+ post["url"] = url = url.partition("?")[0]
+
post.update(data)
- post["url"] = self.root + post["url"]
post["time"] = text.parse_int(post["date"].timestamp())
- yield Message.Url, post["url"], text.nameext_from_url(
+ yield Message.Url, url, text.nameext_from_url(
post["filename"], post)
def metadata(self, page):
@@ -78,18 +88,19 @@ class _2chenBoardExtractor(Extractor):
"""Extractor for 2chen boards"""
category = "2chen"
subcategory = "board"
- root = "https://2chen.moe"
- pattern = r"(?:https?://)?2chen\.moe/([^/?#]+)(?:/catalog|/?$)"
+ pattern = r"(?:https?://)?2chen\.(?:moe|club)/([^/?#]+)(?:/catalog|/?$)"
test = (
("https://2chen.moe/co/", {
"pattern": _2chenThreadExtractor.pattern
}),
("https://2chen.moe/co"),
- ("https://2chen.moe/co/catalog")
+ ("https://2chen.club/tv"),
+ ("https://2chen.moe/co/catalog"),
)
def __init__(self, match):
Extractor.__init__(self, match)
+ self.root = text.root_from_url(match.group(0))
self.board = match.group(1)
def items(self):
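Both 2chen extractors now derive self.root from the matched URL via
text.root_from_url(), so a single class serves 2chen.moe and 2chen.club.
A rough equivalent of that helper, assuming its behavior from how the
hunks use it (the real one lives in gallery_dl.text):

    def root_from_url(url, scheme="https://"):
        # '2chen.club/tv/1'          -> 'https://2chen.club'
        # 'https://2chen.moe/tv/496' -> 'https://2chen.moe'
        if not url.startswith("http"):
            return scheme + url[:url.index("/")]
        return url[:url.index("/", 8)]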
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index 28acc3d..f86691d 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -146,6 +146,7 @@ class _35photoTagExtractor(_35photoExtractor):
test = ("https://35photo.pro/tags/landscape/", {
"range": "1-25",
"count": 25,
+ "archive": False,
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index 1e020c2..0e128c3 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -92,8 +92,8 @@ class _8chanThreadExtractor(_8chanExtractor):
"uniquePosters": 9,
"usesCustomCss": True,
"usesCustomJs": False,
- "wsPort": 8880,
- "wssPort": 2087,
+ "?wsPort": 8880,
+ "?wssPort": 2087,
},
}),
("https://8chan.se/vhs/res/4.html"),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d2bbcbb..444075c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -39,6 +39,7 @@ modules = [
"fallenangels",
"fanbox",
"fantia",
+ "fapello",
"fapachi",
"flickr",
"furaffinity",
@@ -74,13 +75,12 @@ modules = [
"keenspot",
"kemonoparty",
"khinsider",
- "kissgoddess",
- "kohlchan",
"komikcast",
"lightroom",
"lineblog",
"livedoor",
"luscious",
+ "lynxchan",
"mangadex",
"mangafox",
"mangahere",
@@ -131,6 +131,7 @@ modules = [
"slickpic",
"slideshare",
"smugmug",
+ "soundgasm",
"speakerdeck",
"subscribestar",
"tapas",
@@ -151,6 +152,7 @@ modules = [
"wallpapercave",
"warosu",
"weasyl",
+ "webmshare",
"webtoons",
"weibo",
"wikiart",
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 7e9a422..882c2b3 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.is/"""
+"""Extractors for https://bunkr.ru/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
@@ -14,13 +14,13 @@ import json
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.is albums"""
+ """Extractor for bunkr.ru albums"""
category = "bunkr"
- root = "https://bunkr.is"
- pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
+ root = "https://bunkr.ru"
+ pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:ru|is|to)/a/([^/?#]+)"
test = (
- ("https://bunkr.is/a/Lktg9Keq", {
- "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
+ ("https://bunkr.ru/a/Lktg9Keq", {
+ "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
"album_id": "Lktg9Keq",
@@ -34,64 +34,46 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
}),
# mp4 (#2239)
("https://app.bunkr.is/a/ptRHaCn2", {
- "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
+ "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
# cdn4
("https://bunkr.is/a/iXTTc1o2", {
- "pattern": r"https://(cdn|media-files)4\.bunkr\.is/",
+ "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
"content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
}),
("https://bunkr.to/a/Lktg9Keq"),
)
def fetch_album(self, album_id):
- if "//app." in self.root:
- return self._fetch_album_api(album_id)
- else:
- return self._fetch_album_site(album_id)
-
- def _fetch_album_api(self, album_id):
- files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
-
- for file in files:
- url = file["file"]
- if url.endswith(".mp4"):
- file["file"] = url.replace(
- "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
- else:
- file["_fallback"] = (url.replace("//cdn.", "//cdn3.", 1),)
-
- return files, data
-
- def _fetch_album_site(self, album_id):
- url = self.root + "/a/" + self.album_id
+ root = self.root
try:
data = json.loads(text.extr(
- self.request(url).text,
+ self.request(root + "/a/" + self.album_id).text,
'id="__NEXT_DATA__" type="application/json">', '<'))
album = data["props"]["pageProps"]["album"]
files = album["files"]
except Exception as exc:
- self.log.debug(exc.__class__.__name__, exc)
- self.root = self.root.replace("bunkr", "app.bunkr", 1)
- return self._fetch_album_api(album_id)
-
- headers = {"Referer": "https://stream.bunkr.is/"}
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ self.root = root.replace("://", "://app.", 1)
+ files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+ else:
+ for file in files:
+ file["file"] = file["cdn"] + "/" + file["name"]
+ data = {
+ "album_id" : self.album_id,
+ "album_name" : text.unescape(album["name"]),
+ "description": text.unescape(album["description"]),
+ "count" : len(files),
+ }
+ headers = {"Referer": root.replace("://", "://stream.", 1) + "/"}
for file in files:
- name = file["name"]
- cdn = file["cdn"]
- if name.endswith((".mp4", ".m4v", ".mov", ".webm",
- ".zip", ".rar", ".7z")):
- cdn = cdn.replace("//cdn", "//media-files", 1)
+ if file["file"].endswith(
+ (".mp4", ".m4v", ".mov", ".webm", ".zip", ".rar", ".7z")):
file["_http_headers"] = headers
- file["file"] = cdn + "/" + name
+ file["file"] = file["file"].replace(
+ "://cdn", "://media-files", 1)
- return files, {
- "album_id" : self.album_id,
- "album_name" : text.unescape(album["name"]),
- "description": text.unescape(album["description"]),
- "count" : len(files),
- }
+ return files, data
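The rewritten fetch_album() first scrapes the album page and only falls
back to the app.bunkr API when that fails. The scraping step relies on the
Next.js state blob embedded in the page; a minimal sketch of that
extraction (next_data is a hypothetical helper, using the same markers as
the hunk above):

    import json

    def next_data(page):
        # bunkr pages embed album metadata as
        # <script id="__NEXT_DATA__" type="application/json">...</script>
        marker = 'id="__NEXT_DATA__" type="application/json">'
        start = page.index(marker) + len(marker)
        return json.loads(page[start:page.index("<", start)])

    # album = next_data(html)["props"]["pageProps"]["album"]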
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 5a44780..ef17176 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -41,6 +41,11 @@ class DanbooruExtractor(BaseExtractor):
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
self.extended_metadata = self.config("metadata", False)
+ threshold = self.config("threshold")
+ if isinstance(threshold, int):
+ self.threshold = 1 if threshold < 1 else threshold
+ else:
+ self.threshold = self.per_page
username, api_key = self._get_auth_info()
if username:
@@ -126,7 +131,7 @@ class DanbooruExtractor(BaseExtractor):
posts = posts["posts"]
yield from posts
- if len(posts) < self.per_page:
+ if len(posts) < self.threshold:
return
if pagenum:
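As the new man page entry describes, pagination now stops once a batch of
API results comes back shorter than the configured threshold, rather than
the fixed per-page limit. A condensed sketch of that loop (fetch_page is a
hypothetical stand-in for the actual API call):

    def paginate_posts(fetch_page, threshold):
        # yield posts page by page; a short batch signals the last page
        pagenum = 1
        while True:
            posts = fetch_page(pagenum)
            yield from posts
            if len(posts) < threshold:
                return
            pagenum += 1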
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 45beddf..aa78cfb 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -124,9 +124,20 @@ class DeviantartExtractor(Extractor):
deviation["_journal"] = journal["html"]
yield self.commit_journal(deviation, journal)
- if self.extra:
- txt = (deviation.get("description", "") +
- deviation.get("_journal", ""))
+ if not self.extra:
+ continue
+
+ # ref: https://www.deviantart.com
+ # /developers/http/v1/20210526/object/editor_text
+ # the value of "features" is a JSON string with forward
+ # slashes escaped
+ text_content = \
+ deviation["text_content"]["body"]["features"].replace(
+ "\\/", "/") if "text_content" in deviation else None
+ for txt in (text_content, deviation.get("description"),
+ deviation.get("_journal")):
+ if txt is None:
+ continue
for match in DeviantartStashExtractor.pattern.finditer(txt):
url = text.ensure_http_scheme(match.group(0))
deviation["_extractor"] = DeviantartStashExtractor
@@ -854,7 +865,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "g_{_username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
+ pattern = (BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
+ r"|(?:https?://)?(?:www\.)?deviantart\.com/"
+ r"(?:view/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)(\d+)")
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
@@ -896,19 +909,13 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"range": "2-",
"count": 4,
}),
- # video
- ("https://www.deviantart.com/chi-u/art/-VIDEO-Brushes-330774593", {
- "pattern": r"https://wixmp-.+wixmp.com/v/mp4/.+\.720p\.\w+.mp4",
- "keyword": {
- "filename": r"re:_video____brushes_\w+_by_chi_u-d5gxnb5",
- "extension": "mp4",
- "target": {
- "duration": 306,
- "filesize": 19367585,
- "quality": "720p",
- "src": str,
- },
- }
+ # sta.sh URL from deviation["text_content"]["body"]["features"]
+ (("https://www.deviantart.com"
+ "/cimar-wildehopps/art/Honorary-Vixen-859809305"), {
+ "options": (("extra", 1),),
+ "pattern": ("text:<!DOCTYPE html>\n|" +
+ DeviantartStashExtractor.pattern),
+ "count": 2,
}),
# journal
("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
@@ -920,12 +927,28 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"url": "e2e0044bd255304412179b6118536dbd9bb3bb0e",
"pattern": "text:<!DOCTYPE html>\n",
}),
+ # /view/ URLs
+ ("https://deviantart.com/view/904858796/", {
+ "content": "8770ec40ad1c1d60f6b602b16301d124f612948f",
+ }),
+ ("http://www.deviantart.com/view/890672057", {
+ "content": "1497e13d925caeb13a250cd666b779a640209236",
+ }),
+ ("https://www.deviantart.com/view/706871727", {
+ "content": "3f62ae0c2fca2294ac28e41888ea06bb37c22c65",
+ }),
+ ("https://www.deviantart.com/view/1", {
+ "exception": exception.NotFoundError,
+ }),
# old-style URLs
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),
("https://myria-moon.deviantart.com"
"/art/Aime-Moi-part-en-vadrouille-261986576"),
("https://zzz.deviantart.com/art/zzz-1234567890"),
+ # old /view/ URLs from the Wayback Machine
+ ("https://www.deviantart.com/view.php?id=14864502"),
+ ("http://www.deviantart.com/view-full.php?id=100842"),
)
skip = Extractor.skip
@@ -933,11 +956,12 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.type = match.group(3)
- self.deviation_id = match.group(4)
+ self.deviation_id = match.group(4) or match.group(5)
def deviations(self):
url = "{}/{}/{}/{}".format(
- self.root, self.user, self.type, self.deviation_id)
+ self.root, self.user or "u", self.type or "art", self.deviation_id)
+
uuid = text.extract(self._limited_request(url).text,
'"deviationUuid\\":\\"', '\\')[0]
if not uuid:
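The extra-URL scan above now also covers deviation["text_content"], whose
"features" value is a JSON string with escaped forward slashes. A small
sketch of that preprocessing step (the sta.sh regex here is a simplified
placeholder for DeviantartStashExtractor.pattern):

    import re

    def stash_urls(deviation):
        # unescape '\/' before matching, as the API returns escaped JSON
        features = deviation["text_content"]["body"]["features"]
        return re.findall(r"(?:https?://)?sta\.sh/[0-9a-z]+",
                          features.replace("\\/", "/"))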
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index b4dadc7..ad3f16b 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -92,16 +92,29 @@ class EromeAlbumExtractor(EromeExtractor):
"""Extractor for albums on erome.com"""
subcategory = "album"
pattern = BASE_PATTERN + r"/a/(\w+)"
- test = ("https://www.erome.com/a/TyFMI7ik", {
- "pattern": r"https://s\d+\.erome\.com/\d+/TyFMI7ik/\w+",
- "count": 9,
- "keyword": {
- "album_id": "TyFMI7ik",
- "num": int,
- "title": "Ryan Ryans",
- "user": "xanub",
- },
- })
+ test = (
+ ("https://www.erome.com/a/NQgdlWvk", {
+ "pattern": r"https://v\d+\.erome\.com/\d+"
+ r"/NQgdlWvk/j7jlzmYB_480p\.mp4",
+ "count": 1,
+ "keyword": {
+ "album_id": "NQgdlWvk",
+ "num": 1,
+ "title": "porn",
+ "user": "yYgWBZw8o8qsMzM",
+ },
+ }),
+ ("https://www.erome.com/a/TdbZ4ogi", {
+ "pattern": r"https://s\d+\.erome\.com/\d+/TdbZ4ogi/\w+",
+ "count": 6,
+ "keyword": {
+ "album_id": "TdbZ4ogi",
+ "num": int,
+ "title": "82e78cfbb461ad87198f927fcb1fda9a1efac9ff.",
+ "user": "yYgWBZw8o8qsMzM",
+ },
+ }),
+ )
def albums(self):
return (self.item,)
@@ -110,7 +123,7 @@ class EromeAlbumExtractor(EromeExtractor):
class EromeUserExtractor(EromeExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
- test = ("https://www.erome.com/xanub", {
+ test = ("https://www.erome.com/yYgWBZw8o8qsMzM", {
"range": "1-25",
"count": 25,
})
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index a546f68..dccc74e 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -117,9 +117,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/1200119/d55c44d3d0/", {
+ "options": (("original", False),),
"keyword": {
"cost": int,
- "date": "dt:2018-03-18 20:15:00",
+ "date": "dt:2018-03-18 20:14:00",
"eh_category": "Non-H",
"expunged": False,
"favorites": r"re:^[12]\d$",
@@ -150,7 +151,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"uploader": "klorpa",
"width": int,
},
- "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
+ "content": ("2c68cff8a7ca540a78c36fdbf5fbae0260484f87",
+ "e9891a4c017ed0bb734cd1efba5cd03f594d31ff"),
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
"exception": exception.NotFoundError,
@@ -159,9 +161,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"exception": exception.AuthorizationError,
}),
("https://exhentai.org/s/f68367b4c8/1200119-3", {
+ "options": (("original", False),),
"count": 2,
}),
("https://e-hentai.org/s/f68367b4c8/1200119-3", {
+ "options": (("original", False),),
"count": 2,
}),
("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
@@ -516,7 +520,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
data["gallery_token"] = gallery.group(3)
yield Message.Queue, url + "/", data
- next_url = text.extr(page, 'nexturl = "', '"', None)
+ next_url = text.extr(page, 'nexturl="', '"', None)
if next_url is not None:
if not next_url:
return
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
new file mode 100644
index 0000000..d6fcb4b
--- /dev/null
+++ b/gallery_dl/extractor/fapello.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://fapello.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+class FapelloPostExtractor(Extractor):
+ """Extractor for individual posts on fapello.com"""
+ category = "fapello"
+ subcategory = "post"
+ directory_fmt = ("{category}", "{model}")
+ filename_fmt = "{model}_{id}.{extension}"
+ archive_fmt = "{type}_{model}_{id}"
+ pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
+ r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)")
+ test = (
+ ("https://fapello.com/carrykey/530/", {
+ "pattern": (r"https://fapello\.com/content/c/a"
+ r"/carrykey/1000/carrykey_0530\.jpg"),
+ "keyword": {
+ "model": "carrykey",
+ "id" : 530,
+ "type" : "photo",
+ "thumbnail": "",
+ },
+ }),
+ ("https://fapello.com/vladislava-661/693/", {
+ "pattern": (r"https://cdn\.fapello\.com/content/v/l"
+ r"/vladislava-661/1000/vladislava-661_0693\.mp4"),
+ "keyword": {
+ "model": "vladislava-661",
+ "id" : 693,
+ "type" : "video",
+ "thumbnail": ("https://fapello.com/content/v/l"
+ "/vladislava-661/1000/vladislava-661_0693.jpg"),
+ },
+ }),
+ ("https://fapello.com/carrykey/000/", {
+ "exception": exception.NotFoundError,
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.model, self.id = match.groups()
+
+ def items(self):
+ url = "https://fapello.com/{}/{}/".format(self.model, self.id)
+ page = text.extr(
+ self.request(url, allow_redirects=False).text,
+ 'class="uk-align-center"', "</div>", None)
+ if page is None:
+ raise exception.NotFoundError("post")
+
+ data = {
+ "model": self.model,
+ "id" : text.parse_int(self.id),
+ "type" : "video" if 'type="video' in page else "photo",
+ "thumbnail": text.extr(page, 'poster="', '"'),
+ }
+ url = text.extr(page, 'src="', '"')
+ yield Message.Directory, data
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class FapelloModelExtractor(Extractor):
+ """Extractor for all posts from a fapello model"""
+ category = "fapello"
+ subcategory = "model"
+ pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
+ r"/(?!top-(?:likes|followers)|popular_videos"
+ r"|videos|trending|search/?$)"
+ r"([^/?#]+)/?$")
+ test = (
+ ("https://fapello.com/hyoon/", {
+ "pattern": FapelloPostExtractor.pattern,
+ "range" : "1-50",
+ "count" : 50,
+ }),
+ ("https://fapello.com/kobaebeefboo/"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.model = match.group(1)
+
+ def items(self):
+ num = 1
+ data = {"_extractor": FapelloPostExtractor}
+ while True:
+ url = "https://fapello.com/ajax/model/{}/page-{}/".format(
+ self.model, num)
+ page = self.request(url).text
+ if not page:
+ return
+
+ for url in text.extract_iter(page, '<a href="', '"'):
+ yield Message.Queue, url, data
+ num += 1
+
+
+class FapelloPathExtractor(Extractor):
+ """Extractor for models and posts from fapello.com paths"""
+ category = "fapello"
+ subcategory = "path"
+ pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
+ r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
+ r"|popular_videos/[^/?#]+)/?$")
+ test = (
+ ("https://fapello.com/top-likes/", {
+ "pattern": FapelloModelExtractor.pattern,
+ "range" : "1-10",
+ "count" : 10,
+ }),
+ ("https://fapello.com/videos/", {
+ "pattern": FapelloPostExtractor.pattern,
+ "range" : "1-10",
+ "count" : 10,
+ }),
+ ("https://fapello.com/top-followers/"),
+ ("https://fapello.com/trending/"),
+ ("https://fapello.com/popular_videos/twelve_hours/"),
+ ("https://fapello.com/popular_videos/week/"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.path = match.group(1)
+
+ def items(self):
+ num = 1
+ if self.path in ("top-likes", "top-followers"):
+ data = {"_extractor": FapelloModelExtractor}
+ else:
+ data = {"_extractor": FapelloPostExtractor}
+
+ while True:
+ page = self.request("https://fapello.com/ajax/{}/page-{}/".format(
+ self.path, num)).text
+ if not page:
+ return
+
+ for item in text.extract_iter(
+ page, 'uk-transition-toggle">', "</a>"):
+ yield Message.Queue, text.extr(item, '<a href="', '"'), data
+ num += 1
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 81671ec..2290cc2 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -39,10 +39,6 @@ class FoolslideExtractor(BaseExtractor):
BASE_PATTERN = FoolslideExtractor.update({
- "kireicake": {
- "root": "https://reader.kireicake.com",
- "pattern": r"reader\.kireicake\.com",
- },
"powermanga": {
"root": "https://read.powermanga.org",
"pattern": r"read(?:er)?\.powermanga\.org",
@@ -64,10 +60,6 @@ class FoolslideChapterExtractor(FoolslideExtractor):
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
test = (
- ("https://reader.kireicake.com/read/wonderland/en/1/1/", {
- "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
- "keyword": "9f80947920a325e33aea7f5cd69ea669171903b6",
- }),
(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
@@ -123,10 +115,6 @@ class FoolslideMangaExtractor(FoolslideExtractor):
categorytransfer = True
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
test = (
- ("https://reader.kireicake.com/series/wonderland/", {
- "url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
- "keyword": "268f43772fb239888ca5c5f6a4f65f99ffb3eefb",
- }),
(("https://read.powermanga.org"
"/series/one_piece_digital_colour_comics/"), {
"count": ">= 1",
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index da87b8f..facd3db 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -174,7 +174,8 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
test = (
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
- "content": "5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
+ "content": ("5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
+ "622e80be3f496672c44aab5c47fbc6941c61bc79"),
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 2,
}),
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 6fcfc55..207562a 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -200,7 +200,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
return self.request(self.page_url).cookies
def get_info(self, page):
- url , pos = text.extract(page, 'center;"><img src="', '"')
+ url , pos = text.extract(page, '<img src="', '"')
filename, pos = text.extract(page, ' alt="', '"', pos)
return url, filename
diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py
index 7e4cce4..9ae22a9 100644
--- a/gallery_dl/extractor/imgth.py
+++ b/gallery_dl/extractor/imgth.py
@@ -1,60 +1,73 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://imgth.com/"""
+"""Extractors for https://imgth.com/"""
-from .common import Extractor, Message
+from .common import GalleryExtractor
from .. import text
-class ImgthGalleryExtractor(Extractor):
+class ImgthGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from imgth.com"""
category = "imgth"
- subcategory = "gallery"
- directory_fmt = ("{category}", "{gallery_id} {title}")
- filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
- archive_fmt = "{gallery_id}_{num}"
- pattern = r"(?:https?://)?imgth\.com/gallery/(\d+)"
- test = ("http://imgth.com/gallery/37/wallpaper-anime", {
- "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
- "keyword": "6f8c00d6849ea89d1a028764675ec1fe9dbd87e2",
- })
+ root = "https://imgth.com"
+ pattern = r"(?:https?://)?(?:www\.)?imgth\.com/gallery/(\d+)"
+ test = (
+ ("https://imgth.com/gallery/37/wallpaper-anime", {
+ "url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
+ "pattern": r"https://imgth\.com/images/2009/11/25"
+ r"/wallpaper-anime_\w+\.jpg",
+ "keyword": {
+ "count": 12,
+ "date": "dt:2009-11-25 18:21:00",
+ "extension": "jpg",
+ "filename": r"re:wallpaper-anime_\w+",
+ "gallery_id": 37,
+ "num": int,
+ "title": "Wallpaper anime",
+ "user": "celebrities",
+ },
+ }),
+ ("https://www.imgth.com/gallery/37/wallpaper-anime"),
+ )
def __init__(self, match):
- Extractor.__init__(self, match)
- self.gid = match.group(1)
- self.url_base = "https://imgth.com/gallery/" + self.gid + "/g/page/"
+ self.gallery_id = gid = match.group(1)
+ url = "{}/gallery/{}/g/".format(self.root, gid)
+ GalleryExtractor.__init__(self, match, url)
- def items(self):
- page = self.request(self.url_base + "0").text
- data = self.metadata(page)
- yield Message.Directory, data
- for data["num"], url in enumerate(self.images(page), 1):
- yield Message.Url, url, text.nameext_from_url(url, data)
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ return {
+ "gallery_id": text.parse_int(self.gallery_id),
+ "title": text.unescape(extr("<h1>", "</h1>")),
+ "count": text.parse_int(extr(
+ "total of images in this gallery: ", " ")),
+ "date" : text.parse_datetime(
+ extr("created on ", " by <")
+ .replace("th, ", " ", 1).replace("nd, ", " ", 1)
+ .replace("st, ", " ", 1), "%B %d %Y at %H:%M"),
+ "user" : text.unescape(extr(">", "<")),
+ }
def images(self, page):
- """Yield all image urls for this gallery"""
pnum = 0
+
while True:
thumbs = text.extr(page, '<ul class="thumbnails">', '</ul>')
for url in text.extract_iter(thumbs, '<img src="', '"'):
- yield "https://imgth.com/images" + url[24:]
+ path = url.partition("/thumbs/")[2]
+ yield ("{}/images/{}".format(self.root, path), None)
+
if '<li class="next">' not in page:
return
- pnum += 1
- page = self.request(self.url_base + str(pnum)).text
- def metadata(self, page):
- """Collect metadata for extractor-job"""
- return text.extract_all(page, (
- ("title", '<h1>', '</h1>'),
- ("count", 'total of images in this gallery: ', ' '),
- ("date" , 'created on ', ' by <'),
- (None , 'href="/users/', ''),
- ("user" , '>', '<'),
- ), values={"gallery_id": self.gid})[0]
+ pnum += 1
+ url = "{}/gallery/{}/g/page/{}".format(
+ self.root, self.gallery_id, pnum)
+ page = self.request(url).text
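The rewritten metadata() builds its "date" value by stripping the English
ordinal suffix before handing the string to strptime. A hedged sketch of
that conversion, with the input format assumed from the page layout and
"rd, " added alongside the suffixes the hunk strips:

    from datetime import datetime

    def parse_imgth_date(value):
        # 'November 25th, 2009 at 18:21' -> datetime(2009, 11, 25, 18, 21)
        for suffix in ("th, ", "nd, ", "st, ", "rd, "):
            value = value.replace(suffix, " ", 1)
        return datetime.strptime(value, "%B %d %Y at %H:%M")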
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index fd78ce2..42d0a7b 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -12,7 +12,7 @@ from .common import Extractor, Message
from .. import text, exception
-BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
class ImgurExtractor(Extractor):
@@ -114,7 +114,9 @@ class ImgurImageExtractor(ImgurExtractor):
("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile
("https://imgur.com/zxaY6"), # 5 character key
+ ("https://imgur.io/zxaY6"), # .io
("https://i.imgur.com/21yMxCS.png"), # direct link
+ ("https://i.imgur.io/21yMxCS.png"), # direct link .io
("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
("https://i.imgur.com/zxaY6.gif"), # direct link (short)
("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
@@ -205,7 +207,8 @@ class ImgurAlbumExtractor(ImgurExtractor):
"count": 0,
}),
("https://www.imgur.com/a/TcBmP"), # www
- ("https://m.imgur.com/a/TcBmP"), # mobile
+ ("https://imgur.io/a/TcBmP"), # .io
+ ("https://m.imgur.com/a/TcBmP"), # mobile
)
def items(self):
@@ -248,6 +251,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
}),
("https://imgur.com/t/unmuted/26sEhNr"),
("https://imgur.com/t/cat/qSB8NbN"),
+ ("https://imgur.io/t/cat/qSB8NbN"), # .io
)
def items(self):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 24ad873..db9f3fb 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -65,6 +65,10 @@ class InstagramExtractor(Extractor):
post["count"] = len(files)
yield Message.Directory, post
+
+ if "date" in post:
+ del post["date"]
+
for file in files:
file.update(post)
@@ -93,10 +97,6 @@ class InstagramExtractor(Extractor):
url = response.url
if "/accounts/login/" in url:
- if self._username:
- self.log.debug("Invalidating cached login session for "
- "'%s'", self._username)
- _login_impl.invalidate(self._username)
page = "login"
elif "/challenge/" in url:
page = "challenge"
@@ -117,11 +117,9 @@ class InstagramExtractor(Extractor):
return response
def login(self):
- self._username = None
if not self._check_cookies(self.cookienames):
username, password = self._get_auth_info()
if username:
- self._username = username
self._update_cookies(_login_impl(self, username, password))
else:
self._logged_in = False
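The ordering fix for #3392 hinges on dict.update() semantics: every file
dict is updated with the post dict, so a post-level "date" would clobber a
file's own timestamp. Deleting it right after Message.Directory preserves
both values, as this toy example shows:

    post = {"post_id": "abc", "date": "2022-12-01"}
    file = {"num": 1, "date": "2022-12-18"}

    post.pop("date", None)   # post "date" only used for the directory
    file.update(post)        # file-level "date" survives the merge
    # file == {"num": 1, "date": "2022-12-18", "post_id": "abc"}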
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index d5cca1c..0c3b002 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -65,7 +65,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"count": text.parse_int(extr("Number of Files: <b>", "<")),
"size" : text.parse_bytes(extr("Total Filesize: <b>", "<")[:-1]),
"date" : extr("Date Added: <b>", "<"),
- "type" : extr("Album type: <b>", "<"),
+ "type" : text.remove_html(extr("Album type: <b>", "</b>")),
}}
def tracks(self, page):
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
deleted file mode 100644
index 4ec685c..0000000
--- a/gallery_dl/extractor/kissgoddess.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://kissgoddess.com/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text, exception
-
-
-class KissgoddessGalleryExtractor(GalleryExtractor):
- """Extractor for image galleries on kissgoddess.com"""
- category = "kissgoddess"
- root = "https://kissgoddess.com"
- pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/album/(\d+)"
- test = ("https://kissgoddess.com/album/18285.html", {
- "pattern": r"https://pic\.kissgoddess\.com"
- r"/gallery/16473/18285/s/\d+\.jpg",
- "count": 19,
- "keyword": {
- "gallery_id": 18285,
- "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
- },
- })
-
- def __init__(self, match):
- self.gallery_id = match.group(1)
- url = "{}/album/{}.html".format(self.root, self.gallery_id)
- GalleryExtractor.__init__(self, match, url)
-
- def metadata(self, page):
- return {
- "gallery_id": text.parse_int(self.gallery_id),
- "title" : text.extr(
- page, '<title>', "<")[0].rpartition(" | "),
- }
-
- def images(self, page):
- pnum = 1
-
- while page:
- for url in text.extract_iter(page, "<img src='", "'"):
- yield url, None
- for url in text.extract_iter(page, "<img data-original='", "'"):
- yield url, None
-
- pnum += 1
- url = "{}/album/{}_{}.html".format(
- self.root, self.gallery_id, pnum)
- try:
- page = self.request(url).text
- except exception.HttpError:
- return
-
-
-class KissgoddessModelExtractor(Extractor):
- """Extractor for all galleries of a model on kissgoddess.com"""
- category = "kissgoddess"
- subcategory = "model"
- root = "https://kissgoddess.com"
- pattern = r"(?:https?://)?(?:www\.)?kissgoddess\.com/people/([^./?#]+)"
- test = ("https://kissgoddess.com/people/aya-hazuki.html", {
- "pattern": KissgoddessGalleryExtractor.pattern,
- "count": ">= 7",
- })
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.model = match.group(1)
-
- def items(self):
- url = "{}/people/{}.html".format(self.root, self.model)
- page = self.request(url).text
-
- data = {"_extractor": KissgoddessGalleryExtractor}
- for path in text.extract_iter(page, 'thumb"><a href="/album/', '"'):
- url = self.root + "/album/" + path
- yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/kohlchan.py b/gallery_dl/extractor/kohlchan.py
deleted file mode 100644
index c96dedc..0000000
--- a/gallery_dl/extractor/kohlchan.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://kohlchan.net/"""
-
-from .common import Extractor, Message
-from .. import text
-import itertools
-
-
-class KohlchanThreadExtractor(Extractor):
- """Extractor for Kohlchan threads"""
- category = "kohlchan"
- subcategory = "thread"
- directory_fmt = ("{category}", "{boardUri}",
- "{threadId} {subject|message[:50]}")
- filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
- archive_fmt = "{boardUri}_{postId}_{num}"
- pattern = r"(?:https?://)?kohlchan\.net/([^/?#]+)/res/(\d+)"
- test = ("https://kohlchan.net/a/res/4594.html", {
- "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
- "count": ">= 80",
- })
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
-
- def items(self):
- url = "https://kohlchan.net/{}/res/{}.json".format(
- self.board, self.thread)
- thread = self.request(url).json()
- thread["postId"] = thread["threadId"]
- posts = thread.pop("posts")
-
- yield Message.Directory, thread
-
- for post in itertools.chain((thread,), posts):
- files = post.pop("files", ())
- if files:
- thread.update(post)
- for num, file in enumerate(files):
- file.update(thread)
- file["num"] = num
- url = "https://kohlchan.net" + file["path"]
- text.nameext_from_url(file["originalName"], file)
- yield Message.Url, url, file
-
-
-class KohlchanBoardExtractor(Extractor):
- """Extractor for Kohlchan boards"""
- category = "kohlchan"
- subcategory = "board"
- pattern = (r"(?:https?://)?kohlchan\.net"
- r"/([^/?#]+)/(?:(?:catalog|\d+)\.html)?$")
- test = (
- ("https://kohlchan.net/a/", {
- "pattern": KohlchanThreadExtractor.pattern,
- "count": ">= 100",
- }),
- ("https://kohlchan.net/a/2.html"),
- ("https://kohlchan.net/a/catalog.html"),
- )
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.board = match.group(1)
-
- def items(self):
- url = "https://kohlchan.net/{}/catalog.json".format(self.board)
- for thread in self.request(url).json():
- url = "https://kohlchan.net/{}/res/{}.html".format(
- self.board, thread["threadId"])
- thread["_extractor"] = KohlchanThreadExtractor
- yield Message.Queue, url, thread
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index a9eebf4..04373c4 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -6,19 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://komikcast.me/"""
+"""Extractors for https://komikcast.site/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:me|com)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:site|me|com)"
class KomikcastBase():
"""Base class for komikcast extractors"""
category = "komikcast"
- root = "https://komikcast.me"
+ root = "https://komikcast.site"
@staticmethod
def parse_chapter_string(chapter_string, data=None):
@@ -46,23 +46,23 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
- """Extractor for manga-chapters from komikcast.me"""
+ """Extractor for manga-chapters from komikcast.site"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
test = (
- (("https://komikcast.me/chapter"
+ (("https://komikcast.site/chapter"
"/apotheosis-chapter-02-2-bahasa-indonesia/"), {
- "url": "74eca5c9b27b896816497f9b2d847f2a1fcfc209",
+ "url": "f6b43fbc027697749b3ea1c14931c83f878d7936",
"keyword": "f3938e1aff9ad1f302f52447e9781b21f6da26d4",
}),
(("https://komikcast.me/chapter"
"/soul-land-ii-chapter-300-1-bahasa-indonesia/"), {
- "url": "243a5250e210b40d17217e83b7547cefea5638bd",
+ "url": "efd00a9bd95461272d51990d7bc54b79ff3ff2e6",
"keyword": "cb646cfed3d45105bd645ab38b2e9f7d8c436436",
}),
)
def metadata(self, page):
- info = text.extr(page, "<title>", " – Komikcast<")
+ info = text.extr(page, "<title>", " - Komikcast<")
return self.parse_chapter_string(info)
@staticmethod
@@ -76,12 +76,12 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
- """Extractor for manga from komikcast.me"""
+ """Extractor for manga from komikcast.site"""
chapterclass = KomikcastChapterExtractor
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
test = (
- ("https://komikcast.me/komik/090-eko-to-issho/", {
- "url": "08204f0a703ec5272121abcf0632ecacba1e588f",
+ ("https://komikcast.site/komik/090-eko-to-issho/", {
+ "url": "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
"keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1",
}),
("https://komikcast.me/tonari-no-kashiwagi-san/"),
@@ -101,7 +101,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
@staticmethod
def metadata(page):
"""Return a dict with general metadata"""
- manga , pos = text.extract(page, "<title>" , " – Komikcast<")
+ manga , pos = text.extract(page, "<title>" , " - Komikcast<")
genres, pos = text.extract(
page, 'class="komik_info-content-genre">', "</span>", pos)
author, pos = text.extract(page, ">Author:", "</span>", pos)
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 9caf6d7..5d236c3 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -23,7 +23,7 @@ BASE_PATTERN = LolisafeExtractor.update({
"xbunkr": {
"root": "https://xbunkr.com",
"pattern": r"xbunkr\.com",
- }
+ },
})
@@ -47,9 +47,9 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
self.album_id = match.group(match.lastindex)
domain = self.config("domain")
- if domain is None or domain == "auto":
+ if domain == "auto":
self.root = text.root_from_url(match.group(0))
- else:
+ elif domain:
self.root = text.ensure_http_scheme(domain)
def items(self):
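
Note: the rewritten branch changes how the "domain" option is resolved:
"auto" still derives the root from the input URL, but leaving the option
unset (null) now keeps the extractor's default root instead of also
triggering URL detection. A minimal sketch of the new order (names are
illustrative, not actual extractor attributes):

    def resolve_root(domain, default_root, url_root):
        if domain == "auto":
            return url_root              # root taken from the input URL
        elif domain:                     # explicit domain string
            return "https://" + domain   # stand-in for text.ensure_http_scheme()
        return default_root              # null -> keep the class default
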
diff --git a/gallery_dl/extractor/lynxchan.py b/gallery_dl/extractor/lynxchan.py
new file mode 100644
index 0000000..bbcf9c0
--- /dev/null
+++ b/gallery_dl/extractor/lynxchan.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for LynxChan Imageboards"""
+
+from .common import BaseExtractor, Message
+from .. import text
+import itertools
+
+
+class LynxchanExtractor(BaseExtractor):
+ """Base class for LynxChan extractors"""
+ basecategory = "lynxchan"
+
+
+BASE_PATTERN = LynxchanExtractor.update({
+ "kohlchan": {
+ "root": "https://kohlchan.net",
+ "pattern": r"kohlchan\.net"
+ },
+ "endchan": {
+ "root": None,
+ "pattern": r"endchan\.(?:org|net|gg)",
+ },
+})
+
+
+class LynxchanThreadExtractor(LynxchanExtractor):
+ """Extractor for LynxChan threads"""
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{boardUri}",
+ "{threadId} {subject|message[:50]}")
+ filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
+ archive_fmt = "{boardUri}_{postId}_{num}"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+ test = (
+ ("https://kohlchan.net/a/res/4594.html", {
+ "pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
+ "count": ">= 80",
+ }),
+ ("https://endchan.org/yuri/res/193483.html", {
+ "pattern": r"https://endchan\.org/\.media/[^.]+(\.\w+)?$",
+ "count" : ">= 19",
+ }),
+ ("https://endchan.org/yuri/res/33621.html"),
+ )
+
+ def __init__(self, match):
+ LynxchanExtractor.__init__(self, match)
+ index = match.lastindex
+ self.board = match.group(index-1)
+ self.thread = match.group(index)
+
+ def items(self):
+ url = "{}/{}/res/{}.json".format(self.root, self.board, self.thread)
+ thread = self.request(url).json()
+ thread["postId"] = thread["threadId"]
+ posts = thread.pop("posts", ())
+
+ yield Message.Directory, thread
+ for post in itertools.chain((thread,), posts):
+ files = post.pop("files", ())
+ if files:
+ thread.update(post)
+ for num, file in enumerate(files):
+ file.update(thread)
+ file["num"] = num
+ url = self.root + file["path"]
+ text.nameext_from_url(file["originalName"], file)
+ yield Message.Url, url, file
+
+
+class LynxchanBoardExtractor(LynxchanExtractor):
+ """Extractor for LynxChan boards"""
+ subcategory = "board"
+ pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
+ test = (
+ ("https://kohlchan.net/a/", {
+ "pattern": LynxchanThreadExtractor.pattern,
+ "count": ">= 100",
+ }),
+ ("https://kohlchan.net/a/2.html"),
+ ("https://kohlchan.net/a/catalog.html"),
+ ("https://endchan.org/yuri/", {
+ "pattern": LynxchanThreadExtractor.pattern,
+ "count" : ">= 9",
+ }),
+ ("https://endchan.org/yuri/catalog.html"),
+ )
+
+ def __init__(self, match):
+ LynxchanExtractor.__init__(self, match)
+ self.board = match.group(match.lastindex)
+
+ def items(self):
+ url = "{}/{}/catalog.json".format(self.root, self.board)
+ for thread in self.request(url).json():
+ url = "{}/{}/res/{}.html".format(
+ self.root, self.board, thread["threadId"])
+ thread["_extractor"] = LynxchanThreadExtractor
+ yield Message.Queue, url, thread
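
Note: this new module generalizes the site-specific kohlchan.py deleted
above; kohlchan and endchan are registered through
LynxchanExtractor.update(), and "root": None for endchan presumably means
the root is derived from whichever domain the input URL matched. The
thread traversal follows the standard LynxChan JSON API. A standalone
sketch of that traversal, using requests directly instead of
self.request() purely for illustration:

    import requests

    def thread_files(root, board, thread_id):
        url = "{}/{}/res/{}.json".format(root, board, thread_id)
        thread = requests.get(url).json()
        # the opening post is the thread object itself; replies
        # are in "posts", and each post may carry several files
        for post in [thread] + thread.get("posts", []):
            for file in post.get("files", ()):
                # "path" is site-absolute, e.g. "/.media/<hash>.ext"
                yield root + file["path"], file["originalName"]

    for url, name in thread_files("https://kohlchan.net", "a", 4594):
        print(url, name)
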
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 0bc3527..dae203e 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -109,7 +109,7 @@ class MangadexChapterExtractor(MangadexExtractor):
}),
# 'externalUrl', but still downloadable (#2503)
("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", {
- "count": 39,
+ "count": 0, # 404
}),
)
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index 4808105..0818fd9 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -10,7 +10,6 @@
from .common import ChapterExtractor, MangaExtractor
from .. import text
-import re
BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
@@ -44,14 +43,14 @@ class MangafoxChapterExtractor(ChapterExtractor):
cid , pos = text.extract(page, "var chapter_id =", ";", pos)
return {
- "manga": text.unescape(manga),
- "volume": text.parse_int(self.volume),
- "chapter": text.parse_int(self.chapter),
- "chapter_minor": self.minor or "",
+ "manga" : text.unescape(manga),
+ "volume" : text.parse_int(self.volume),
+ "chapter" : text.parse_int(self.chapter),
+ "chapter_minor" : self.minor or "",
"chapter_string": self.cstr,
- "count": text.parse_int(count),
- "sid": text.parse_int(sid),
- "cid": text.parse_int(cid),
+ "count" : text.parse_int(count),
+ "sid" : text.parse_int(sid),
+ "cid" : text.parse_int(cid),
}
def images(self, page):
@@ -76,6 +75,25 @@ class MangafoxMangaExtractor(MangaExtractor):
("https://fanfox.net/manga/kanojo_mo_kanojo", {
"pattern": MangafoxChapterExtractor.pattern,
"count": ">=60",
+ "keyword": {
+ "author": "HIROYUKI",
+ "chapter": int,
+ "chapter_minor": r"re:^(\.\d+)?$",
+ "chapter_string": r"re:(v\d+/)?c\d+",
+ "date": "type:datetime",
+ "description": "High school boy Naoya gets a confession from M"
+ "omi, a cute and friendly girl. However, Naoya "
+ "already has a girlfriend, Seki... but Momi is "
+ "too good a catch to let go. Momi and Nagoya's "
+ "goal becomes clear: convince Seki to accept be"
+ "ing an item with the two of them. Will she bud"
+ "ge?",
+ "lang": "en",
+ "language": "English",
+ "manga": "Kanojo mo Kanojo",
+ "tags": ["Comedy", "Romance", "School Life", "Shounen"],
+ "volume": int,
+ },
}),
("https://mangafox.me/manga/shangri_la_frontier", {
"pattern": MangafoxChapterExtractor.pattern,
@@ -85,34 +103,41 @@ class MangafoxMangaExtractor(MangaExtractor):
)
def chapters(self, page):
- match_info = re.compile(r"Ch (\d+)(\S*)(?: (.*))?").match
- manga, pos = text.extract(page, '<p class="title">', '</p>')
- author, pos = text.extract(page, '<p>Author(s):', '</p>', pos)
+ results = []
+ chapter_match = MangafoxChapterExtractor.pattern.match
+
+ extr = text.extract_from(page)
+ manga = extr('<p class="title">', '</p>')
+ author = extr('<p>Author(s):', '</p>')
+ extr('<dd class="chlist">', '')
+
+ genres, _, summary = text.extr(
+ page, '<div class="manga-genres">', '</section>'
+ ).partition('<div class="manga-summary">')
+
data = {
- "manga" : text.unescape(manga),
- "author" : text.remove_html(author),
- "lang" : "en",
- "language": "English",
+ "manga" : text.unescape(manga),
+ "author" : text.remove_html(author),
+ "description": text.unescape(text.remove_html(summary)),
+ "tags" : text.split_html(genres),
+ "lang" : "en",
+ "language" : "English",
}
- results = []
- pos = page.index('<dd class="chlist">')
while True:
- url, pos = text.extract(page, '<a href="//', '"', pos)
- if url == 'mangafox.la?f=mobile':
+ url = "https://" + extr('<a href="//', '"')
+ match = chapter_match(url)
+ if not match:
return results
- info, pos = text.extract(page, '>', '<span', pos)
- date, pos = text.extract(page, 'right">', '</span>', pos)
-
- match = match_info(text.unescape(info))
- if match:
- chapter, minor, title = match.groups()
- chapter_minor = minor
- else:
- chapter, _, minor = url[:-7].rpartition("/c")[2].partition(".")
- chapter_minor = "." + minor
-
- data["chapter"] = text.parse_int(chapter)
- data["chapter_minor"] = chapter_minor if minor else ""
- data["date"] = date
- results.append(("https://" + url, data.copy()))
+ _, cstr, volume, chapter, minor = match.groups()
+
+ chapter = {
+ "volume" : text.parse_int(volume),
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor" : minor or "",
+ "chapter_string": cstr,
+ "date" : text.parse_datetime(
+ extr('right">', '</span>'), "%b %d, %Y"),
+ }
+ chapter.update(data)
+ results.append((url, chapter))
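
Note: chapters() now takes volume/chapter/minor straight from each
chapter URL by matching it against MangafoxChapterExtractor.pattern,
instead of re-parsing the "Ch ..." label text. The regex below is a
hypothetical stand-in (the real pattern is defined elsewhere in this
file) with the same trailing group layout the unpacking above assumes:

    import re

    CHAPTER = re.compile(   # hypothetical approximation of the pattern
        r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
        r"(/manga/[^/?#]+/((?:v(\d+)/)?c(\d+)(\.\d+)?)/\d+\.html)")

    m = CHAPTER.match(
        "https://fanfox.net/manga/kanojo_mo_kanojo/v02/c019/1.html")
    _, cstr, volume, chapter, minor = m.groups()
    # cstr == 'v02/c019', volume == '02', chapter == '019', minor is None
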
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 9cd95bb..134361d 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -88,20 +88,32 @@ class PixivExtractor(Extractor):
url = ugoira["zip_urls"]["medium"].replace(
"_ugoira600x600", "_ugoira1920x1080")
work["frames"] = ugoira["frames"]
+ work["date_url"] = self._date_from_url(url)
work["_http_adjust_extension"] = False
yield Message.Url, url, text.nameext_from_url(url, work)
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
+ work["date_url"] = self._date_from_url(url)
yield Message.Url, url, text.nameext_from_url(url, work)
else:
for work["num"], img in enumerate(meta_pages):
url = img["image_urls"]["original"]
+ work["date_url"] = self._date_from_url(url)
work["suffix"] = "_p{:02}".format(work["num"])
yield Message.Url, url, text.nameext_from_url(url, work)
@staticmethod
+ def _date_from_url(url, offset=timedelta(hours=9)):
+ try:
+ _, _, _, _, _, y, m, d, H, M, S, _ = url.split("/")
+ return datetime(
+ int(y), int(m), int(d), int(H), int(M), int(S)) - offset
+ except Exception:
+ return None
+
+ @staticmethod
def _make_work(kind, url, user):
p = url.split("/")
return {
@@ -309,6 +321,10 @@ class PixivWorkExtractor(PixivExtractor):
("https://www.pixiv.net/artworks/966412", {
"url": "90c1715b07b0d1aad300bce256a0bc71f42540ba",
"content": "69a8edfb717400d1c2e146ab2b30d2c235440c5a",
+ "keyword": {
+ "date" : "dt:2008-06-12 15:29:13",
+ "date_url": "dt:2008-06-12 15:29:13",
+ },
}),
(("http://www.pixiv.net/member_illust.php"
"?mode=medium&illust_id=966411"), {
@@ -318,7 +334,11 @@ class PixivWorkExtractor(PixivExtractor):
(("https://www.pixiv.net/member_illust.php"
"?mode=medium&illust_id=66806629"), {
"url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
- "keywords": {"frames": list},
+ "keyword": {
+ "frames" : list,
+ "date" : "dt:2018-01-14 15:06:08",
+ "date_url": "dt:2018-01-15 04:24:48",
+ },
}),
# related works (#1237)
("https://www.pixiv.net/artworks/966412", {
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 0ec8478..204562e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -428,7 +428,7 @@ class RedditAPI():
def _pagination(self, endpoint, params):
id_min = self._parse_id("id-min", 0)
- id_max = self._parse_id("id-max", 2147483647)
+ id_max = self._parse_id("id-max", float("inf"))
date_min, date_max = self.extractor._get_date_min_max(0, 253402210800)
while True:
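
Note: Reddit post IDs are base36 strings whose integer values have
grown past the old 2147483647 (2**31 - 1) default, which silently
excluded newer posts from the ID range check. Since Python compares
ints and floats directly, float("inf") works as a truly unbounded
default:

    id_max = float("inf")
    print(int("zzzzzz", 36))           # 2176782335, above the old cap
    print(int("zzzzzz", 36) < id_max)  # True
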
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 53e5e79..ad4282c 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,7 +72,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)"
test = ("https://www.redgifs.com/users/Natalifiction", {
"pattern": r"https://\w+\.redgifs\.com/[A-Za-z]+\.mp4",
- "count": ">= 120",
+ "count": ">= 100",
})
def metadata(self):
@@ -89,7 +89,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/browse/?\?([^#]+)"
test = (
("https://www.redgifs.com/browse?tags=JAV", {
- "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.mp4",
+ "pattern": r"https://\w+\.redgifs\.com/[A-Za-z-]+\.(mp4|jpg)",
"range": "1-10",
"count": 10,
}),
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 22c9487..7b8d2a3 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -44,7 +44,11 @@ class SeigaExtractor(Extractor):
url = "{}/image/source/{}".format(self.root, image_id)
response = self.request(
url, method="HEAD", allow_redirects=False, notfound="image")
- return response.headers["Location"].replace("/o/", "/priv/", 1)
+ location = response.headers["location"]
+ if "nicovideo.jp/login" in location:
+ raise exception.StopExtraction(
+ "HTTP redirect to login page (%s)", location.partition("?")[0])
+ return location.replace("/o/", "/priv/", 1)
class SeigaUserExtractor(SeigaExtractor):
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index aa6726d..486bf92 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -74,9 +74,7 @@ class SexcomExtractor(Extractor):
path = text.extr(info, "src: '", "'")
data["filename"] = path.rpartition("/")[2]
data["extension"] = "mp4"
- if "'HD'" in info:
- path += "/hd"
- data["url"] = self.root + path
+ data["url"] = path
else:
iframe = extr('<iframe', '>')
src = (text.extr(iframe, ' src="', '"') or
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index ae4e2e8..3727c0b 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -43,7 +43,8 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
}),
("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
"range": "34",
- "content": ("52b5a310587de1048030ab13a912f6a3a9cc7dab",
+ "content": ("276eb2c902187bb177ae8013e310e1d6641fba9a",
+ "52b5a310587de1048030ab13a912f6a3a9cc7dab",
"cec6630e659dc72db1ee1a9a6f3b525189261988",
"6f81e1e74c6cd6db36844e7211eef8e7cd30055d",
"22e83645fc242bc3584eca7ec982c8a53a4d8a44"),
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 2264fe4..713d4c4 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -117,7 +117,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "4cef98133ace511adc874c9d9abac5817ba0d856",
+ "keyword": "2b545184592c282b365fcbb7df6ca7952b8a3173",
}),
)
diff --git a/gallery_dl/extractor/soundgasm.py b/gallery_dl/extractor/soundgasm.py
new file mode 100644
index 0000000..1afb92c
--- /dev/null
+++ b/gallery_dl/extractor/soundgasm.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://soundgasm.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class SoundgasmAudioExtractor(Extractor):
+ """Extractor for audio clips from soundgasm.net"""
+ category = "soundgasm"
+ subcategory = "audio"
+ root = "https://soundgasm.net"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{title}.{extension}"
+ archive_fmt = "{user}_{slug}"
+ pattern = (r"(?:https?://)?(?:www\.)?soundgasm\.net"
+ r"/u(?:ser)?/([^/?#]+)/([^/?#]+)")
+ test = (
+ (("https://soundgasm.net/u/ClassWarAndPuppies2"
+ "/687-Otto-von-Toontown-12822"), {
+ "pattern": r"https://media\.soundgasm\.net/sounds"
+ r"/26cb2b23b2f2c6094b40ee3a9167271e274b570a\.m4a",
+ "keyword": {
+ "description": "We celebrate today’s important prisoner swap, "
+ "and finally bring the 2022 mid-terms to a clos"
+ "e with Raphael Warnock’s defeat of Herschel Wa"
+ "lker in Georgia. Then, we take a look at the Q"
+ "anon-addled attempt to overthrow the German go"
+ "vernment and install Heinrich XIII Prince of R"
+ "euss as kaiser.",
+ "extension": "m4a",
+ "filename": "26cb2b23b2f2c6094b40ee3a9167271e274b570a",
+ "slug": "687-Otto-von-Toontown-12822",
+ "title": "687 - Otto von Toontown (12/8/22)",
+ "user": "ClassWarAndPuppies2",
+ },
+ }),
+ ("https://www.soundgasm.net/user/ClassWarAndPuppies2"
+ "/687-Otto-von-Toontown-12822"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user, self.slug = match.groups()
+
+ def items(self):
+ url = "{}/u/{}/{}".format(self.root, self.user, self.slug)
+ extr = text.extract_from(self.request(url).text)
+
+ data = {
+ "user" : self.user,
+ "slug" : self.slug,
+ "title": text.unescape(extr('aria-label="title">', "<")),
+ "description": text.unescape(text.remove_html(extr(
+ 'class="jp-description">', '</div>'))),
+ }
+
+ formats = extr('"setMedia", {', '}')
+ url = text.extr(formats, ': "', '"')
+
+ yield Message.Directory, data
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class SoundgasmUserExtractor(Extractor):
+ """Extractor for all sounds from a soundgasm user"""
+ category = "soundgasm"
+ subcategory = "user"
+ root = "https://soundgasm.net"
+ pattern = (r"(?:https?://)?(?:www\.)?soundgasm\.net"
+ r"/u(?:ser)?/([^/?#]+)/?$")
+ test = ("https://soundgasm.net/u/fierce-aphrodite", {
+ "pattern": SoundgasmAudioExtractor.pattern,
+ "count" : ">= 15",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def items(self):
+ page = self.request(self.root + "/user/" + self.user).text
+ data = {"_extractor": SoundgasmAudioExtractor}
+ for sound in text.extract_iter(
+ page, 'class="sound-details">', "</a>"):
+ yield Message.Queue, text.extr(sound, '<a href="', '"'), data
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index f010f92..30bf2f1 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -83,7 +83,7 @@ class TwibooruPostExtractor(TwibooruExtractor):
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2022-09-21T14:31:50.441Z",
+ "updated_at": "2022-11-27T00:34:50.483Z",
"upvotes": int,
"view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png",
"width": 576,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 22d4a6e..22aa78e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -633,7 +633,7 @@ class TwitterEventExtractor(TwitterExtractor):
pattern = BASE_PATTERN + r"/i/events/(\d+)"
test = ("https://twitter.com/i/events/1484669206993903616", {
"range": "1-20",
- "count": ">5",
+ "count": ">=1",
})
def metadata(self):
@@ -759,7 +759,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# retweet with missing media entities (#1555)
("https://twitter.com/morino_ya/status/1392763691599237121", {
"options": (("retweets", True),),
- "count": 4,
+ "count": 0, # private
}),
# deleted quote tweet (#2225)
("https://twitter.com/i/web/status/1460044411165888515", {
@@ -782,7 +782,7 @@ class TwitterTweetExtractor(TwitterExtractor):
# '?format=...&name=...'-style URLs
("https://twitter.com/poco_dandy/status/1150646424461176832", {
"options": (("cards", True),),
- "pattern": r"https://pbs.twimg.com/card_img/157\d+/\w+"
+ "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
r"\?format=(jpg|png)&name=orig$",
"range": "1-2",
}),
@@ -886,7 +886,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
def tweets(self):
self.api._user_id_by_screen_name(self.user)
- user = user = self._user_obj
+ user = self._user_obj
try:
url = user["legacy"]["profile_banner_url"]
@@ -1216,15 +1216,16 @@ class TwitterAPI():
original_retweets = (self.extractor.retweets == "original")
while True:
- cursor = tweet = None
data = self._call(endpoint, params)
instr = data["timeline"]["instructions"]
if not instr:
return
- tweet_ids = []
+
tweets = data["globalObjects"]["tweets"]
users = data["globalObjects"]["users"]
+ tweet_id = cursor = None
+ tweet_ids = []
# collect tweet IDs and cursor value
for entry in instr[0]["addEntries"]["entries"]:
@@ -1243,7 +1244,7 @@ class TwitterAPI():
cursor = entry["content"]["operation"]["cursor"]
if not cursor.get("stopOnEmptyResponse", True):
# keep going even if there are no tweets
- tweet = True
+ tweet_id = True
cursor = cursor["value"]
elif entry_startswith("conversationThread-"):
@@ -1292,7 +1293,7 @@ class TwitterAPI():
cursor = (instr[-1]["replaceEntry"]["entry"]
["content"]["operation"]["cursor"]["value"])
- if not cursor or not tweet:
+ if not cursor or (not tweets and not tweet_id):
return
params["cursor"] = cursor
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 8bea18c..b298c27 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -78,11 +78,11 @@ class UnsplashImageExtractor(UnsplashExtractor):
pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
test = ("https://unsplash.com/photos/lsoogGC_5dg", {
"pattern": r"https://images\.unsplash\.com/photo-1586348943529-"
- r"beaae6c28db9\?ixid=\w+&ixlib=rb-1.2.1",
+ r"beaae6c28db9\?ixid=\w+&ixlib=rb-4.0.3",
"keyword": {
"alt_description": "re:silhouette of trees near body of water ",
"blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
- "categories": list,
+ "? categories": list,
"color": "#f3c08c",
"created_at": "2020-04-08T12:29:42Z",
"date": "dt:2020-04-08 12:29:42",
@@ -108,9 +108,8 @@ class UnsplashImageExtractor(UnsplashExtractor):
"name": "Beaver Dam, WI 53916, USA",
"position": {
"latitude": 43.457769,
- "longitude": -88.837329
+ "longitude": -88.837329,
},
- "title": "Beaver Dam, WI 53916, USA"
},
"promoted_at": "2020-04-08T15:12:03Z",
"sponsorship": None,
@@ -149,7 +148,7 @@ class UnsplashUserExtractor(UnsplashExtractor):
pattern = BASE_PATTERN + r"/@(\w+)/?$"
test = ("https://unsplash.com/@davehoefler", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"range": "1-30",
"count": 30,
})
@@ -166,7 +165,7 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
pattern = BASE_PATTERN + r"/@(\w+)/likes"
test = ("https://unsplash.com/@davehoefler/likes", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"range": "1-30",
"count": 30,
})
@@ -184,7 +183,7 @@ class UnsplashCollectionExtractor(UnsplashExtractor):
test = (
("https://unsplash.com/collections/3178572/winter", {
"pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"keyword": {"collection_id": "3178572",
"collection_title": "winter"},
"range": "1-30",
@@ -212,8 +211,9 @@ class UnsplashSearchExtractor(UnsplashExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^#]+))?"
test = ("https://unsplash.com/s/photos/hair-style", {
- "pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "pattern": r"https://(images|plus)\.unsplash\.com"
+ r"/((flagged/|premium_)?photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-4\.0\.3$",
"range": "1-30",
"count": 30,
})
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 677680f..bdedfcb 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://warosu.org/"""
+"""Extractors for https://warosu.org/"""
from .common import Extractor, Message
from .. import text
class WarosuThreadExtractor(Extractor):
- """Extractor for images from threads on warosu.org"""
+ """Extractor for threads on warosu.org"""
category = "warosu"
subcategory = "thread"
+ root = "https://warosu.org"
directory_fmt = ("{category}", "{board}", "{thread} - {title}")
filename_fmt = "{tim}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
@@ -31,7 +32,6 @@ class WarosuThreadExtractor(Extractor):
"content": "d48df0a701e6599312bfff8674f4aa5d4fb8db1c",
}),
)
- root = "https://warosu.org"
def __init__(self, match):
Extractor.__init__(self, match)
@@ -40,12 +40,12 @@ class WarosuThreadExtractor(Extractor):
def items(self):
url = "{}/{}/thread/{}".format(self.root, self.board, self.thread)
page = self.request(url).text
- data = self.get_metadata(page)
+ data = self.metadata(page)
posts = self.posts(page)
if not data["title"]:
- title = text.remove_html(posts[0]["com"])
- data["title"] = text.unescape(title)[:50]
+ data["title"] = text.unescape(text.remove_html(
+ posts[0]["com"]))[:50]
yield Message.Directory, data
for post in posts:
@@ -55,25 +55,24 @@ class WarosuThreadExtractor(Extractor):
post.update(data)
yield Message.Url, post["image"], post
- def get_metadata(self, page):
- """Collect metadata for extractor-job"""
+ def metadata(self, page):
boardname = text.extr(page, "<title>", "</title>")
title = text.extr(page, 'filetitle" itemprop="name">', '<')
return {
- "board": self.board,
+ "board" : self.board,
"board_name": boardname.rpartition(" - ")[2],
- "thread": self.thread,
- "title": title,
+ "thread" : self.thread,
+ "title" : title,
}
def posts(self, page):
- """Build a list of all post-objects"""
+ """Build a list of all post objects"""
page = text.extr(page, '<div class="content">', '<table>')
needle = '<table itemscope itemtype="http://schema.org/Comment">'
return [self.parse(post) for post in page.split(needle)]
def parse(self, post):
- """Build post-object by extracting data from an HTML post"""
+ """Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
if "<span>File:" in post:
self._extract_image(post, data)
@@ -84,24 +83,23 @@ class WarosuThreadExtractor(Extractor):
@staticmethod
def _extract_post(post):
- data = text.extract_all(post, (
- ("no" , 'id="p', '"'),
- ("name", '<span itemprop="name">', '</span>'),
- ("time", '<span class="posttime" title="', '000">'),
- ("now" , '', '<'),
- ("com" , '<blockquote><p itemprop="text">', '</p></blockquote>'),
- ))[0]
- data["com"] = text.unescape(text.remove_html(data["com"].strip()))
- return data
+ extr = text.extract_from(post)
+ return {
+ "no" : extr('id="p', '"'),
+ "name": extr('<span itemprop="name">', "</span>"),
+ "time": extr('<span class="posttime" title="', '000">'),
+ "now" : extr("", "<"),
+ "com" : text.unescape(text.remove_html(extr(
+ '<blockquote><p itemprop="text">', '</p></blockquote>'
+ ).strip())),
+ }
@staticmethod
def _extract_image(post, data):
- text.extract_all(post, (
- ("fsize" , '<span>File: ', ', '),
- ("w" , '', 'x'),
- ("h" , '', ', '),
- ("filename", '', '<'),
- ("image" , '<br />\n<a href="', '"'),
- ), 0, data)
- data["filename"] = text.unquote(data["filename"].rpartition(".")[0])
- data["image"] = "https:" + data["image"]
+ extr = text.extract_from(post)
+ data["fsize"] = extr("<span>File: ", ", ")
+ data["w"] = extr("", "x")
+ data["h"] = extr("", ", ")
+ data["filename"] = text.unquote(extr("", "<").rpartition(".")[0])
+ extr("<br />", "")
+ data["image"] = "https:" + extr('<a href="', '"')
diff --git a/gallery_dl/extractor/webmshare.py b/gallery_dl/extractor/webmshare.py
new file mode 100644
index 0000000..b038425
--- /dev/null
+++ b/gallery_dl/extractor/webmshare.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://webmshare.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WebmshareVideoExtractor(Extractor):
+ """Extractor for webmshare videos"""
+ category = "webmshare"
+ subcategory = "video"
+ root = "https://webmshare.com"
+ filename_fmt = "{id}{title:? //}.{extension}"
+ archive_fmt = "{id}"
+ pattern = (r"(?:https?://)?(?:s\d+\.)?webmshare\.com"
+ r"/(?:play/|download-webm/)?(\w{3,})")
+ test = (
+ ("https://webmshare.com/O9mWY", {
+ "keyword": {
+ "date": "dt:2022-12-04 00:00:00",
+ "extension": "webm",
+ "filename": "O9mWY",
+ "height": 568,
+ "id": "O9mWY",
+ "thumb": "https://s1.webmshare.com/t/O9mWY.jpg",
+ "title": "Yeah buddy over here",
+ "url": "https://s1.webmshare.com/O9mWY.webm",
+ "views": int,
+ "width": 320,
+ },
+ }),
+ ("https://s1.webmshare.com/zBGAg.webm", {
+ "keyword": {
+ "date": "dt:2018-12-07 00:00:00",
+ "height": 1080,
+ "id": "zBGAg",
+ "thumb": "https://s1.webmshare.com/t/zBGAg.jpg",
+ "title": "",
+ "url": "https://s1.webmshare.com/zBGAg.webm",
+ "views": int,
+ "width": 1920,
+ },
+ }),
+ ("https://webmshare.com/play/zBGAg"),
+ ("https://webmshare.com/download-webm/zBGAg"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.video_id = match.group(1)
+
+ def items(self):
+ url = "{}/{}".format(self.root, self.video_id)
+ extr = text.extract_from(self.request(url).text)
+
+ data = {
+ "title": text.unescape(extr(
+ 'property="og:title" content="', '"').rpartition(" — ")[0]),
+ "thumb": "https:" + extr('property="og:image" content="', '"'),
+ "url" : "https:" + extr('property="og:video" content="', '"'),
+ "width": text.parse_int(extr(
+ 'property="og:video:width" content="', '"')),
+ "height": text.parse_int(extr(
+ 'property="og:video:height" content="', '"')),
+ "date" : text.parse_datetime(extr(
+ "<small>Added ", "<"), "%B %d, %Y"),
+ "views": text.parse_int(extr('glyphicon-eye-open"></span>', '<')),
+ "id" : self.video_id,
+ "filename" : self.video_id,
+ "extension": "webm",
+ }
+
+ if data["title"] == "webmshare":
+ data["title"] = ""
+
+ yield Message.Directory, data
+ yield Message.Url, data["url"], data
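
Note: this extractor reads all of its metadata from the page's Open
Graph <meta> tags via text.extr(), gallery-dl's one-shot begin/end
extraction helper. A tiny demo on a hypothetical page fragment:

    from gallery_dl import text

    page = ('<meta property="og:video" '
            'content="//s1.webmshare.com/zBGAg.webm">')  # hypothetical
    url = "https:" + text.extr(page, 'property="og:video" content="', '"')
    print(url)  # https://s1.webmshare.com/zBGAg.webm
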
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 8a22fcb..21f7c21 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -57,6 +57,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
}),
(("https://www.webtoons.com/en/challenge/punderworld"
"/happy-earth-day-/viewer?title_no=312584&episode_no=40"), {
+ "exception": exception.NotFoundError,
"keyword": {
"comic": "punderworld",
"description": str,
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index c0d43fe..74da615 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -11,8 +11,6 @@
from .booru import BooruExtractor
from ..cache import cache
from .. import text, exception
-from xml.etree import ElementTree
-
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -27,12 +25,13 @@ class ZerochanExtractor(BooruExtractor):
cookienames = ("z_id", "z_hash")
def login(self):
+ self._logged_in = True
if not self._check_cookies(self.cookienames):
username, password = self._get_auth_info()
if username:
self._update_cookies(self._login_impl(username, password))
- # force legacy layout
- self.session.cookies.set("v3", "0", domain=self.cookiedomain)
+ else:
+ self._logged_in = False
@cache(maxage=90*86400, keyarg=1)
def _login_impl(self, username, password):
@@ -60,36 +59,50 @@ class ZerochanExtractor(BooruExtractor):
url = "{}/{}".format(self.root, entry_id)
extr = text.extract_from(self.request(url).text)
- return {
- "id" : entry_id,
- "author": extr('"author": "', '"'),
+ data = {
+ "id" : text.parse_int(entry_id),
+ "author" : extr('"author": "', '"'),
"file_url": extr('"contentUrl": "', '"'),
- "date" : text.parse_datetime(extr(
- '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"),
- "width" : extr('"width": "', ' '),
- "height": extr('"height": "', ' '),
- "size" : text.parse_bytes(extr('"contentSize": "', 'B')),
- "path" : text.split_html(extr(
- 'class="breadcrumbs', '</p>'))[3::2],
- "tags" : extr('alt="Tags: Anime, ', '"').split(", ")
+ "date" : text.parse_datetime(extr('"datePublished": "', '"')),
+ "width" : text.parse_int(extr('"width": "', ' ')),
+ "height" : text.parse_int(extr('"height": "', ' ')),
+ "size" : text.parse_bytes(extr('"contentSize": "', 'B')),
+ "path" : text.split_html(extr(
+ 'class="breadcrumbs', '</p>'))[2:],
+ "uploader": extr('href="/user/', '"'),
+ "tags" : extr('<ul id="tags"', '</ul>'),
+ "source" : extr('<h2>Source</h2>', '</p><h2>').rpartition(
+ ">")[2] or None,
}
- def _parse_entry_xml(self, entry_id):
- url = "{}/{}?xml".format(self.root, entry_id)
- item = ElementTree.fromstring(self.request(url).text)[0][-1]
- # content = item[4].attrib
-
- return {
- # "id" : entry_id,
- # "file_url": content["url"],
- # "width" : content["width"],
- # "height": content["height"],
- # "size" : content["filesize"],
- "name" : item[2].text,
- "tags" : item[5].text.lstrip().split(", "),
- "md5" : item[6].text,
+ html = data["tags"]
+ tags = data["tags"] = []
+ for tag in html.split("<li class=")[1:]:
+ category, _, name = text.extr(tag, 'alt="', '<').partition('">')
+ tags.append(category + ":" + name.strip())
+
+ return data
+
+ def _parse_entry_json(self, entry_id):
+ url = "{}/{}?json".format(self.root, entry_id)
+ item = self.request(url).json()
+
+ data = {
+ "id" : item["id"],
+ "file_url": item["full"],
+ "width" : item["width"],
+ "height" : item["height"],
+ "size" : item["size"],
+ "name" : item["primary"],
+ "md5" : item["hash"],
+ "source" : item.get("source"),
}
+ if not self._logged_in:
+ data["tags"] = item["tags"]
+
+ return data
+
class ZerochanTagExtractor(ZerochanExtractor):
subcategory = "tag"
@@ -138,7 +151,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
if metadata:
entry_id = extr('href="/', '"')
post = self._parse_entry_html(entry_id)
- post.update(self._parse_entry_xml(entry_id))
+ post.update(self._parse_entry_json(entry_id))
yield post
else:
yield {
@@ -163,14 +176,34 @@ class ZerochanImageExtractor(ZerochanExtractor):
"pattern": r"https://static\.zerochan\.net/"
r"Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
"keyword": {
- "author": "YukinoTokisaki",
+ "author": "YeFan 葉凡",
"date": "dt:2020-04-24 21:33:44",
- "file_url": str,
+ "file_url": "https://static.zerochan.net"
+ "/Perth.%28Kantai.Collection%29.full.2920445.jpg",
"filename": "Perth.(Kantai.Collection).full.2920445",
- "height": "1366",
- "id": "2920445",
- "size": "1929k",
- "width": "1920",
+ "height": 1366,
+ "id": 2920445,
+ "path": ["Kantai Collection", "Perth (Kantai Collection)"],
+ "size": 1975296,
+ "tags": [
+ "Mangaka:YeFan 葉凡",
+ "Game:Kantai Collection",
+ "Character:Perth (Kantai Collection)",
+ "Theme:Blonde Hair",
+ "Theme:Braids",
+ "Theme:Coat",
+ "Theme:Female",
+ "Theme:Firefighter Outfit",
+ "Theme:Group",
+ "Theme:Long Sleeves",
+ "Theme:Personification",
+ "Theme:Pins",
+ "Theme:Ribbon",
+ "Theme:Shirt",
+ "Theme:Short Hair",
+ ],
+ "uploader": "YukinoTokisaki",
+ "width": 1920,
},
})
@@ -181,5 +214,5 @@ class ZerochanImageExtractor(ZerochanExtractor):
def posts(self):
post = self._parse_entry_html(self.image_id)
if self.config("metadata"):
- post.update(self._parse_entry_xml(self.image_id))
+ post.update(self._parse_entry_json(self.image_id))
return (post,)
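
Note: the XML endpoint and its ElementTree parsing are dropped in favor
of the site's ?json endpoint, mapped as in _parse_entry_json() above
(tags are only taken from it when not logged in, per the code). A
minimal fetch, assuming the endpoint is reachable without a session:

    import requests

    item = requests.get("https://www.zerochan.net/2920445?json").json()
    post = {
        "file_url": item["full"],
        "md5"     : item["hash"],
        "name"    : item["primary"],
        "source"  : item.get("source"),
    }
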
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index ca05fa5..8a45330 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -18,8 +18,10 @@ import operator
import functools
from . import text, util
+NONE = util.NONE
-def parse(format_string, default=None, fmt=format):
+
+def parse(format_string, default=NONE, fmt=format):
key = format_string, default, fmt
try:
@@ -88,7 +90,7 @@ class StringFormatter():
Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
"""
- def __init__(self, format_string, default=None, fmt=format):
+ def __init__(self, format_string, default=NONE, fmt=format):
self.default = default
self.format = fmt
self.result = []
@@ -193,7 +195,7 @@ class StringFormatter():
class TemplateFormatter(StringFormatter):
"""Read format_string from file"""
- def __init__(self, path, default=None, fmt=format):
+ def __init__(self, path, default=NONE, fmt=format):
with open(util.expand_path(path)) as fp:
format_string = fp.read()
StringFormatter.__init__(self, format_string, default, fmt)
@@ -202,23 +204,23 @@ class TemplateFormatter(StringFormatter):
class ExpressionFormatter():
"""Generate text by evaluating a Python expression"""
- def __init__(self, expression, default=None, fmt=None):
+ def __init__(self, expression, default=NONE, fmt=None):
self.format_map = util.compile_expression(expression)
class ModuleFormatter():
"""Generate text by calling an external function"""
- def __init__(self, function_spec, default=None, fmt=None):
+ def __init__(self, function_spec, default=NONE, fmt=None):
module_name, _, function_name = function_spec.partition(":")
module = __import__(module_name)
self.format_map = getattr(module, function_name)
class FStringFormatter():
- """Generate text by evaluaring an f-string literal"""
+ """Generate text by evaluating an f-string literal"""
- def __init__(self, fstring, default=None, fmt=None):
+ def __init__(self, fstring, default=NONE, fmt=None):
self.format_map = util.compile_expression("f'''" + fstring + "'''")
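
Note: swapping the None default for util.NONE matters once format specs
come into play: format(None, ">10") raises TypeError, while the
CustomNone additions in util.py below make util.NONE support __format__,
__len__, iteration and calls, so templates with missing keys degrade to
"None" instead of failing. Sketch, assuming parse() returns an object
exposing format_map() as used internally:

    from gallery_dl import formatter

    fmt = formatter.parse("{missing:>10}")
    print(fmt.format_map({}))  # 'None' instead of a TypeError
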
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 4d9a358..91e9169 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -142,10 +142,12 @@ def build_parser():
)
general.add_argument(
"--cookies-from-browser",
- dest="cookies_from_browser", metavar="BROWSER[+KEYRING][:PROFILE]",
+ dest="cookies_from_browser",
+ metavar="BROWSER[+KEYRING][:PROFILE][::CONTAINER]",
help=("Name of the browser to load cookies from, "
- "with optional keyring name prefixed with '+' and "
- "profile prefixed with ':'"),
+ "with optional keyring name prefixed with '+', "
+ "profile prefixed with ':', and "
+ "container prefixed with '::' ('none' for no container)"),
)
output = parser.add_argument_group("Output Options")
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 7d599ee..3b360e9 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -29,6 +29,8 @@ class PathFormat():
def __init__(self, extractor):
config = extractor.config
kwdefault = config("keywords-default")
+ if kwdefault is None:
+ kwdefault = util.NONE
filename_fmt = config("filename")
try:
@@ -212,14 +214,19 @@ class PathFormat():
def fix_extension(self, _=None):
"""Fix filenames without a given filename extension"""
- if not self.extension:
- self.kwdict["extension"] = self.prefix + self.extension_map("", "")
- self.build_path()
- if self.path[-1] == ".":
- self.path = self.path[:-1]
- self.temppath = self.realpath = self.realpath[:-1]
- elif not self.temppath:
+ try:
+ if not self.extension:
+ self.kwdict["extension"] = \
+ self.prefix + self.extension_map("", "")
+ self.build_path()
+ if self.path[-1] == ".":
+ self.path = self.path[:-1]
+ self.temppath = self.realpath = self.realpath[:-1]
+ elif not self.temppath:
+ self.build_path()
+ except Exception:
self.path = self.directory + "?"
+ self.realpath = self.temppath = self.realdirectory + "?"
return True
def build_filename(self, kwdict):
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 8ce1fb4..23d5bc8 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -528,8 +528,8 @@ def parse_inputfile(file, log):
yield line
-class UniversalNone():
- """None-style object that supports more operations than None itself"""
+class CustomNone():
+ """None-style type that supports more operations than regular None"""
__slots__ = ()
def __getattribute__(self, _):
@@ -538,18 +538,36 @@ class UniversalNone():
def __getitem__(self, _):
return self
+ def __iter__(self):
+ return self
+
+ def __call__(self, *args, **kwargs):
+ return self
+
+ @staticmethod
+ def __next__():
+ raise StopIteration
+
@staticmethod
def __bool__():
return False
@staticmethod
+ def __len__():
+ return 0
+
+ @staticmethod
+ def __format__(_):
+ return "None"
+
+ @staticmethod
def __str__():
return "None"
__repr__ = __str__
-NONE = UniversalNone()
+NONE = CustomNone()
EPOCH = datetime.datetime(1970, 1, 1)
SECOND = datetime.timedelta(0, 1)
WINDOWS = (os.name == "nt")
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d289009..d832185 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.24.1"
+__version__ = "1.24.2"
diff --git a/test/test_util.py b/test/test_util.py
index 2921ea2..4b8f9ae 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -618,10 +618,21 @@ class TestOther(unittest.TestCase):
obj = util.NONE
self.assertFalse(obj)
+ self.assertEqual(len(obj), 0)
self.assertEqual(str(obj), str(None))
self.assertEqual(repr(obj), repr(None))
+ self.assertEqual(format(obj), str(None))
+ self.assertEqual(format(obj, "%F"), str(None))
self.assertIs(obj.attr, obj)
self.assertIs(obj["key"], obj)
+ self.assertIs(obj(), obj)
+ self.assertIs(obj(1, "a"), obj)
+ self.assertIs(obj(foo="bar"), obj)
+
+ i = 0
+ for _ in obj:
+ i += 1
+ self.assertEqual(i, 0)
class TestExtractor():