author     Unit 193 <unit193@unit193.net>  2023-10-22 01:00:14 -0400
committer  Unit 193 <unit193@unit193.net>  2023-10-22 01:00:14 -0400
commit     e052f3b9e1d9703a5a466daeaf37bacf476c2daf (patch)
tree       fc608c7d452695706fb13e2b0b34671f569f3ab0
parent     b8758ecd073910ce3220b2e68399147b425c37b8 (diff)

New upstream version 1.26.1 (upstream/1.26.1)
-rw-r--r--  CHANGELOG.md                            40
-rw-r--r--  PKG-INFO                                 6
-rw-r--r--  README.rst                               4
-rw-r--r--  data/completion/_gallery-dl              4
-rw-r--r--  data/completion/gallery-dl.fish          4
-rw-r--r--  data/man/gallery-dl.1                    6
-rw-r--r--  data/man/gallery-dl.conf.5              47
-rw-r--r--  docs/gallery-dl.conf                     2
-rw-r--r--  gallery_dl.egg-info/PKG-INFO             6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt          2
-rw-r--r--  gallery_dl/cookies.py                   27
-rw-r--r--  gallery_dl/extractor/4chanarchives.py    1
-rw-r--r--  gallery_dl/extractor/__init__.py         2
-rw-r--r--  gallery_dl/extractor/bunkr.py           44
-rw-r--r--  gallery_dl/extractor/chevereto.py      113
-rw-r--r--  gallery_dl/extractor/deviantart.py      64
-rw-r--r--  gallery_dl/extractor/fantia.py           9
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py   22
-rw-r--r--  gallery_dl/extractor/imgbb.py           39
-rw-r--r--  gallery_dl/extractor/jpgfish.py        105
-rw-r--r--  gallery_dl/extractor/kemonoparty.py    134
-rw-r--r--  gallery_dl/extractor/moebooru.py         5
-rw-r--r--  gallery_dl/extractor/newgrounds.py      56
-rw-r--r--  gallery_dl/extractor/patreon.py          2
-rw-r--r--  gallery_dl/extractor/reddit.py           3
-rw-r--r--  gallery_dl/extractor/redgifs.py         15
-rw-r--r--  gallery_dl/extractor/sankaku.py          2
-rw-r--r--  gallery_dl/extractor/twitter.py          2
-rw-r--r--  gallery_dl/extractor/warosu.py          34
-rw-r--r--  gallery_dl/option.py                     5
-rw-r--r--  gallery_dl/postprocessor/exec.py        17
-rw-r--r--  gallery_dl/version.py                    2
-rw-r--r--  test/test_extractor.py                   2
-rw-r--r--  test/test_postprocessor.py               8
-rw-r--r--  test/test_results.py                     3
35 files changed, 582 insertions, 255 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a4ce4ba..34607f2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,45 @@
# Changelog
+## 1.26.1 - 2023-10-21
+### Extractors
+#### Additions
+- [bunkr] add extractor for media URLs ([#4684](https://github.com/mikf/gallery-dl/issues/4684))
+- [chevereto] add generic extractors for `chevereto` sites ([#4664](https://github.com/mikf/gallery-dl/issues/4664))
+ - `deltaporno.com` ([#1381](https://github.com/mikf/gallery-dl/issues/1381))
+ - `img.kiwi`
+ - `jpgfish`
+ - `pixl.li` ([#3179](https://github.com/mikf/gallery-dl/issues/3179), [#4357](https://github.com/mikf/gallery-dl/issues/4357))
+- [deviantart] implement `"group": "skip"` ([#4630](https://github.com/mikf/gallery-dl/issues/4630))
+- [fantia] add `content_count` and `content_num` metadata fields ([#4627](https://github.com/mikf/gallery-dl/issues/4627))
+- [imgbb] add `displayname` and `user_id` metadata ([#4626](https://github.com/mikf/gallery-dl/issues/4626))
+- [kemonoparty] support post revisions; add `revisions` option ([#4498](https://github.com/mikf/gallery-dl/issues/4498), [#4597](https://github.com/mikf/gallery-dl/issues/4597))
+- [kemonoparty] support searches ([#3385](https://github.com/mikf/gallery-dl/issues/3385), [#4057](https://github.com/mikf/gallery-dl/issues/4057))
+- [kemonoparty] support discord URLs with channel IDs ([#4662](https://github.com/mikf/gallery-dl/issues/4662))
+- [moebooru] add `metadata` option ([#4646](https://github.com/mikf/gallery-dl/issues/4646))
+- [newgrounds] support multi-image posts ([#4642](https://github.com/mikf/gallery-dl/issues/4642))
+- [sankaku] support `/posts/` URLs ([#4688](https://github.com/mikf/gallery-dl/issues/4688))
+- [twitter] add `sensitive` metadata field ([#4619](https://github.com/mikf/gallery-dl/issues/4619))
+#### Fixes
+- [4chanarchives] disable Referer headers by default ([#4686](https://github.com/mikf/gallery-dl/issues/4686))
+- [bunkr] fix `/d/` file URLs ([#4685](https://github.com/mikf/gallery-dl/issues/4685))
+- [deviantart] expand nested comment replies ([#4653](https://github.com/mikf/gallery-dl/issues/4653))
+- [deviantart] disable `jwt` ([#4652](https://github.com/mikf/gallery-dl/issues/4652))
+- [hentaifoundry] fix `.swf` file downloads ([#4641](https://github.com/mikf/gallery-dl/issues/4641))
+- [imgbb] fix `user` metadata extraction ([#4626](https://github.com/mikf/gallery-dl/issues/4626))
+- [imgbb] update pagination end condition ([#4626](https://github.com/mikf/gallery-dl/issues/4626))
+- [kemonoparty] update API endpoints ([#4676](https://github.com/mikf/gallery-dl/issues/4676), [#4677](https://github.com/mikf/gallery-dl/issues/4677))
+- [patreon] update `campaign_id` path ([#4639](https://github.com/mikf/gallery-dl/issues/4639))
+- [reddit] fix wrong previews ([#4649](https://github.com/mikf/gallery-dl/issues/4649))
+- [redgifs] fix `niches` extraction ([#4666](https://github.com/mikf/gallery-dl/issues/4666), [#4667](https://github.com/mikf/gallery-dl/issues/4667))
+- [twitter] fix crash due to missing `source` ([#4620](https://github.com/mikf/gallery-dl/issues/4620))
+- [warosu] fix extraction ([#4634](https://github.com/mikf/gallery-dl/issues/4634))
+### Post Processors
+#### Additions
+- support `{_filename}`, `{_directory}`, and `{_path}` replacement fields for `--exec` ([#4633](https://github.com/mikf/gallery-dl/issues/4633))
+### Miscellaneous
+#### Improvements
+- avoid temporary copies with `--cookies-from-browser` by opening cookie databases in read-only mode
+
## 1.26.0 - 2023-10-03
### Extractors
#### Additions
diff --git a/PKG-INFO b/PKG-INFO
index a2dedf5..0aed72d 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.0
+Version: 1.26.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 14cfb09..207b68e 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 94d2f06..c1425bb 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -73,8 +73,8 @@ _arguments -C -S \
--write-infojson'[==SUPPRESS==]' \
--write-tags'[Write image tags to separate text files]' \
--mtime-from-date'[Set file modification times according to "date" metadata]' \
---exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
---exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' \
+--exec'[Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
+--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"]':'<cmd>' \
{-P,--postprocessor}'[Activate the specified post processor]':'<name>' \
{-O,--postprocessor-option}'[Additional "<key>=<value>" post processor options]':'<opt>' && rc=0
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 00e7b24..593ab89 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -67,7 +67,7 @@ complete -c gallery-dl -l 'write-info-json' -d 'Write gallery metadata to a info
complete -c gallery-dl -l 'write-infojson' -d '==SUPPRESS=='
complete -c gallery-dl -l 'write-tags' -d 'Write image tags to separate text files'
complete -c gallery-dl -l 'mtime-from-date' -d 'Set file modification times according to "date" metadata'
-complete -c gallery-dl -x -l 'exec' -d 'Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"'
-complete -c gallery-dl -x -l 'exec-after' -d 'Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"'
+complete -c gallery-dl -x -l 'exec' -d 'Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"'
+complete -c gallery-dl -x -l 'exec-after' -d 'Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"'
complete -c gallery-dl -x -s 'P' -l 'postprocessor' -d 'Activate the specified post processor'
complete -c gallery-dl -x -s 'O' -l 'postprocessor-option' -d 'Additional "<key>=<value>" post processor options'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index c2eedb7..27f13af 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2023-10-03" "1.26.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2023-10-21" "1.26.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -216,10 +216,10 @@ Write image tags to separate text files
Set file modification times according to 'date' metadata
.TP
.B "\-\-exec" \f[I]CMD\f[]
-Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"
+Execute CMD for each downloaded file. Supported replacement fields are {} or {_path}, {_directory}, {_filename}. Example: --exec "convert {} {}.png && rm {}"
.TP
.B "\-\-exec\-after" \f[I]CMD\f[]
-Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"
+Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {_directory} && convert * ../doc.pdf"
.TP
.B "\-P, \-\-postprocessor" \f[I]NAME\f[]
Activate the specified post processor
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 19a5812..9083d24 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2023-10-03" "1.26.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2023-10-21" "1.26.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1529,7 +1529,10 @@ Use with caution.
.SS extractor.deviantart.group
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
@@ -1538,6 +1541,14 @@ Use with caution.
Check whether the profile name in a given URL
belongs to a group or a regular user.
+When disabled, assume every given profile name
+belongs to a regular user.
+
+Special values:
+
+.br
+* \f[I]"skip"\f[]: Skip groups
+
.SS extractor.deviantart.include
.IP "Type:" 6
@@ -1589,13 +1600,15 @@ literature and status updates.
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
.IP "Description:" 4
Update \f[I]JSON Web Tokens\f[] (the \f[I]token\f[] URL parameter)
of otherwise non-downloadable, low-resolution images
to be able to download them in full resolution.
+Note: No longer functional as of 2023-10-11
+
.SS extractor.deviantart.mature
.IP "Type:" 6
@@ -2415,7 +2428,20 @@ Limit the number of posts to download.
\f[I]false\f[]
.IP "Description:" 4
-Extract \f[I]username\f[] metadata
+Extract \f[I]username\f[] metadata.
+
+
+.SS extractor.kemonoparty.revisions
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract post revisions.
+
+Note: This requires 1 additional HTTP request per post.
.SS extractor.khinsider.format
@@ -2625,6 +2651,19 @@ Fetch media from renoted notes.
Fetch media from replies to other notes.
+.SS extractor.[moebooru].pool.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract extended \f[I]pool\f[] metadata.
+
+Note: Not supported by all \f[I]moebooru\f[] instances.
+
+
.SS extractor.newgrounds.flash
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 2eac0a1..9f12652 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -85,7 +85,7 @@
"group": true,
"include": "gallery",
"journals": "html",
- "jwt": true,
+ "jwt": false,
"mature": true,
"metadata": false,
"original": true,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 022a2d6..95861dc 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.26.0
+Version: 1.26.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -109,9 +109,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index e319eef..fb6cb4b 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -60,6 +60,7 @@ gallery_dl/extractor/blogger.py
gallery_dl/extractor/booru.py
gallery_dl/extractor/bunkr.py
gallery_dl/extractor/catbox.py
+gallery_dl/extractor/chevereto.py
gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
gallery_dl/extractor/cyberdrop.py
@@ -111,7 +112,6 @@ gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
gallery_dl/extractor/itaku.py
gallery_dl/extractor/itchio.py
-gallery_dl/extractor/jpgfish.py
gallery_dl/extractor/jschan.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index c5c5667..416cc9a 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -47,7 +47,7 @@ def load_cookies(cookiejar, browser_specification):
def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None):
path, container_id = _firefox_cookies_database(profile, container)
- with DatabaseCopy(path) as db:
+ with DatabaseConnection(path) as db:
sql = ("SELECT name, value, host, path, isSecure, expiry "
"FROM moz_cookies")
@@ -100,7 +100,7 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
path = _chrome_cookies_database(profile, config)
_log_debug("Extracting cookies from %s", path)
- with DatabaseCopy(path) as db:
+ with DatabaseConnection(path) as db:
db.text_factory = bytes
decryptor = get_cookie_decryptor(
config["directory"], config["keyring"], keyring)
@@ -814,7 +814,7 @@ class DataParser:
self.skip_to(len(self._data), description)
-class DatabaseCopy():
+class DatabaseConnection():
def __init__(self, path):
self.path = path
@@ -823,12 +823,26 @@ class DatabaseCopy():
def __enter__(self):
try:
+ # https://www.sqlite.org/uri.html#the_uri_path
+ path = self.path.replace("?", "%3f").replace("#", "%23")
+ if util.WINDOWS:
+ path = "/" + os.path.abspath(path)
+
+ uri = "file:{}?mode=ro&immutable=1".format(path)
+ self.database = sqlite3.connect(
+ uri, uri=True, isolation_level=None, check_same_thread=False)
+ return self.database
+ except Exception as exc:
+ _log_debug("Falling back to temporary database copy (%s: %s)",
+ exc.__class__.__name__, exc)
+
+ try:
self.directory = tempfile.TemporaryDirectory(prefix="gallery-dl-")
path_copy = os.path.join(self.directory.name, "copy.sqlite")
shutil.copyfile(self.path, path_copy)
- self.database = db = sqlite3.connect(
+ self.database = sqlite3.connect(
path_copy, isolation_level=None, check_same_thread=False)
- return db
+ return self.database
except BaseException:
if self.directory:
self.directory.cleanup()
@@ -836,7 +850,8 @@ class DatabaseCopy():
def __exit__(self, exc, value, tb):
self.database.close()
- self.directory.cleanup()
+ if self.directory:
+ self.directory.cleanup()
def Popen_communicate(*args):
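
Note: the DatabaseConnection class above replaces the previous copy-to-tempdir approach by opening the browser's cookie database through SQLite's URI filename syntax in read-only, immutable mode, and only falls back to a temporary copy when that fails. A minimal standalone sketch of the same idea (the profile path in the usage comment is illustrative):

    import os
    import shutil
    import sqlite3
    import tempfile

    def open_db_readonly(path):
        """Open an SQLite database without copying it, if possible."""
        # Percent-encode characters with special meaning in URI filenames
        # (https://www.sqlite.org/uri.html#the_uri_path)
        quoted = path.replace("?", "%3f").replace("#", "%23")
        uri = "file:{}?mode=ro&immutable=1".format(quoted)
        try:
            # immutable=1 skips locking, so a database still held open by a
            # running browser can be read in place
            return sqlite3.connect(uri, uri=True)
        except sqlite3.Error:
            # fallback: copy the file first, then open the copy
            tmpdir = tempfile.mkdtemp(prefix="gallery-dl-")
            copy = os.path.join(tmpdir, "copy.sqlite")
            shutil.copyfile(path, copy)
            return sqlite3.connect(copy)

    # Usage (the path depends on the local Firefox profile):
    # db = open_db_readonly("~/.mozilla/firefox/xyz.default/cookies.sqlite")
    # db.execute("SELECT name, value FROM moz_cookies")

On Windows the code above additionally prefixes the absolute path with "/", as URI filenames require there.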
diff --git a/gallery_dl/extractor/4chanarchives.py b/gallery_dl/extractor/4chanarchives.py
index f018d3e..27ac7c5 100644
--- a/gallery_dl/extractor/4chanarchives.py
+++ b/gallery_dl/extractor/4chanarchives.py
@@ -20,6 +20,7 @@ class _4chanarchivesThreadExtractor(Extractor):
directory_fmt = ("{category}", "{board}", "{thread} - {title}")
filename_fmt = "{no}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{no}"
+ referer = False
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)"
example = "https://4chanarchives.com/board/a/thread/12345/"
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 3abe74b..1c1473a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
"blogger",
"bunkr",
"catbox",
+ "chevereto",
"comicvine",
"cyberdrop",
"danbooru",
@@ -73,7 +74,6 @@ modules = [
"issuu",
"itaku",
"itchio",
- "jpgfish",
"jschan",
"kabeuchi",
"keenspot",
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 5509f5a..26123b8 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -12,6 +12,8 @@ from .lolisafe import LolisafeAlbumExtractor
from .. import text
from urllib.parse import urlsplit, urlunsplit
+BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)"
+
MEDIA_DOMAIN_OVERRIDES = {
"cdn9.bunkr.ru" : "c9.bunkr.ru",
"cdn12.bunkr.ru": "media-files12.bunkr.la",
@@ -28,7 +30,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"""Extractor for bunkrr.su albums"""
category = "bunkr"
root = "https://bunkrr.su"
- pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/a/([^/?#]+)"
example = "https://bunkrr.su/a/ID"
def fetch_album(self, album_id):
@@ -53,11 +55,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
for url in urls:
if url.startswith("/"):
try:
- page = self.request(self.root + text.unescape(url)).text
- if url[1] == "v":
- url = text.extr(page, '<source src="', '"')
- else:
- url = text.extr(page, '<img src="', '"')
+ url = self._extract_file(text.unescape(url))
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
continue
@@ -72,3 +70,37 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
url = urlunsplit((scheme, domain, path, query, fragment))
yield {"file": text.unescape(url)}
+
+ def _extract_file(self, path):
+ page = self.request(self.root + path).text
+ if path[1] == "v":
+ url = text.extr(page, '<source src="', '"')
+ else:
+ url = text.extr(page, '<img src="', '"')
+ if not url:
+ url = text.rextract(
+ page, ' href="', '"', page.rindex("Download"))[0]
+ return url
+
+
+class BunkrMediaExtractor(BunkrAlbumExtractor):
+ """Extractor for bunkrr.su media links"""
+ subcategory = "media"
+ directory_fmt = ("{category}",)
+ pattern = BASE_PATTERN + r"/[vid]/([^/?#]+)"
+ example = "https://bunkrr.su/v/FILENAME"
+
+ def fetch_album(self, album_id):
+ try:
+ url = self._extract_file(urlsplit(self.url).path)
+ except Exception as exc:
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ return (), {}
+
+ return ({"file": text.unescape(url)},), {
+ "album_id" : "",
+ "album_name" : "",
+ "album_size" : -1,
+ "description": "",
+ "count" : 1,
+ }
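
Note: the new _extract_file helper above looks for a <source> tag on /v/ pages and an <img> tag otherwise, then falls back to the last href before the trailing "Download" label when neither yields a URL. The same three-step lookup, sketched with plain string operations (the HTML in the demo is illustrative):

    def extract_file_url(page, is_video):
        """Media tag first, then the 'Download' link as a fallback."""
        marker = '<source src="' if is_video else '<img src="'
        pos = page.find(marker)
        if pos >= 0:
            start = pos + len(marker)
            url = page[start:page.index('"', start)]
            if url:
                return url
        # last ' href="..."' before the final "Download" text
        end = page.rindex("Download")
        start = page.rindex(' href="', 0, end) + len(' href="')
        return page[start:page.index('"', start)]

    page = '<img src=""><a href="https://cdn.example/f.jpg">Download</a>'
    print(extract_file_url(page, is_video=False))  # https://cdn.example/f.jpg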
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
new file mode 100644
index 0000000..21166bd
--- /dev/null
+++ b/gallery_dl/extractor/chevereto.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Chevereto galleries"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class CheveretoExtractor(BaseExtractor):
+ """Base class for chevereto extractors"""
+ basecategory = "chevereto"
+ directory_fmt = ("{category}", "{user}", "{album}",)
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ BaseExtractor.__init__(self, match)
+ self.path = match.group(match.lastindex)
+
+ def _pagination(self, url):
+ while url:
+ page = self.request(url).text
+
+ for item in text.extract_iter(
+ page, '<div class="list-item-image ', 'image-container'):
+ yield text.extr(item, '<a href="', '"')
+
+ url = text.extr(page, '<a data-pagination="next" href="', '" ><')
+
+
+BASE_PATTERN = CheveretoExtractor.update({
+ "jpgfish": {
+ "root": "https://jpg2.su",
+ "pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
+ },
+ "pixl": {
+ "root": "https://pixl.li",
+ "pattern": r"pixl\.(?:li|is)",
+ },
+ "imgkiwi": {
+ "root": "https://img.kiwi",
+ "pattern": r"img\.kiwi",
+ },
+ "deltaporno": {
+ "root": "https://gallery.deltaporno.com",
+ "pattern": r"gallery\.deltaporno\.com",
+ },
+})
+
+
+class CheveretoImageExtractor(CheveretoExtractor):
+ """Extractor for chevereto Images"""
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"(/im(?:g|age)/[^/?#]+)"
+ example = "https://jpg2.su/img/TITLE.ID"
+
+ def items(self):
+ url = self.root + self.path
+ extr = text.extract_from(self.request(url).text)
+
+ image = {
+ "id" : self.path.rpartition(".")[2],
+ "url" : extr('<meta property="og:image" content="', '"'),
+ "album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
+ "user" : extr('username: "', '"'),
+ }
+
+ text.nameext_from_url(image["url"], image)
+ yield Message.Directory, image
+ yield Message.Url, image["url"], image
+
+
+class CheveretoAlbumExtractor(CheveretoExtractor):
+ """Extractor for chevereto Albums"""
+ subcategory = "album"
+ pattern = BASE_PATTERN + r"(/a(?:lbum)?/[^/?#]+(?:/sub)?)"
+ example = "https://jpg2.su/album/TITLE.ID"
+
+ def items(self):
+ url = self.root + self.path
+ data = {"_extractor": CheveretoImageExtractor}
+
+ if self.path.endswith("/sub"):
+ albums = self._pagination(url)
+ else:
+ albums = (url,)
+
+ for album in albums:
+ for image in self._pagination(album):
+ yield Message.Queue, image, data
+
+
+class CheveretoUserExtractor(CheveretoExtractor):
+ """Extractor for chevereto Users"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"(/(?!img|image|a(?:lbum)?)[^/?#]+(?:/albums)?)"
+ example = "https://jpg2.su/USER"
+
+ def items(self):
+ url = self.root + self.path
+
+ if self.path.endswith("/albums"):
+ data = {"_extractor": CheveretoAlbumExtractor}
+ else:
+ data = {"_extractor": CheveretoImageExtractor}
+
+ for url in self._pagination(url):
+ yield Message.Queue, url, data
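
Note: BaseExtractor.update() above registers several site configurations under a single code path and yields one combined URL pattern, which is how four domains share these three extractor classes. Roughly how the combined matching behaves (a simplified stand-in, not gallery-dl's actual helper):

    import re

    def build_base_pattern(instances):
        """Combine per-site domain patterns into one alternation."""
        domains = "|".join(inst["pattern"] for inst in instances.values())
        return r"(?:https?://)?(?:" + domains + r")"

    INSTANCES = {
        "jpgfish": {"pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)"},
        "pixl"   : {"pattern": r"pixl\.(?:li|is)"},
        "imgkiwi": {"pattern": r"img\.kiwi"},
    }

    BASE = build_base_pattern(INSTANCES)
    image = re.compile(BASE + r"(/im(?:g|age)/[^/?#]+)")
    print(bool(image.match("https://jpg2.su/img/TITLE.ID")))   # True
    print(bool(image.match("https://pixl.li/image/EXAMPLE")))  # True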
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 9421096..2c37ef1 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -42,7 +42,7 @@ class DeviantartExtractor(Extractor):
self.offset = 0
def _init(self):
- self.jwt = self.config("jwt", True)
+ self.jwt = self.config("jwt", False)
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
@@ -91,14 +91,20 @@ class DeviantartExtractor(Extractor):
return True
def items(self):
- if self.user and self.config("group", True):
- profile = self.api.user_profile(self.user)
- self.group = not profile
- if self.group:
- self.subcategory = "group-" + self.subcategory
- self.user = self.user.lower()
- else:
- self.user = profile["user"]["username"]
+ if self.user:
+ group = self.config("group", True)
+ if group:
+ profile = self.api.user_profile(self.user)
+ if profile:
+ self.user = profile["user"]["username"]
+ self.group = False
+ elif group == "skip":
+ self.log.info("Skipping group '%s'", self.user)
+ raise exception.StopExtraction()
+ else:
+ self.subcategory = "group-" + self.subcategory
+ self.user = self.user.lower()
+ self.group = True
for deviation in self.deviations():
if isinstance(deviation, tuple):
@@ -228,7 +234,7 @@ class DeviantartExtractor(Extractor):
if self.comments:
deviation["comments"] = (
- self.api.comments(deviation["deviationid"], target="deviation")
+ self._extract_comments(deviation["deviationid"], "deviation")
if deviation["stats"]["comments"] else ()
)
@@ -395,6 +401,28 @@ class DeviantartExtractor(Extractor):
binascii.b2a_base64(payload).rstrip(b"=\n").decode())
)
+ def _extract_comments(self, target_id, target_type="deviation"):
+ results = None
+ comment_ids = [None]
+
+ while comment_ids:
+ comments = self.api.comments(
+ target_id, target_type, comment_ids.pop())
+
+ if results:
+ results.extend(comments)
+ else:
+ results = comments
+
+ # parent comments, i.e. nodes with at least one child
+ parents = {c["parentid"] for c in comments}
+ # comments with more than one reply
+ replies = {c["commentid"] for c in comments if c["replies"]}
+ # add comment UUIDs with replies that are not parent to any node
+ comment_ids.extend(replies - parents)
+
+ return results
+
def _limited_request(self, url, **kwargs):
"""Limits HTTP requests to one every 2 seconds"""
kwargs["fatal"] = None
@@ -698,7 +726,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
deviation["stats"] = {"comments": comments_count}
if self.comments:
deviation["comments"] = (
- self.api.comments(deviation["statusid"], target="status")
+ self._extract_comments(deviation["statusid"], "status")
if comments_count else ()
)
@@ -1072,11 +1100,17 @@ class DeviantartOAuthAPI():
"mature_content": self.mature}
return self._pagination_list(endpoint, params)
- def comments(self, id, target, offset=0):
+ def comments(self, target_id, target_type="deviation",
+ comment_id=None, offset=0):
"""Fetch comments posted on a target"""
- endpoint = "/comments/{}/{}".format(target, id)
- params = {"maxdepth": "5", "offset": offset, "limit": 50,
- "mature_content": self.mature}
+ endpoint = "/comments/{}/{}".format(target_type, target_id)
+ params = {
+ "commentid" : comment_id,
+ "maxdepth" : "5",
+ "offset" : offset,
+ "limit" : 50,
+ "mature_content": self.mature,
+ }
return self._pagination_list(endpoint, params=params, key="thread")
def deviation(self, deviation_id, public=None):
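
Note: the _extract_comments loop above works around the API's maxdepth=5 limit: after each fetch it collects the IDs of comments that report replies but never occur as a parentid in the returned thread, and re-queries exactly those sub-threads. A toy model of the traversal (fetch_comments stands in for the real API call):

    def expand_comments(fetch_comments, target_id):
        """Fetch a comment thread, re-requesting truncated sub-threads."""
        results = []
        pending = [None]        # None = fetch the top-level thread first
        while pending:
            comments = fetch_comments(target_id, comment_id=pending.pop())
            results.extend(comments)
            parents = {c["parentid"] for c in comments}
            replies = {c["commentid"] for c in comments if c["replies"]}
            # IDs with replies whose children were cut off by the depth limit
            pending.extend(replies - parents)
        return results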
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index f1d51e2..4a67695 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -42,7 +42,11 @@ class FantiaExtractor(Extractor):
post = self._get_post_data(post_id)
post["num"] = 0
- for content in self._get_post_contents(post):
+ contents = self._get_post_contents(post)
+ post["content_count"] = len(contents)
+ post["content_num"] = 0
+
+ for content in contents:
files = self._process_content(post, content)
yield Message.Directory, post
@@ -59,6 +63,8 @@ class FantiaExtractor(Extractor):
post["content_filename"] or file["file_url"], post)
yield Message.Url, file["file_url"], post
+ post["content_num"] += 1
+
def posts(self):
"""Return post IDs"""
@@ -131,6 +137,7 @@ class FantiaExtractor(Extractor):
post["content_filename"] = content.get("filename") or ""
post["content_id"] = content["id"]
post["content_comment"] = content.get("comment") or ""
+ post["content_num"] += 1
post["plan"] = content["plan"] or self._empty_plan
files = []
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 4c02000..8ba23c2 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -72,13 +72,11 @@ class HentaifoundryExtractor(Extractor):
extr = text.extract_from(page, page.index('id="picBox"'))
data = {
+ "index" : text.parse_int(path.rsplit("/", 2)[1]),
"title" : text.unescape(extr('class="imageTitle">', '<')),
"artist" : text.unescape(extr('/profile">', '<')),
- "width" : text.parse_int(extr('width="', '"')),
- "height" : text.parse_int(extr('height="', '"')),
- "index" : text.parse_int(path.rsplit("/", 2)[1]),
- "src" : text.urljoin(self.root, text.unescape(extr(
- 'src="', '"'))),
+ "_body" : extr(
+ '<div class="boxbody"', '<div class="boxfooter"'),
"description": text.unescape(text.remove_html(extr(
'>Description</div>', '</section>')
.replace("\r\n", "\n"), "", "")),
@@ -92,6 +90,20 @@ class HentaifoundryExtractor(Extractor):
">Tags </span>", "</div>")),
}
+ body = data["_body"]
+ if "<object " in body:
+ data["src"] = text.urljoin(self.root, text.unescape(text.extr(
+ body, 'name="movie" value="', '"')))
+ data["width"] = text.parse_int(text.extr(
+ body, "name='width' value='", "'"))
+ data["height"] = text.parse_int(text.extr(
+ body, "name='height' value='", "'"))
+ else:
+ data["src"] = text.urljoin(self.root, text.unescape(text.extr(
+ body, 'src="', '"')))
+ data["width"] = text.parse_int(text.extr(body, 'width="', '"'))
+ data["height"] = text.parse_int(text.extr(body, 'height="', '"'))
+
return text.nameext_from_url(data["src"], data)
def _parse_story(self, html):
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 1b74180..6c0684e 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -84,6 +84,13 @@ class ImgbbExtractor(Extractor):
raise exception.AuthenticationError()
return self.cookies
+ def _extract_resource(self, page):
+ return util.json_loads(text.extr(
+ page, "CHV.obj.resource=", "};") + "}")
+
+ def _extract_user(self, page):
+ return self._extract_resource(page).get("user") or {}
+
def _pagination(self, page, endpoint, params):
data = None
seek, pos = text.extract(page, 'data-seek="', '"')
@@ -99,7 +106,7 @@ class ImgbbExtractor(Extractor):
for img in text.extract_iter(page, "data-object='", "'"):
yield util.json_loads(text.unquote(img))
if data:
- if params["seek"] == data["seekEnd"]:
+ if not data["seekEnd"] or params["seek"] == data["seekEnd"]:
return
params["seek"] = data["seekEnd"]
params["page"] += 1
@@ -124,12 +131,14 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
self.page_url = "https://ibb.co/album/" + self.album_id
def metadata(self, page):
- album, pos = text.extract(page, '"og:title" content="', '"')
- user , pos = text.extract(page, 'rel="author">', '<', pos)
+ album = text.extr(page, '"og:title" content="', '"')
+ user = self._extract_user(page)
return {
- "album_id" : self.album_id,
- "album_name": text.unescape(album),
- "user" : user.lower() if user else "",
+ "album_id" : self.album_id,
+ "album_name" : text.unescape(album),
+ "user" : user.get("username") or "",
+ "user_id" : user.get("id") or "",
+ "displayname": user.get("name") or "",
}
def images(self, page):
@@ -158,7 +167,12 @@ class ImgbbUserExtractor(ImgbbExtractor):
self.page_url = "https://{}.imgbb.com/".format(self.user)
def metadata(self, page):
- return {"user": self.user}
+ user = self._extract_user(page)
+ return {
+ "user" : user.get("username") or self.user,
+ "user_id" : user.get("id") or "",
+ "displayname": user.get("name") or "",
+ }
def images(self, page):
user = text.extr(page, '.obj.resource={"id":"', '"')
@@ -181,15 +195,20 @@ class ImgbbImageExtractor(ImgbbExtractor):
def items(self):
url = "https://ibb.co/" + self.image_id
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+ user = self._extract_user(page)
image = {
"id" : self.image_id,
- "title" : text.unescape(extr('"og:title" content="', '"')),
+ "title" : text.unescape(extr(
+ '"og:title" content="', ' hosted at ImgBB"')),
"url" : extr('"og:image" content="', '"'),
"width" : text.parse_int(extr('"og:image:width" content="', '"')),
"height": text.parse_int(extr('"og:image:height" content="', '"')),
- "user" : extr('rel="author">', '<').lower(),
+ "user" : user.get("username") or "",
+ "user_id" : user.get("id") or "",
+ "displayname": user.get("name") or "",
}
image["extension"] = text.ext_from_url(image["url"])
diff --git a/gallery_dl/extractor/jpgfish.py b/gallery_dl/extractor/jpgfish.py
deleted file mode 100644
index 8862a7b..0000000
--- a/gallery_dl/extractor/jpgfish.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://jpg1.su/"""
-
-from .common import Extractor, Message
-from .. import text
-
-BASE_PATTERN = r"(?:https?://)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)"
-
-
-class JpgfishExtractor(Extractor):
- """Base class for jpgfish extractors"""
- category = "jpgfish"
- root = "https://jpg1.su"
- directory_fmt = ("{category}", "{user}", "{album}",)
- archive_fmt = "{id}"
-
- def _pagination(self, url):
- while url:
- page = self.request(url).text
-
- for item in text.extract_iter(
- page, '<div class="list-item-image ', 'image-container'):
- yield text.extract(item, '<a href="', '"')[0]
-
- url = text.extract(
- page, '<a data-pagination="next" href="', '" ><')[0]
-
-
-class JpgfishImageExtractor(JpgfishExtractor):
- """Extractor for jpgfish Images"""
- subcategory = "image"
- pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
- example = "https://jpg1.su/img/TITLE.ID"
-
- def __init__(self, match):
- JpgfishExtractor.__init__(self, match)
- self.path, self.image_id = match.groups()
-
- def items(self):
- url = "{}/img/{}".format(self.root, self.path)
- extr = text.extract_from(self.request(url).text)
-
- image = {
- "id" : self.image_id,
- "url" : extr('<meta property="og:image" content="', '"'),
- "album": text.extract(extr(
- "Added to <a", "/a>"), ">", "<")[0] or "",
- "user" : extr('username: "', '"'),
- }
-
- text.nameext_from_url(image["url"], image)
- yield Message.Directory, image
- yield Message.Url, image["url"], image
-
-
-class JpgfishAlbumExtractor(JpgfishExtractor):
- """Extractor for jpgfish Albums"""
- subcategory = "album"
- pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
- example = "https://jpg1.su/album/TITLE.ID"
-
- def __init__(self, match):
- JpgfishExtractor.__init__(self, match)
- self.album, self.sub_albums = match.groups()
-
- def items(self):
- url = "{}/a/{}".format(self.root, self.album)
- data = {"_extractor": JpgfishImageExtractor}
-
- if self.sub_albums:
- albums = self._pagination(url + "/sub")
- else:
- albums = (url,)
-
- for album in albums:
- for image in self._pagination(album):
- yield Message.Queue, image, data
-
-
-class JpgfishUserExtractor(JpgfishExtractor):
- """Extractor for jpgfish Users"""
- subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?"
- example = "https://jpg1.su/USER"
-
- def __init__(self, match):
- JpgfishExtractor.__init__(self, match)
- self.user, self.albums = match.groups()
-
- def items(self):
- url = "{}/{}".format(self.root, self.user)
-
- if self.albums:
- url += "/albums"
- data = {"_extractor": JpgfishAlbumExtractor}
- else:
- data = {"_extractor": JpgfishImageExtractor}
-
- for url in self._pagination(url):
- yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 894c671..1596cfb 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
+from ..cache import cache, memcache
import itertools
import re
@@ -70,8 +70,7 @@ class KemonopartyExtractor(Extractor):
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = text.parse_datetime(
- post["published"] or post["added"],
- "%a, %d %b %Y %H:%M:%S %Z")
+ post["published"] or post["added"], "%Y-%m-%dT%H:%M:%S")
if username:
post["username"] = username
if comments:
@@ -197,14 +196,25 @@ class KemonopartyExtractor(Extractor):
dms = []
for dm in text.extract_iter(page, "<article", "</article>"):
+ footer = text.extr(dm, "<footer", "</footer>")
dms.append({
- "body": text.unescape(text.extract(
+ "body": text.unescape(text.extr(
dm, "<pre>", "</pre></",
- )[0].strip()),
- "date": text.extr(dm, 'datetime="', '"'),
+ ).strip()),
+ "date": text.extr(footer, 'Published: ', '\n'),
})
return dms
+ @memcache(keyarg=1)
+ def _discord_channels(self, server):
+ url = "{}/api/v1/discord/channel/lookup/{}".format(
+ self.root, server)
+ return self.request(url).json()
+
+ @memcache(keyarg=1)
+ def _post_revisions(self, url):
+ return self.request(url + "/revisions").json()
+
def _validate(response):
return (response.headers["content-length"] != "9" or
@@ -214,48 +224,82 @@ def _validate(response):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?([^#]+))?(?:$|[?#])"
example = "https://kemono.party/SERVICE/user/12345"
def __init__(self, match):
- _, _, service, user_id, offset = match.groups()
+ _, _, service, user_id, self.query = match.groups()
self.subcategory = service
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
+ self.api_url = "{}/api/v1/{}/user/{}".format(
+ self.root, service, user_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
- self.offset = text.parse_int(offset)
def posts(self):
url = self.api_url
- params = {"o": self.offset}
+ params = text.parse_query(self.query)
+ params["o"] = text.parse_int(params.get("o"))
+ revisions = self.config("revisions")
while True:
posts = self.request(url, params=params).json()
- yield from posts
- cnt = len(posts)
- if cnt < 25:
- return
- params["o"] += cnt
+ if revisions:
+ for post in posts:
+ post["revision_id"] = 0
+ yield post
+ post_url = "{}/post/{}".format(self.api_url, post["id"])
+ try:
+ revs = self._post_revisions(post_url)
+ except exception.HttpError:
+ pass
+ else:
+ yield from revs
+ else:
+ yield from posts
+
+ if len(posts) < 50:
+ break
+ params["o"] += 50
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = USER_PATTERN + r"/post/([^/?#]+)"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)(/revisions?(?:/(\d*))?)?"
example = "https://kemono.party/SERVICE/user/12345/post/12345"
def __init__(self, match):
- _, _, service, user_id, post_id = match.groups()
+ _, _, service, user_id, post_id, self.revision, self.revision_id = \
+ match.groups()
self.subcategory = service
KemonopartyExtractor.__init__(self, match)
- self.api_url = "{}/api/{}/user/{}/post/{}".format(
+ self.api_url = "{}/api/v1/{}/user/{}/post/{}".format(
self.root, service, user_id, post_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
- posts = self.request(self.api_url).json()
- return (posts[0],) if len(posts) > 1 else posts
+ if not self.revision:
+ post = self.request(self.api_url).json()
+ if self.config("revisions"):
+ post["revision_id"] = 0
+ try:
+ revs = self._post_revisions(self.api_url)
+ except exception.HttpError:
+ pass
+ else:
+ return itertools.chain((post,), revs)
+ return (post,)
+
+ revs = self._post_revisions(self.api_url)
+ if not self.revision_id:
+ return revs
+
+ for rev in revs:
+ if str(rev["revision_id"]) == self.revision_id:
+ return (rev,)
+
+ raise exception.NotFoundError("revision")
class KemonopartyDiscordExtractor(KemonopartyExtractor):
@@ -270,11 +314,29 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- _, _, self.server, self.channel, self.channel_name = match.groups()
+ _, _, self.server, self.channel_id, self.channel = match.groups()
+ self.channel_name = ""
def items(self):
self._prepare_ddosguard_cookies()
+ if self.channel_id:
+ self.channel_name = self.channel
+ else:
+ if self.channel.isdecimal() and len(self.channel) >= 16:
+ key = "id"
+ else:
+ key = "name"
+
+ for channel in self._discord_channels(self.server):
+ if channel[key] == self.channel:
+ break
+ else:
+ raise exception.NotFoundError("channel")
+
+ self.channel_id = channel["id"]
+ self.channel_name = channel["name"]
+
find_inline = re.compile(
r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
@@ -299,7 +361,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
post["channel_name"] = self.channel_name
post["date"] = text.parse_datetime(
- post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ post["published"], "%Y-%m-%dT%H:%M:%S.%f")
post["count"] = len(files)
yield Message.Directory, post
@@ -319,27 +381,17 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
yield Message.Url, url, post
def posts(self):
- if self.channel is None:
- url = "{}/api/discord/channels/lookup?q={}".format(
- self.root, self.server)
- for channel in self.request(url).json():
- if channel["name"] == self.channel_name:
- self.channel = channel["id"]
- break
- else:
- raise exception.NotFoundError("channel")
-
- url = "{}/api/discord/channel/{}".format(self.root, self.channel)
- params = {"skip": 0}
+ url = "{}/api/v1/discord/channel/{}".format(
+ self.root, self.channel_id)
+ params = {"o": 0}
while True:
posts = self.request(url, params=params).json()
yield from posts
- cnt = len(posts)
- if cnt < 25:
+ if len(posts) < 150:
break
- params["skip"] += cnt
+ params["o"] += 150
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
@@ -352,11 +404,7 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
self.server = match.group(3)
def items(self):
- url = "{}/api/discord/channels/lookup?q={}".format(
- self.root, self.server)
- channels = self.request(url).json()
-
- for channel in channels:
+ for channel in self._discord_channels(self.server):
url = "{}/discord/server/{}/channel/{}#{}".format(
self.root, self.server, channel["id"], channel["name"])
channel["_extractor"] = KemonopartyDiscordExtractor
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index 145dd51..e97d273 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -124,6 +124,11 @@ class MoebooruPoolExtractor(MoebooruExtractor):
self.pool_id = match.group(match.lastindex)
def metadata(self):
+ if self.config("metadata"):
+ url = "{}/pool/show/{}.json".format(self.root, self.pool_id)
+ pool = self.request(url).json()
+ pool.pop("posts", None)
+ return {"pool": pool}
return {"pool": text.parse_int(self.pool_id)}
def posts(self):
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 1bcc915..a6971e8 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -54,14 +54,31 @@ class NewgroundsExtractor(Extractor):
if metadata:
post.update(metadata)
yield Message.Directory, post
+ post["num"] = 0
yield Message.Url, url, text.nameext_from_url(url, post)
- for num, url in enumerate(text.extract_iter(
- post["_comment"], 'data-smartload-src="', '"'), 1):
- post["num"] = num
- post["_index"] = "{}_{:>02}".format(post["index"], num)
+ if "_multi" in post:
+ for data in post["_multi"]:
+ post["num"] += 1
+ post["_index"] = "{}_{:>02}".format(
+ post["index"], post["num"])
+ post.update(data)
+ url = data["image"]
+
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ if "_fallback" in post:
+ del post["_fallback"]
+
+ for url in text.extract_iter(
+ post["_comment"], 'data-smartload-src="', '"'):
+ post["num"] += 1
+ post["_index"] = "{}_{:>02}".format(
+ post["index"], post["num"])
url = text.ensure_http_scheme(url)
- yield Message.Url, url, text.nameext_from_url(url, post)
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
else:
self.log.warning(
"Unable to get download URL for '%s'", post_url)
@@ -153,8 +170,7 @@ class NewgroundsExtractor(Extractor):
data["post_url"] = post_url
return data
- @staticmethod
- def _extract_image_data(extr, url):
+ def _extract_image_data(self, extr, url):
full = text.extract_from(util.json_loads(extr(
'"full_image_text":', '});')))
data = {
@@ -172,8 +188,34 @@ class NewgroundsExtractor(Extractor):
index = data["url"].rpartition("/")[2].partition("_")[0]
data["index"] = text.parse_int(index)
data["_index"] = index
+
+ image_data = extr("let imageData =", "\n];")
+ if image_data:
+ data["_multi"] = self._extract_images_multi(image_data)
+ else:
+ art_images = extr('<div class="art-images', '\n</div>')
+ if art_images:
+ data["_multi"] = self._extract_images_art(art_images, data)
+
return data
+ def _extract_images_multi(self, html):
+ data = util.json_loads(html + "]")
+ yield from data[1:]
+
+ def _extract_images_art(self, html, data):
+ ext = text.ext_from_url(data["url"])
+ for url in text.extract_iter(html, 'data-smartload-src="', '"'):
+ url = text.ensure_http_scheme(url)
+ url = url.replace("/medium_views/", "/images/", 1)
+ if text.ext_from_url(url) == "webp":
+ yield {
+ "image" : url.replace(".webp", "." + ext),
+ "_fallback": (url,),
+ }
+ else:
+ yield {"image": url}
+
@staticmethod
def _extract_audio_data(extr, url):
index = url.split("/")[5]
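
Note: for image posts without a "let imageData" array, _extract_images_art above derives full-size URLs from the medium-view thumbnails and keeps the .webp variant as a download fallback. The URL rewriting in isolation (the demo URL is illustrative):

    def art_image_urls(thumb_urls, primary_ext):
        """Turn medium-view thumbnails into full-image entries."""
        for url in thumb_urls:
            url = url.replace("/medium_views/", "/images/", 1)
            if url.endswith(".webp"):
                # prefer the post's original extension, fall back to .webp
                yield {"image": url[:-4] + primary_ext, "_fallback": (url,)}
            else:
                yield {"image": url}

    for entry in art_image_urls(
            ["https://example.ngfiles.com/medium_views/123/img.webp"], "png"):
        print(entry["image"])  # https://example.ngfiles.com/images/123/img.png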
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 729ceaf..6ac9a83 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -277,7 +277,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
try:
data = self._extract_bootstrap(page)
- campaign_id = data["creator"]["data"]["id"]
+ campaign_id = data["campaign"]["data"]["id"]
except (KeyError, ValueError):
raise exception.NotFoundError("creator")
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8553312..cd2ba3d 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -125,7 +125,8 @@ class RedditExtractor(Extractor):
if match:
extra.append(match.group(1))
elif not match_user(url) and not match_subreddit(url):
- if previews and "preview" in data:
+ if previews and "comment" not in data and \
+ "preview" in data:
data["_fallback"] = self._previews(data)
yield Message.Queue, text.unescape(url), data
if "_fallback" in data:
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index e246405..6185acb 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -146,11 +146,17 @@ class RedgifsCollectionsExtractor(RedgifsExtractor):
class RedgifsNichesExtractor(RedgifsExtractor):
"""Extractor for redgifs niches"""
subcategory = "niches"
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/niches/([^/?#]+)"
+ pattern = (r"(?:https?://)?(?:www\.)?redgifs\.com/niches/([^/?#]+)/?"
+ r"(?:\?([^#]+))?$")
example = "https://www.redgifs.com/niches/NAME"
+ def __init__(self, match):
+ RedgifsExtractor.__init__(self, match)
+ self.query = match.group(2)
+
def gifs(self):
- return self.api.niches(self.key)
+ order = text.parse_query(self.query).get("order")
+ return self.api.niches(self.key, order or "new")
class RedgifsSearchExtractor(RedgifsExtractor):
@@ -232,9 +238,10 @@ class RedgifsAPI():
endpoint = "/v2/users/{}/collections".format(user)
return self._pagination(endpoint, key="collections")
- def niches(self, niche):
+ def niches(self, niche, order):
endpoint = "/v2/niches/{}/gifs".format(niche)
- return self._pagination(endpoint)
+ params = {"count": 30, "order": order}
+ return self._pagination(endpoint, params)
def search(self, params):
endpoint = "/v2/gifs/search"
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 745a351..dc35511 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -143,7 +143,7 @@ class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post/show/([0-9a-f]+)"
+ pattern = BASE_PATTERN + r"/post(?:s|/show)/([0-9a-f]+)"
example = "https://sankaku.app/post/show/12345"
def __init__(self, match):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 3895c74..61e871e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -306,6 +306,7 @@ class TwitterExtractor(Extractor):
"user" : self._user or author,
"lang" : tweet["lang"],
"source" : text.extr(source, ">", "<"),
+ "sensitive" : tget("possibly_sensitive"),
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
@@ -451,6 +452,7 @@ class TwitterExtractor(Extractor):
"id_str": id_str,
"lang": None,
"user": user,
+ "source": "><",
"entities": {},
"extended_entities": {
"media": [
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 6f152ed..8e6b842 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -47,7 +47,7 @@ class WarosuThreadExtractor(Extractor):
def metadata(self, page):
boardname = text.extr(page, "<title>", "</title>")
- title = text.extr(page, 'filetitle" itemprop="name">', '<')
+ title = text.unescape(text.extr(page, "class=filetitle>", "<"))
return {
"board" : self.board,
"board_name": boardname.rpartition(" - ")[2],
@@ -57,39 +57,37 @@ class WarosuThreadExtractor(Extractor):
def posts(self, page):
"""Build a list of all post objects"""
- page = text.extr(page, '<div class="content">', '<table>')
- needle = '<table itemscope itemtype="http://schema.org/Comment">'
+ page = text.extr(page, "<div class=content", "</form>")
+ needle = "<table>"
return [self.parse(post) for post in page.split(needle)]
def parse(self, post):
"""Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
- if "<span>File:" in post:
+ if "<span> File:" in post:
self._extract_image(post, data)
part = data["image"].rpartition("/")[2]
data["tim"], _, data["extension"] = part.partition(".")
data["ext"] = "." + data["extension"]
return data
- @staticmethod
- def _extract_post(post):
+ def _extract_post(self, post):
extr = text.extract_from(post)
return {
- "no" : extr('id="p', '"'),
- "name": extr('<span itemprop="name">', "</span>"),
- "time": extr('<span class="posttime" title="', '000">'),
- "now" : extr("", "<"),
+ "no" : extr("id=p", ">"),
+ "name": extr("class=postername>", "<").strip(),
+ "time": extr("class=posttime title=", "000>"),
+ "now" : extr("", "<").strip(),
"com" : text.unescape(text.remove_html(extr(
- '<blockquote><p itemprop="text">', '</p></blockquote>'
- ).strip())),
+ "<blockquote>", "</blockquote>").strip())),
}
- @staticmethod
- def _extract_image(post, data):
+ def _extract_image(self, post, data):
extr = text.extract_from(post)
- data["fsize"] = extr("<span>File: ", ", ")
+ data["fsize"] = extr("<span> File: ", ", ")
data["w"] = extr("", "x")
data["h"] = extr("", ", ")
- data["filename"] = text.unquote(extr("", "<").rpartition(".")[0])
- extr("<br />", "")
- data["image"] = "https:" + extr('<a href="', '"')
+ data["filename"] = text.unquote(extr(
+ "", "<").rstrip().rpartition(".")[0])
+ extr("<br>", "")
+ data["image"] = self.root + extr("<a href=", ">")
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 08e6e70..1982b71 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -510,6 +510,8 @@ def build_parser():
dest="postprocessors", metavar="CMD",
action=AppendCommandAction, const={"name": "exec"},
help=("Execute CMD for each downloaded file. "
+ "Supported replacement fields are "
+ "{} or {_path}, {_directory}, {_filename}. "
"Example: --exec \"convert {} {}.png && rm {}\""),
)
postprocessor.add_argument(
@@ -518,7 +520,8 @@ def build_parser():
action=AppendCommandAction, const={
"name": "exec", "event": "finalize"},
help=("Execute CMD after all files were downloaded successfully. "
- "Example: --exec-after \"cd {} && convert * ../doc.pdf\""),
+ "Example: --exec-after \"cd {_directory} "
+ "&& convert * ../doc.pdf\""),
)
postprocessor.add_argument(
"-P", "--postprocessor",
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index afa828c..e7ed2f6 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -12,6 +12,7 @@ from .common import PostProcessor
from .. import util, formatter
import subprocess
import os
+import re
if util.WINDOWS:
@@ -32,6 +33,7 @@ class ExecPP(PostProcessor):
args = options["command"]
if isinstance(args, str):
self.args = args
+ self._sub = re.compile(r"\{(_directory|_filename|_path|)\}").sub
execute = self.exec_string
else:
self.args = [formatter.parse(arg) for arg in args]
@@ -69,11 +71,8 @@ class ExecPP(PostProcessor):
if archive and archive.check(pathfmt.kwdict):
return
- if pathfmt.realpath:
- args = self.args.replace("{}", quote(pathfmt.realpath))
- else:
- args = self.args.replace("{}", quote(pathfmt.realdirectory))
-
+ self.pathfmt = pathfmt
+ args = self._sub(self._replace, self.args)
self._exec(args, True)
if archive:
@@ -90,5 +89,13 @@ class ExecPP(PostProcessor):
self.log.debug("Running '%s'", args)
subprocess.Popen(args, shell=shell)
+ def _replace(self, match):
+ name = match.group(1)
+ if name == "_directory":
+ return quote(self.pathfmt.realdirectory)
+ if name == "_filename":
+ return quote(self.pathfmt.filename)
+ return quote(self.pathfmt.realpath)
+
__postprocessor__ = ExecPP
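
Note: the string-command branch above now routes {}, {_path}, {_directory}, and {_filename} through a single compiled regex instead of a plain str.replace on "{}". The substitution on its own (shlex.quote stands in for the platform-dependent quote used in the module):

    import re
    from shlex import quote

    FIELDS = re.compile(r"\{(_directory|_filename|_path|)\}")

    def substitute(command, path, directory, filename):
        """Replace supported fields; a bare {} behaves like {_path}."""
        def repl(match):
            name = match.group(1)
            if name == "_directory":
                return quote(directory)
            if name == "_filename":
                return quote(filename)
            return quote(path)        # {} and {_path}
        return FIELDS.sub(repl, command)

    print(substitute("convert {} {_directory}/out.png && rm {_path}",
                     "/tmp/gdl/a.jpg", "/tmp/gdl", "a.jpg"))
    # convert /tmp/gdl/a.jpg /tmp/gdl/out.png && rm /tmp/gdl/a.jpg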
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d06d9d6..593cffa 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.0"
+__version__ = "1.26.1"
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 9387f5b..29ccf97 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -238,7 +238,7 @@ class TestExtractorWait(unittest.TestCase):
until = datetime.fromtimestamp(until)
o = self._isotime_to_seconds(output)
u = self._isotime_to_seconds(until.time().isoformat()[:8])
- self.assertLess(o-u, 1.0)
+ self.assertLessEqual(o-u, 1.0)
@staticmethod
def _isotime_to_seconds(isotime):
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index c00144e..b64df88 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -168,7 +168,7 @@ class ExecTest(BasePostprocessorTest):
def test_command_string(self):
self._create({
- "command": "echo {} && rm {};",
+ "command": "echo {} {_path} {_directory} {_filename} && rm {};",
})
with patch("subprocess.Popen") as p:
@@ -178,7 +178,11 @@ class ExecTest(BasePostprocessorTest):
self._trigger(("after",))
p.assert_called_once_with(
- "echo {0} && rm {0};".format(self.pathfmt.realpath), shell=True)
+ "echo {0} {0} {1} {2} && rm {0};".format(
+ self.pathfmt.realpath,
+ self.pathfmt.realdirectory,
+ self.pathfmt.filename),
+ shell=True)
i.wait.assert_called_once_with()
def test_command_list(self):
diff --git a/test/test_results.py b/test/test_results.py
index 4fb22c7..f275bbf 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -201,6 +201,9 @@ class TestExtractorResults(unittest.TestCase):
self.assertEqual(str(value), test[3:], msg=key)
elif test.startswith("type:"):
self.assertEqual(type(value).__name__, test[5:], msg=key)
+ elif test.startswith("len:"):
+ self.assertIsInstance(value, (list, tuple), msg=key)
+ self.assertEqual(len(value), int(test[4:]), msg=key)
else:
self.assertEqual(value, test, msg=key)
else: