aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md72
-rw-r--r--PKG-INFO6
-rw-r--r--README.rst4
-rw-r--r--data/man/gallery-dl.12
-rw-r--r--data/man/gallery-dl.conf.595
-rw-r--r--docs/gallery-dl.conf14
-rw-r--r--gallery_dl.egg-info/PKG-INFO6
-rw-r--r--gallery_dl.egg-info/SOURCES.txt1
-rw-r--r--gallery_dl/downloader/common.py24
-rw-r--r--gallery_dl/downloader/http.py5
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/bunkr.py32
-rw-r--r--gallery_dl/extractor/chevereto.py18
-rw-r--r--gallery_dl/extractor/common.py23
-rw-r--r--gallery_dl/extractor/danbooru.py62
-rw-r--r--gallery_dl/extractor/e621.py9
-rw-r--r--gallery_dl/extractor/erome.py19
-rw-r--r--gallery_dl/extractor/furaffinity.py5
-rw-r--r--gallery_dl/extractor/reddit.py15
-rw-r--r--gallery_dl/extractor/redgifs.py41
-rw-r--r--gallery_dl/extractor/sankaku.py7
-rw-r--r--gallery_dl/extractor/tenor.py148
-rw-r--r--gallery_dl/extractor/tiktok.py7
-rw-r--r--gallery_dl/extractor/vsco.py17
-rw-r--r--gallery_dl/text.py6
-rw-r--r--gallery_dl/util.py38
-rw-r--r--gallery_dl/version.py2
-rw-r--r--test/test_text.py6
28 files changed, 500 insertions, 185 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4294e8a..5d7c67a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,59 +1,23 @@
-## 1.29.0 - 2025-03-01
-### Changes
-- build `gallery-dl.exe` on Windows 10 / Python 3.13 ([#6684](https://github.com/mikf/gallery-dl/issues/6684))
-- provide Windows 7 / Python 3.8 builds as `gallery-dl_x86.exe`
+## 1.29.1 - 2025-03-08
### Extractors
#### Additions
-- [bilibili] add `user-articles-favorite` extractor ([#6725](https://github.com/mikf/gallery-dl/issues/6725) [#6781](https://github.com/mikf/gallery-dl/issues/6781))
-- [boosty] add `direct-messages` extractor ([#6768](https://github.com/mikf/gallery-dl/issues/6768))
-- [discord] add support ([#454](https://github.com/mikf/gallery-dl/issues/454) [#6836](https://github.com/mikf/gallery-dl/issues/6836) [#7059](https://github.com/mikf/gallery-dl/issues/7059) [#7067](https://github.com/mikf/gallery-dl/issues/7067))
-- [furry34] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078) [#7018](https://github.com/mikf/gallery-dl/issues/7018))
-- [hentaiera] add support ([#3046](https://github.com/mikf/gallery-dl/issues/3046) [#6952](https://github.com/mikf/gallery-dl/issues/6952) [#7020](https://github.com/mikf/gallery-dl/issues/7020))
-- [hentairox] add support ([#7003](https://github.com/mikf/gallery-dl/issues/7003))
-- [imgur] add support for personal posts ([#6990](https://github.com/mikf/gallery-dl/issues/6990))
-- [imhentai] add support ([#1660](https://github.com/mikf/gallery-dl/issues/1660) [#3046](https://github.com/mikf/gallery-dl/issues/3046) [#3824](https://github.com/mikf/gallery-dl/issues/3824) [#4338](https://github.com/mikf/gallery-dl/issues/4338) [#5936](https://github.com/mikf/gallery-dl/issues/5936))
-- [tiktok] add support ([#3061](https://github.com/mikf/gallery-dl/issues/3061) [#4177](https://github.com/mikf/gallery-dl/issues/4177) [#5646](https://github.com/mikf/gallery-dl/issues/5646) [#6878](https://github.com/mikf/gallery-dl/issues/6878) [#6708](https://github.com/mikf/gallery-dl/issues/6708))
-- [vsco] support `/video/` URLs ([#4295](https://github.com/mikf/gallery-dl/issues/4295) [#6973](https://github.com/mikf/gallery-dl/issues/6973))
+- [tenor] add support ([#6075](https://github.com/mikf/gallery-dl/issues/6075))
#### Fixes
-- [bunkr] decrypt file URLs ([#7058](https://github.com/mikf/gallery-dl/issues/7058) [#7070](https://github.com/mikf/gallery-dl/issues/7070) [#7085](https://github.com/mikf/gallery-dl/issues/7085) [#7089](https://github.com/mikf/gallery-dl/issues/7089) [#7090](https://github.com/mikf/gallery-dl/issues/7090))
-- [chevereto/jpgfish] fix extraction ([#7073](https://github.com/mikf/gallery-dl/issues/7073) [#7079](https://github.com/mikf/gallery-dl/issues/7079))
-- [generic] fix config lookups by subcategory
-- [philomena] fix `date` values without UTC offset ([#6921](https://github.com/mikf/gallery-dl/issues/6921))
-- [philomena] download `full` URLs to prevent potential 404 errors ([#6922](https://github.com/mikf/gallery-dl/issues/6922))
-- [pixiv] prevent exceptions during `comments` extraction ([#6965](https://github.com/mikf/gallery-dl/issues/6965))
-- [reddit] restrict subreddit search results ([#7025](https://github.com/mikf/gallery-dl/issues/7025))
-- [sankaku] fix extraction ([#7071](https://github.com/mikf/gallery-dl/issues/7071) [#7072](https://github.com/mikf/gallery-dl/issues/7072))
-- [subscribestar] fix `post` extractor ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
-- [twitter] revert generated CSRF token length to 32 characters ([#6895](https://github.com/mikf/gallery-dl/issues/6895))
-- [vipergirls] change default `domain` to `viper.click` ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
-- [weebcentral] fix extracting wrong number of chapter pages ([#6966](https://github.com/mikf/gallery-dl/issues/6966))
+- [bunkr] update API endpoint ([#7097](https://github.com/mikf/gallery-dl/issues/7097))
+- [erome] fix `AttributeError` for albums without tags ([#7076](https://github.com/mikf/gallery-dl/issues/7076))
+- [furaffinity] fix `artist` metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582) [#7115](https://github.com/mikf/gallery-dl/issues/7115) [#7123](https://github.com/mikf/gallery-dl/issues/7123) [#7130](https://github.com/mikf/gallery-dl/issues/7130))
+- [jpgfish] decrypt file URLs ([#7073](https://github.com/mikf/gallery-dl/issues/7073) [#7079](https://github.com/mikf/gallery-dl/issues/7079) [#7109](https://github.com/mikf/gallery-dl/issues/7109))
+- [sankaku] fix search tag limit check
+- [vsco] fix `video` extractor ([#7113](https://github.com/mikf/gallery-dl/issues/7113))
+- [vsco] fix extracting videos from `/gallery` results ([#7113](https://github.com/mikf/gallery-dl/issues/7113))
#### Improvements
-- [b4k] update domain to `arch.b4k.dev` ([#6955](https://github.com/mikf/gallery-dl/issues/6955) [#6956](https://github.com/mikf/gallery-dl/issues/6956))
-- [bunkr] update default archive ID format ([#6935](https://github.com/mikf/gallery-dl/issues/6935))
-- [bunkr] provide fallback URLs for 403 download links ([#6732](https://github.com/mikf/gallery-dl/issues/6732) [#6972](https://github.com/mikf/gallery-dl/issues/6972))
-- [bunkr] implement fast `--range` support ([#6985](https://github.com/mikf/gallery-dl/issues/6985))
-- [furaffinity] use a default delay of 1 second between requests ([#7054](https://github.com/mikf/gallery-dl/issues/7054))
-- [itaku] support gallery section URLs ([#6951](https://github.com/mikf/gallery-dl/issues/6951))
-- [patreon] support `/profile/creators` URLs
-- [subscribestar] detect and handle redirects ([#6916](https://github.com/mikf/gallery-dl/issues/6916))
-- [twibooru] match URLs with `www` subdomain ([#6903](https://github.com/mikf/gallery-dl/issues/6903))
-- [twitter] support `grok` cards content ([#7040](https://github.com/mikf/gallery-dl/issues/7040))
-- [vsco] improve `m3u8` handling
-- [weibo] add `movies` option ([#6988](https://github.com/mikf/gallery-dl/issues/6988))
-#### Metadata
-- [bunkr] extract `id_url` metadata ([#6935](https://github.com/mikf/gallery-dl/issues/6935))
-- [erome] extract `tags` metadata ([#7076](https://github.com/mikf/gallery-dl/issues/7076))
-- [issuu] unescape HTML entities
-- [newgrounds] provide `comment_html` metadata ([#7038](https://github.com/mikf/gallery-dl/issues/7038))
-- [patreon] extract `campaign` metadata ([#6989](https://github.com/mikf/gallery-dl/issues/6989))
+- [bunkr] add `endpoint` option ([#7097](https://github.com/mikf/gallery-dl/issues/7097))
+- [danbooru:pool] download posts in pool order, add `order-posts` option ([#7091](https://github.com/mikf/gallery-dl/issues/7091))
+- [erome:search] recognize all URL query parameters ([#7125](https://github.com/mikf/gallery-dl/issues/7125))
+- [reddit] add `selftext` option ([#7111](https://github.com/mikf/gallery-dl/issues/7111))
+- [redgifs:search] support `/search?query=...` URLs ([#7118](https://github.com/mikf/gallery-dl/issues/7118))
+- [sankaku] increase wait time on 429 errors ([#7129](https://github.com/mikf/gallery-dl/issues/7129))
+- [tiktok] improve `tiktok-range` parsing ([#7098](https://github.com/mikf/gallery-dl/issues/7098))
### Downloaders
-- implement `downloader` options per extractor category
-- [http] add `sleep-429` option ([#6996](https://github.com/mikf/gallery-dl/issues/6996))
-- [ytdl] support specifying `module` as filesystem paths ([#6991](https://github.com/mikf/gallery-dl/issues/6991))
-### Archives
-- [archive] implement support for PostgreSQL databases ([#6152](https://github.com/mikf/gallery-dl/issues/6152))
-- [archive] add `archive-table` option ([#6152](https://github.com/mikf/gallery-dl/issues/6152))
-### Miscellaneous
-- [aes] handle errors during `cryptodome` import ([#6906](https://github.com/mikf/gallery-dl/issues/6906))
-- [executables] fix loading `certifi` SSL certificates ([#6393](https://github.com/mikf/gallery-dl/issues/6393))
-- improve `\f` format string handling for `--print`
+- [http] detect Cloudflare/DDoS-Guard challenges ([#7066](https://github.com/mikf/gallery-dl/issues/7066) [#7121](https://github.com/mikf/gallery-dl/issues/7121))
+- warn about invalid `subcategory` values ([#7103](https://github.com/mikf/gallery-dl/issues/7103) [#7119](https://github.com/mikf/gallery-dl/issues/7119))
diff --git a/PKG-INFO b/PKG-INFO
index 148bf37..c74f013 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.29.0
+Version: 1.29.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -132,9 +132,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 5825ead..4be6c57 100644
--- a/README.rst
+++ b/README.rst
@@ -77,9 +77,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index b17a8f4..761c413 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-03-01" "1.29.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-03-08" "1.29.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 847efaa..c0b64df 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-03-01" "1.29.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-03-08" "1.29.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1942,6 +1942,17 @@ Possibly available formats are
* \f[I]tiny\f[] (144p)
+.SS extractor.bunkr.endpoint
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"/api/_001"\f[]
+
+.IP "Description:" 4
+API endpoint for retrieving file URLs.
+
+
.SS extractor.bunkr.tlds
.IP "Type:" 6
\f[I]bool\f[]
@@ -2157,6 +2168,26 @@ For unavailable or restricted posts,
follow the \f[I]source\f[] and download from there if possible.
+.SS extractor.[Danbooru].pool.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"pool"\f[]
+
+.IP "Description:" 4
+Controls the order in which pool posts are returned.
+
+\f[I]"pool"\f[] | \f[I]"pool_asc"\f[] | \f[I]"asc"\f[] | \f[I]"asc_pool"\f[]
+Pool order
+\f[I]"pool_desc"\f[] | \f[I]"desc_pool"\f[] | \f[I]"desc"\f[]
+Reverse Pool order
+\f[I]"id"\f[] | \f[I]"id_desc"\f[] | \f[I]"desc_id"\f[]
+Descending Post ID order
+\f[I]"id_asc"\f[] | \f[I]"asc_id"\f[]
+Ascending Post ID order
+Ascending Post ID order
+
+
.SS extractor.[Danbooru].ugoira
.IP "Type:" 6
\f[I]bool\f[]
@@ -4654,6 +4685,20 @@ but requests to the reddit API are going to be rate limited
at 600 requests every 10 minutes/600 seconds.
+.SS extractor.reddit.selftext
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+.br
+* \f[I]true\f[] if \f[I]comments\f[] are enabled
+.br
+* \f[I]false\f[] otherwise
+
+.IP "Description:" 4
+Follow links in the original post's \f[I]selftext\f[].
+
+
.SS extractor.reddit.videos
.IP "Type:" 6
.br
@@ -5072,6 +5117,52 @@ To generate a token, visit \f[I]/user/USERNAME/list-tokens\f[]
and click \f[I]Create Token\f[].
+.SS extractor.tenor.format
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["gif", "mp4", "webm", "webp"]\f[]
+
+.IP "Description:" 4
+List of names of the preferred animation formats.
+
+If a selected format is not available, the next one in the list will be
+tried until a format is found.
+
+Possible formats include
+
+.br
+* \f[I]"gif"\f[]
+.br
+* \f[I]"gif_transparent"\f[]
+.br
+* \f[I]"gifpreview"\f[]
+.br
+* \f[I]"mediumgif"\f[]
+.br
+* \f[I]"tinygif"\f[]
+.br
+* \f[I]"tinygif_transparent"\f[]
+.br
+* \f[I]"mp4"\f[]
+.br
+* \f[I]"tinymp4"\f[]
+.br
+* \f[I]"webm"\f[]
+.br
+* \f[I]"webp"\f[]
+.br
+* \f[I]"webp_transparent"\f[]
+.br
+* \f[I]"tinywebp"\f[]
+.br
+* \f[I]"tinywebp_transparent"\f[]
+
+
.SS extractor.tiktok.audio
.IP "Type:" 6
\f[I]bool\f[]
@@ -5124,7 +5215,7 @@ See \f[I]extractor.ytdl.module\f[].
\f[I]string\f[]
.IP "Default:" 9
-\f[I]null\f[]
+\f[I]""\f[]
.IP "Example:" 4
"1-20"
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index ed85b01..0ad87c0 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -155,6 +155,7 @@
},
"bunkr":
{
+ "endpoint": "/api/_001",
"tlds": false
},
"cien":
@@ -511,6 +512,7 @@
"id-max" : null,
"previews" : true,
"recursion" : 0,
+ "selftext" : null,
"videos" : true
},
"redgifs":
@@ -593,6 +595,10 @@
"username": "",
"password": ""
},
+ "tenor":
+ {
+ "format": ["gif", "mp4", "webm", "webp"]
+ },
"tiktok":
{
"audio" : true,
@@ -601,7 +607,7 @@
"user": {
"avatar": true,
"module": null,
- "tiktok-range": null
+ "tiktok-range": ""
}
},
"tsumino":
@@ -769,7 +775,11 @@
"external" : false,
"metadata" : false,
"threshold": "auto",
- "ugoira" : false
+ "ugoira" : false,
+
+ "pool": {
+ "order-posts": "pool"
+ }
},
"danbooru":
{
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 148bf37..c74f013 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.29.0
+Version: 1.29.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -132,9 +132,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index a29d3fe..0609d8d 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -220,6 +220,7 @@ gallery_dl/extractor/szurubooru.py
gallery_dl/extractor/tapas.py
gallery_dl/extractor/tcbscans.py
gallery_dl/extractor/telegraph.py
+gallery_dl/extractor/tenor.py
gallery_dl/extractor/tiktok.py
gallery_dl/extractor/tmohentai.py
gallery_dl/extractor/toyhouse.py
diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py
index 8430884..dc1219f 100644
--- a/gallery_dl/downloader/common.py
+++ b/gallery_dl/downloader/common.py
@@ -19,6 +19,7 @@ class DownloaderBase():
def __init__(self, job):
extractor = job.extractor
+ self.log = job.get_logger("downloader." + self.scheme)
opts = self._extractor_config(extractor)
if opts:
@@ -29,7 +30,6 @@ class DownloaderBase():
self.session = extractor.session
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
- self.log = job.get_logger("downloader." + self.scheme)
if self.partdir:
self.partdir = util.expand_path(self.partdir)
@@ -73,17 +73,27 @@ class DownloaderBase():
copts = cfg.get(self.scheme)
if copts:
if subcategory in cfg:
- sopts = cfg[subcategory].get(self.scheme)
- if sopts:
- opts = copts.copy()
- opts.update(sopts)
- return opts
+ try:
+ sopts = cfg[subcategory].get(self.scheme)
+ if sopts:
+ opts = copts.copy()
+ opts.update(sopts)
+ return opts
+ except Exception:
+ self._report_config_error(subcategory, cfg[subcategory])
return copts
if subcategory in cfg:
- return cfg[subcategory].get(self.scheme)
+ try:
+ return cfg[subcategory].get(self.scheme)
+ except Exception:
+ self._report_config_error(subcategory, cfg[subcategory])
return None
+ def _report_config_error(self, subcategory, value):
+ config.log.warning("Subcategory '%s' set to '%s' instead of object",
+ subcategory, util.json_dumps(value).strip('"'))
+
def download(self, url, pathfmt):
"""Write data from 'url' into the file specified by 'pathfmt'"""
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 449ffe8..faea9e5 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -184,6 +184,11 @@ class HttpDownloader(DownloaderBase):
break
else:
msg = "'{} {}' for '{}'".format(code, response.reason, url)
+
+ challenge = util.detect_challenge(response)
+ if challenge is not None:
+ self.log.warning(challenge)
+
if code in self.retry_codes or 500 <= code < 600:
continue
retry = kwdict.get("_http_retry")
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 00b22d4..8208241 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -171,6 +171,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
+ "tenor",
"tiktok",
"tmohentai",
"toyhouse",
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 201b8f4..d74f59c 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -11,7 +11,6 @@
from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor
from .. import text, util, config, exception
-import binascii
import random
if config.get(("extractor", "bunkr"), "tlds"):
@@ -71,6 +70,17 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
domain = self.groups[0] or self.groups[1]
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
+
+ def _init(self):
+ LolisafeAlbumExtractor._init(self)
+
+ endpoint = self.config("endpoint")
+ if not endpoint:
+ endpoint = self.root_dl + "/api/_001"
+ elif endpoint[0] == "/":
+ endpoint = self.root_dl + endpoint
+
+ self.endpoint = endpoint
self.offset = 0
def skip(self, num):
@@ -169,13 +179,13 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
data_id = text.extr(page, 'data-file-id="', '"')
referer = self.root_dl + "/file/" + data_id
- url = self.root_dl + "/api/vs"
- headers = {"Referer": referer}
- data = self.request(
- url, method="POST", headers=headers, json={"id": data_id}).json()
+ headers = {"Referer": referer, "Origin": self.root_dl}
+ data = self.request(self.endpoint, method="POST", headers=headers,
+ json={"id": data_id}).json()
if data.get("encrypted"):
- file_url = self._decrypt_url(data["url"], data["timestamp"])
+ key = "SECRET_KEY_{}".format(data["timestamp"] // 3600)
+ file_url = util.decrypt_xor(data["url"], key.encode())
else:
file_url = data["url"]
@@ -192,16 +202,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"_http_validate": self._validate,
}
- def _decrypt_url(self, encrypted_b64, timestamp):
- encrypted_bytes = binascii.a2b_base64(encrypted_b64)
- key = "SECRET_KEY_{}".format(timestamp // 3600).encode()
- div = len(key)
-
- return bytes([
- encrypted_bytes[i] ^ key[i % div]
- for i in range(len(encrypted_bytes))
- ]).decode()
-
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
self.log.warning("File server in maintenance mode")
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index de22a7b..c9ccb7d 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -9,7 +9,7 @@
"""Extractors for Chevereto galleries"""
from .common import BaseExtractor, Message
-from .. import text
+from .. import text, util
class CheveretoExtractor(BaseExtractor):
@@ -53,12 +53,22 @@ class CheveretoImageExtractor(CheveretoExtractor):
def items(self):
url = self.root + self.path
- extr = text.extract_from(self.request(url).text)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ url = (extr('<meta property="og:image" content="', '"') or
+ extr('url: "', '"'))
+ if not url or url.endswith("/loading.svg"):
+ pos = page.find(" download=")
+ url = text.rextract(page, 'href="', '"', pos)[0]
+ if not url.startswith("https://"):
+ url = util.decrypt_xor(
+ url, b"seltilovessimpcity@simpcityhatesscrapers",
+ fromhex=True)
image = {
"id" : self.path.rpartition(".")[2],
- "url" : (extr('<meta property="og:image" content="', '"') or
- extr('url: "', '"')),
+ "url" : url,
"album": text.extr(extr("Added to <a", "/a>"), ">", "<"),
"user" : extr('username: "', '"'),
}
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d58db6f..a85eedd 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -205,25 +205,10 @@ class Extractor():
msg = "'{} {}' for '{}'".format(
code, response.reason, response.url)
- server = response.headers.get("Server")
- if server and server.startswith("cloudflare") and \
- code in (403, 503):
- mitigated = response.headers.get("cf-mitigated")
- if mitigated and mitigated.lower() == "challenge":
- self.log.warning("Cloudflare challenge")
- break
- content = response.content
- if b"_cf_chl_opt" in content or b"jschl-answer" in content:
- self.log.warning("Cloudflare challenge")
- break
- if b'name="captcha-bypass"' in content:
- self.log.warning("Cloudflare CAPTCHA")
- break
- elif server and server.startswith("ddos-guard") and \
- code == 403:
- if b"/ddos-guard/js-challenge/" in response.content:
- self.log.warning("DDoS-Guard challenge")
- break
+
+ challenge = util.detect_challenge(response)
+ if challenge is not None:
+ self.log.warning(challenge)
if code == 429 and self._handle_429(response):
continue
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index d0a9397..8d00728 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -205,12 +205,8 @@ class DanbooruTagExtractor(DanbooruExtractor):
pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
example = "https://danbooru.donmai.us/posts?tags=TAG"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- tags = match.group(match.lastindex)
- self.tags = text.unquote(tags.replace("+", " "))
-
def metadata(self):
+ self.tags = text.unquote(self.groups[-1].replace("+", " "))
return {"search_tags": self.tags}
def posts(self):
@@ -235,15 +231,13 @@ class DanbooruPoolExtractor(DanbooruExtractor):
"""Extractor for posts from danbooru pools"""
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
+ filename_fmt = "{num:>04}_{id}_{filename}.{extension}"
archive_fmt = "p_{pool[id]}_{id}"
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/pools/12345"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.pool_id = match.group(match.lastindex)
-
def metadata(self):
+ self.pool_id = self.groups[-1]
url = "{}/pools/{}.json".format(self.root, self.pool_id)
pool = self.request(url).json()
pool["name"] = pool["name"].replace("_", " ")
@@ -251,8 +245,42 @@ class DanbooruPoolExtractor(DanbooruExtractor):
return {"pool": pool}
def posts(self):
- params = {"tags": "pool:" + self.pool_id}
- return self._pagination("/posts.json", params, "b")
+ reverse = prefix = None
+
+ order = self.config("order-posts")
+ if not order or order in ("asc", "pool", "pool_asc", "asc_pool"):
+ params = {"tags": "ordpool:" + self.pool_id}
+ elif order in ("id", "desc_id", "id_desc"):
+ params = {"tags": "pool:" + self.pool_id}
+ prefix = "b"
+ elif order in ("desc", "desc_pool", "pool_desc"):
+ params = {"tags": "ordpool:" + self.pool_id}
+ reverse = True
+ elif order in ("asc_id", "id_asc"):
+ params = {"tags": "pool:" + self.pool_id}
+ reverse = True
+
+ posts = self._pagination("/posts.json", params, prefix)
+ if reverse:
+ return self._enumerate_posts_reverse(posts)
+ else:
+ return self._enumerate_posts(posts)
+
+ def _enumerate_posts(self, posts):
+ pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)}
+ for post in posts:
+ post["num"] = pid_to_num[post["id"]]
+ yield post
+
+ def _enumerate_posts_reverse(self, posts):
+ self.log.info("Collecting posts of pool %s", self.pool_id)
+ posts = list(posts)
+ posts.reverse()
+
+ pid_to_num = {pid: num+1 for num, pid in enumerate(self.post_ids)}
+ for post in posts:
+ post["num"] = pid_to_num[post["id"]]
+ return posts
class DanbooruPostExtractor(DanbooruExtractor):
@@ -262,12 +290,8 @@ class DanbooruPostExtractor(DanbooruExtractor):
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
example = "https://danbooru.donmai.us/posts/12345"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.post_id = match.group(match.lastindex)
-
def posts(self):
- url = "{}/posts/{}.json".format(self.root, self.post_id)
+ url = "{}/posts/{}.json".format(self.root, self.groups[-1])
post = self.request(url).json()
if self.includes:
params = {"only": self.includes}
@@ -283,12 +307,8 @@ class DanbooruPopularExtractor(DanbooruExtractor):
pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
example = "https://danbooru.donmai.us/explore/posts/popular"
- def __init__(self, match):
- DanbooruExtractor.__init__(self, match)
- self.params = match.group(match.lastindex)
-
def metadata(self):
- self.params = params = text.parse_query(self.params)
+ self.params = params = text.parse_query(self.groups[-1])
scale = params.get("scale", "day")
date = params.get("date") or datetime.date.today().isoformat()
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index eddcb12..76ea792 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -100,7 +100,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
example = "https://e621.net/pools/12345"
def posts(self):
- self.log.info("Fetching posts of pool %s", self.pool_id)
+ self.log.info("Collecting posts of pool %s", self.pool_id)
id_to_post = {
post["id"]: post
@@ -126,7 +126,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
example = "https://e621.net/posts/12345"
def posts(self):
- url = "{}/posts/{}.json".format(self.root, self.post_id)
+ url = "{}/posts/{}.json".format(self.root, self.groups[-1])
return (self.request(url).json()["post"],)
@@ -147,11 +147,8 @@ class E621FavoriteExtractor(E621Extractor):
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
example = "https://e621.net/favorites"
- def __init__(self, match):
- E621Extractor.__init__(self, match)
- self.query = text.parse_query(match.group(match.lastindex))
-
def metadata(self):
+ self.query = text.parse_query(self.groups[-1])
return {"user_id": self.query.get("user_id", "")}
def posts(self):
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 55549de..7582528 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -23,12 +23,8 @@ class EromeExtractor(Extractor):
archive_fmt = "{album_id}_{num}"
root = "https://www.erome.com"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.item = match.group(1)
- self.__cookies = True
-
def items(self):
+ self.__cookies = True
for album_id in self.albums():
url = "{}/a/{}".format(self.root, album_id)
@@ -66,8 +62,9 @@ class EromeExtractor(Extractor):
"user" : text.unquote(user),
"count" : len(urls),
"date" : date,
- "tags" : [t.replace("+", " ")
- for t in text.extract_iter(tags, "?q=", '"')],
+ "tags" : ([t.replace("+", " ")
+ for t in text.extract_iter(tags, "?q=", '"')]
+ if tags else ()),
"_http_headers": {"Referer": url},
}
@@ -110,7 +107,7 @@ class EromeAlbumExtractor(EromeExtractor):
example = "https://www.erome.com/a/ID"
def albums(self):
- return (self.item,)
+ return (self.groups[0],)
class EromeUserExtractor(EromeExtractor):
@@ -119,18 +116,18 @@ class EromeUserExtractor(EromeExtractor):
example = "https://www.erome.com/USER"
def albums(self):
- url = "{}/{}".format(self.root, self.item)
+ url = "{}/{}".format(self.root, self.groups[0])
return self._pagination(url, {})
class EromeSearchExtractor(EromeExtractor):
subcategory = "search"
- pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
+ pattern = BASE_PATTERN + r"/search/?\?(q=[^#]+)"
example = "https://www.erome.com/search?q=QUERY"
def albums(self):
url = self.root + "/search"
- params = {"q": text.unquote(self.item)}
+ params = text.parse_query(self.groups[0])
return self._pagination(url, params)
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 1466390..216aeb1 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -98,7 +98,8 @@ class FuraffinityExtractor(Extractor):
data["tags"] = text.split_html(extr(
'class="tags-row">', '</section>'))
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
- data["artist"] = extr("<strong>", "<")
+ data["artist_url"] = extr('title="', '"').strip()
+ data["artist"] = extr(">", "<")
data["_description"] = extr(
'class="submission-description user-submitted-links">',
' </div>')
@@ -121,6 +122,7 @@ class FuraffinityExtractor(Extractor):
else:
# old site layout
data["title"] = text.unescape(extr("<h2>", "</h2>"))
+ data["artist_url"] = extr('title="', '"').strip()
data["artist"] = extr(">", "<")
data["fa_category"] = extr("<b>Category:</b>", "<").strip()
data["theme"] = extr("<b>Theme:</b>", "<").strip()
@@ -139,7 +141,6 @@ class FuraffinityExtractor(Extractor):
'style="padding:8px">', ' </td>')
data["folders"] = () # folders not present in old layout
- data["artist_url"] = data["artist"].replace("_", "").lower()
data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
data["description"] = self._process_description(data["_description"])
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index f36b1f5..7a9e3c5 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -41,6 +41,11 @@ class RedditExtractor(Extractor):
self._extract_video = self._extract_video_dash
videos = True
+ selftext = self.config("selftext")
+ if selftext is None:
+ selftext = self.api.comments
+ selftext = True if selftext else False
+
submissions = self.submissions()
visited = set()
depth = 0
@@ -92,12 +97,12 @@ class RedditExtractor(Extractor):
elif parentdir:
yield Message.Directory, comments[0]
+ if selftext and submission:
+ for url in text.extract_iter(
+ submission["selftext_html"] or "", ' href="', '"'):
+ urls.append((url, submission))
+
if self.api.comments:
- if submission:
- for url in text.extract_iter(
- submission["selftext_html"] or "",
- ' href="', '"'):
- urls.append((url, submission))
for comment in comments:
html = comment["body_html"] or ""
href = (' href="' in html)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 506f6ac..612faac 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -163,24 +163,27 @@ class RedgifsSearchExtractor(RedgifsExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = (r"(?:https?://)?(?:\w+\.)?redgifs\.com"
- r"/(?:gifs/([^/?#]+)|browse)(?:/?\?([^#]+))?")
+ r"/(?:gifs/([^/?#]+)|search(?:/gifs)?()|browse)"
+ r"(?:/?\?([^#]+))?")
example = "https://www.redgifs.com/gifs/TAG"
- def __init__(self, match):
- RedgifsExtractor.__init__(self, match)
- self.search, self.query = match.groups()
-
def metadata(self):
- self.params = text.parse_query(self.query)
- if self.search:
- self.params["tags"] = text.unquote(self.search)
+ tag, self.search, query = self.groups
- return {"search": (self.params.get("tags") or
- self.params.get("order") or
+ self.params = params = text.parse_query(query)
+ if tag is not None:
+ params["tags"] = text.unquote(tag)
+
+ return {"search": (params.get("query") or
+ params.get("tags") or
+ params.get("order") or
"trending")}
def gifs(self):
- return self.api.search(self.params)
+ if self.search is None:
+ return self.api.gifs_search(self.params)
+ else:
+ return self.api.search_gifs(self.params)
class RedgifsImageExtractor(RedgifsExtractor):
@@ -205,9 +208,9 @@ class RedgifsAPI():
def __init__(self, extractor):
self.extractor = extractor
self.headers = {
- "authorization" : None,
- "content-type" : "application/json",
- "x-customheader": extractor.root + "/",
+ "Accept" : "application/json, text/plain, */*",
+ "Referer" : extractor.root + "/",
+ "Authorization" : None,
"Origin" : extractor.root,
}
@@ -242,14 +245,18 @@ class RedgifsAPI():
params = {"count": 30, "order": order}
return self._pagination(endpoint, params)
- def search(self, params):
+ def gifs_search(self, params):
endpoint = "/v2/gifs/search"
params["search_text"] = params.pop("tags", None)
return self._pagination(endpoint, params)
+ def search_gifs(self, params):
+ endpoint = "/v2/search/gifs"
+ return self._pagination(endpoint, params)
+
def _call(self, endpoint, params=None):
url = self.API_ROOT + endpoint
- self.headers["authorization"] = self._auth()
+ self.headers["Authorization"] = self._auth()
return self.extractor.request(
url, params=params, headers=self.headers).json()
@@ -270,6 +277,6 @@ class RedgifsAPI():
def _auth(self):
# https://github.com/Redgifs/api/wiki/Temporary-tokens
url = self.API_ROOT + "/v2/auth/temporary"
- self.headers["authorization"] = None
+ self.headers["Authorization"] = None
return "Bearer " + self.extractor.request(
url, headers=self.headers).json()["token"]
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b5cdb9c..b2f31dd 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -241,9 +241,10 @@ class SankakuAPI():
if response.status_code == 429:
until = response.headers.get("X-RateLimit-Reset")
- if not until and b"tags-limit" in response.content:
- raise exception.StopExtraction("Search tag limit exceeded")
- seconds = None if until else 60
+ if not until and b"_tags-explicit-limit" in response.content:
+ raise exception.AuthorizationError(
+ "Search tag limit exceeded")
+ seconds = None if until else 600
self.extractor.wait(until=until, seconds=seconds)
continue
diff --git a/gallery_dl/extractor/tenor.py b/gallery_dl/extractor/tenor.py
new file mode 100644
index 0000000..7273eac
--- /dev/null
+++ b/gallery_dl/extractor/tenor.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tenor.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?tenor\.com/(?:\w\w(?:-\w\w)?/)?"
+
+
+class TenorExtractor(Extractor):
+ """Base class for tenor extractors"""
+ category = "tenor"
+ root = "https://tenor.com"
+ filename_fmt = "{id}{title:? //}.{extension}"
+ archive_fmt = "{id}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ formats = self.config("format")
+ if formats is None:
+ self.formats = ("gif", "mp4", "webm", "webp")
+ else:
+ if isinstance(formats, str):
+ formats = formats.split(",")
+ self.formats = formats
+
+ def items(self):
+ meta = self.metadata()
+
+ for gif in self.gifs():
+ fmt = self._extract_format(gif)
+ if not fmt:
+ self.log.warning("%s: Selected format(s) not available",
+ gif.get("id"))
+ continue
+
+ url = fmt["url"]
+ gif["width"], gif["height"] = fmt["dims"]
+ gif["title"] = gif["h1_title"][:-4]
+ gif["description"] = gif.pop("content_description", "")
+ gif["date"] = text.parse_timestamp(gif["created"])
+ if meta:
+ gif.update(meta)
+
+ yield Message.Directory, gif
+ yield Message.Url, url, text.nameext_from_url(url, gif)
+
+ def _extract_format(self, gif):
+ media_formats = gif["media_formats"]
+ for fmt in self.formats:
+ if fmt in media_formats:
+ return media_formats[fmt]
+
+ def _search_results(self, query):
+ url = "https://tenor.googleapis.com/v2/search"
+ params = {
+ "appversion": "browser-r20250225-1",
+ "prettyPrint": "false",
+ "key": "AIzaSyC-P6_qz3FzCoXGLk6tgitZo4jEJ5mLzD8",
+ "client_key": "tenor_web",
+ "locale": "en",
+ "anon_id": "",
+ "q": query,
+ "limit": "50",
+ "contentfilter": "low",
+ "media_filter": "gif,gif_transparent,mediumgif,tinygif,"
+ "tinygif_transparent,webp,webp_transparent,"
+ "tinywebp,tinywebp_transparent,tinymp4,mp4,webm,"
+ "originalgif,gifpreview",
+ "fields": "next,results.id,results.media_formats,results.title,"
+ "results.h1_title,results.long_title,results.itemurl,"
+ "results.url,results.created,results.user,"
+ "results.shares,results.embed,results.hasaudio,"
+ "results.policy_status,results.source_id,results.flags,"
+ "results.tags,results.content_rating,results.bg_color,"
+ "results.legacy_info,results.geographic_restriction,"
+ "results.content_description",
+ "pos": None,
+ "component": "web_desktop",
+ }
+ headers = {
+ "Referer": self.root + "/",
+ "Origin" : self.root,
+ }
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+
+ yield from data["results"]
+
+ params["pos"] = data.get("next")
+ if not params["pos"]:
+ return
+
+ def metadata(self):
+ return False
+
+ def gifs(self):
+ return ()
+
+
+class TenorImageExtractor(TenorExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"view/(?:[^/?#]*-)?(\d+)"
+ example = "https://tenor.com/view/SLUG-1234567890"
+
+ def gifs(self):
+ url = "{}/view/{}".format(self.root, self.groups[0])
+ page = self.request(url).text
+ pos = page.index('id="store-cache"')
+ data = util.json_loads(text.extract(page, ">", "</script>", pos)[0])
+ return (data["gifs"]["byId"].popitem()[1]["results"][0],)
+
+
+class TenorSearchExtractor(TenorExtractor):
+ subcategory = "search"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"search/([^/?#]+)"
+ example = "https://tenor.com/search/QUERY"
+
+ def metadata(self):
+ query = text.unquote(self.groups[0])
+ rest, _, last = query.rpartition("-")
+ if last == "gifs":
+ query = rest
+ self.search_tags = query.replace("-", " ")
+
+ return {"search_tags": self.search_tags}
+
+ def gifs(self):
+ return self._search_results(self.search_tags)
+
+
+class TenorUserExtractor(TenorExtractor):
+ subcategory = "user"
+ directory_fmt = ("{category}", "@{user[username]}")
+ pattern = BASE_PATTERN + r"(?:users|official)/([^/?#]+)"
+ example = "https://tenor.com/users/USER"
+
+ def gifs(self):
+ return self._search_results("@" + self.groups[0])
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index f129b1c..203b1ac 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -219,6 +219,11 @@ class TiktokUserExtractor(TiktokExtractor):
self.log.debug("", exc_info=exc)
raise exception.ExtractionError("yt-dlp or youtube-dl is required "
"for this feature!")
+
+ ytdl_range = self.config("tiktok-range")
+ if ytdl_range is None or not ytdl_range and ytdl_range != 0:
+ ytdl_range = ""
+
extr_opts = {
"extract_flat" : True,
"ignore_no_formats_error": True,
@@ -227,7 +232,7 @@ class TiktokUserExtractor(TiktokExtractor):
"retries" : self._retries,
"socket_timeout" : self._timeout,
"nocheckcertificate" : not self._verify,
- "playlist_items" : str(self.config("tiktok-range", "")),
+ "playlist_items" : str(ytdl_range),
}
if self._proxies:
user_opts["proxy"] = self._proxies.get("http")
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index a53409c..524bd81 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -32,7 +32,11 @@ class VscoExtractor(Extractor):
yield Message.Directory, {"user": self.user}
for img in self.images():
- if not img or "responsive_url" not in img:
+ if not img:
+ continue
+ elif "playback_url" in img:
+ img = self._transform_video(img)
+ elif "responsive_url" not in img:
continue
if img["is_video"]:
@@ -118,6 +122,15 @@ class VscoExtractor(Extractor):
media["image_meta"] = media.get("imageMeta")
return media
+ @staticmethod
+ def _transform_video(media):
+ media["is_video"] = True
+ media["grid_name"] = ""
+ media["video_url"] = media["playback_url"]
+ media["responsive_url"] = media["poster_url"]
+ media["upload_date"] = media["created_date"]
+ return media
+
class VscoUserExtractor(VscoExtractor):
"""Extractor for a vsco user profile"""
@@ -322,7 +335,7 @@ class VscoVideoExtractor(VscoExtractor):
"grid_name" : "",
"upload_date" : media["createdDate"],
"responsive_url": media["posterUrl"],
- "video_url" : "ytdl:" + media.get("playbackUrl"),
+ "video_url" : media.get("playbackUrl"),
"image_meta" : None,
"width" : media["width"],
"height" : media["height"],
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 5fd5a40..f117c92 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -160,10 +160,10 @@ def extract_all(txt, rules, pos=0, values=None):
def extract_iter(txt, begin, end, pos=0):
"""Yield values that would be returned by repeated calls of extract()"""
- index = txt.index
- lbeg = len(begin)
- lend = len(end)
try:
+ index = txt.index
+ lbeg = len(begin)
+ lend = len(end)
while True:
first = index(begin, pos) + lbeg
last = index(end, first)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 7034c0c..06a580b 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -48,6 +48,19 @@ def bdecode(data, alphabet="0123456789"):
return num
+def decrypt_xor(encrypted, key, base64=True, fromhex=False):
+ if base64:
+ encrypted = binascii.a2b_base64(encrypted)
+ if fromhex:
+ encrypted = bytes.fromhex(encrypted.decode())
+
+ div = len(key)
+ return bytes([
+ encrypted[i] ^ key[i % div]
+ for i in range(len(encrypted))
+ ]).decode()
+
+
def advance(iterable, num):
""""Advance 'iterable' by 'num' steps"""
iterator = iter(iterable)
@@ -356,6 +369,31 @@ def extract_headers(response):
return data
+def detect_challenge(response):
+ server = response.headers.get("server")
+ if not server:
+ return
+
+ elif server.startswith("cloudflare"):
+ if response.status_code not in (403, 503):
+ return
+
+ mitigated = response.headers.get("cf-mitigated")
+ if mitigated and mitigated.lower() == "challenge":
+ return "Cloudflare challenge"
+
+ content = response.content
+ if b"_cf_chl_opt" in content or b"jschl-answer" in content:
+ return "Cloudflare challenge"
+ elif b'name="captcha-bypass"' in content:
+ return "Cloudflare CAPTCHA"
+
+ elif server.startswith("ddos-guard"):
+ if response.status_code == 403 and \
+ b"/ddos-guard/js-challenge/" in response.content:
+ return "DDoS-Guard challenge"
+
+
@functools.lru_cache(maxsize=None)
def git_head():
try:
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 0c75005..ad98770 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.29.0"
+__version__ = "1.29.1"
__variant__ = None
diff --git a/test/test_text.py b/test/test_text.py
index 1b19c47..30feefc 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -300,6 +300,12 @@ class TestText(unittest.TestCase):
self.assertEqual(
g(txt, "[", "]", 6), ["a", "d"])
+ # invalid arguments
+ for value in INVALID:
+ self.assertEqual(g(value, "<" , ">") , [])
+ self.assertEqual(g(txt , value, ">") , [])
+ self.assertEqual(g(txt , "<" , value), [])
+
def test_extract_from(self, f=text.extract_from):
txt = "[c][b][a]: xyz! [d][e"