-rw-r--r--  CHANGELOG.md                        |  32
-rw-r--r--  PKG-INFO                            |  22
-rw-r--r--  README.rst                          |   4
-rw-r--r--  data/completion/_gallery-dl         |   2
-rw-r--r--  data/man/gallery-dl.1               |   4
-rw-r--r--  data/man/gallery-dl.conf.5          |   7
-rw-r--r--  docs/gallery-dl.conf                |  15
-rw-r--r--  gallery_dl.egg-info/PKG-INFO        |  22
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt     |   2
-rw-r--r--  gallery_dl/extractor/__init__.py    |   2
-rw-r--r--  gallery_dl/extractor/bbc.py         |   3
-rw-r--r--  gallery_dl/extractor/bunkr.py       |  36
-rw-r--r--  gallery_dl/extractor/cien.py        |   7
-rw-r--r--  gallery_dl/extractor/common.py      |  14
-rw-r--r--  gallery_dl/extractor/e621.py        |   2
-rw-r--r--  gallery_dl/extractor/imagefap.py    |   6
-rw-r--r--  gallery_dl/extractor/mangapark.py   |   3
-rw-r--r--  gallery_dl/extractor/patreon.py     |  15
-rw-r--r--  gallery_dl/extractor/pexels.py      | 189
-rw-r--r--  gallery_dl/extractor/pixiv.py       |  21
-rw-r--r--  gallery_dl/extractor/plurk.py       |  16
-rw-r--r--  gallery_dl/extractor/slideshare.py  |   5
-rw-r--r--  gallery_dl/extractor/wallhaven.py   |  23
-rw-r--r--  gallery_dl/extractor/weebcentral.py | 136
-rw-r--r--  gallery_dl/option.py                |   8
-rw-r--r--  gallery_dl/util.py                  |   2
-rw-r--r--  gallery_dl/version.py               |   2
-rw-r--r--  test/test_util.py                   |   9
28 files changed, 511 insertions, 98 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7fc97ba..2c7e627 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,26 +1,16 @@
-## 1.28.3 - 2025-01-04
+## 1.28.4 - 2025-01-12
### Extractors
#### Additions
-- [civitai] add `user-videos` extractor ([#6644](https://github.com/mikf/gallery-dl/issues/6644))
-- [szurubooru] support `visuabusters.com/booru` ([#6729](https://github.com/mikf/gallery-dl/issues/6729))
+- [pexels] add support ([#2286](https://github.com/mikf/gallery-dl/issues/2286), [#4214](https://github.com/mikf/gallery-dl/issues/4214), [#6769](https://github.com/mikf/gallery-dl/issues/6769))
+- [weebcentral] add support ([#6778](https://github.com/mikf/gallery-dl/issues/6778))
#### Fixes
-- [8muses] skip albums without valid `permalink` ([#6717](https://github.com/mikf/gallery-dl/issues/6717))
-- [batoto] update domains ([#6714](https://github.com/mikf/gallery-dl/issues/6714))
-- [deviantart:tiptap] fix deviation embeds without `token`
-- [hitomi] fix searches ([#6713](https://github.com/mikf/gallery-dl/issues/6713))
-- [instagram:reels] fix `pinned` values ([#6719](https://github.com/mikf/gallery-dl/issues/6719))
-- [kemonoparty] handle `discord` favorites ([#6706](https://github.com/mikf/gallery-dl/issues/6706))
-- [piczel] fix extraction ([#6735](https://github.com/mikf/gallery-dl/issues/6735))
-- [poipiku] fix downloads when post has a warning ([#6736](https://github.com/mikf/gallery-dl/issues/6736))
-- [sankaku] support alphanumeric book/pool IDs ([#6757](https://github.com/mikf/gallery-dl/issues/6757))
-- [subscribestar] fix attachment downloads ([#6721](https://github.com/mikf/gallery-dl/issues/6721), [#6724](https://github.com/mikf/gallery-dl/issues/6724), [#6758](https://github.com/mikf/gallery-dl/issues/6758))
-- [subscribestar] improve `content` metadata extraction ([#6761](https://github.com/mikf/gallery-dl/issues/6761))
-- [tapas] fix `TypeError` for locked episodes ([#6700](https://github.com/mikf/gallery-dl/issues/6700))
+- [bunkr] update to new site layout ([#6798](https://github.com/mikf/gallery-dl/issues/6798), [#6805](https://github.com/mikf/gallery-dl/issues/6805))
+- [bunkr] fix `ValueError` on relative redirects ([#6790](https://github.com/mikf/gallery-dl/issues/6790))
+- [plurk] fix `user` data extraction and make it non-fatal ([#6742](https://github.com/mikf/gallery-dl/issues/6742))
#### Improvements
-- [boosty] support `file` post attachments ([#6760](https://github.com/mikf/gallery-dl/issues/6760))
-- [deviantart:tiptap] support more content block types ([#6686](https://github.com/mikf/gallery-dl/issues/6686))
-- [directlink] use domain as `subcategory` ([#6703](https://github.com/mikf/gallery-dl/issues/6703))
-- [hitomi] provide `search_tags` metadata for `tag` and `search` results ([#6756](https://github.com/mikf/gallery-dl/issues/6756))
-- [subscribestar] support `audio` files ([#6758](https://github.com/mikf/gallery-dl/issues/6758))
+- [bunkr] support `/f/` media URLs
+- [e621] accept `tag` search URLs with empty tag ([#6783](https://github.com/mikf/gallery-dl/issues/6783))
+- [pixiv] provide fallback URLs ([#6762](https://github.com/mikf/gallery-dl/issues/6762))
+- [wallhaven] extract `search[tags]` and `search[tag_id]` metadata ([#6772](https://github.com/mikf/gallery-dl/issues/6772))
### Miscellaneous
-- [workflows:executables] build with Python 3.13
+- [util] support not splitting the `values` argument when calling `contains()` ([#6773](https://github.com/mikf/gallery-dl/issues/6773))
diff --git a/PKG-INFO b/PKG-INFO
index ecc3fc2..2d2156a 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.28.3
+Version: 1.28.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -38,6 +38,20 @@ License-File: LICENSE
Requires-Dist: requests>=2.11.0
Provides-Extra: video
Requires-Dist: youtube-dl; extra == "video"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: download-url
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: maintainer
+Dynamic: maintainer-email
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
==========
gallery-dl
@@ -117,9 +131,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 6ed729b..2a1a3c2 100644
--- a/README.rst
+++ b/README.rst
@@ -76,9 +76,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 073ac05..99fb8ad 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -32,7 +32,7 @@ _arguments -s -S \
{-N,--print}'[Write FORMAT during EVENT (default '\''prepare'\'') to standard output. Examples: '\''id'\'' or '\''post:{md5\[:8\]}'\'']':'<[event:]format>' \
--print-to-file'[Append FORMAT during EVENT to FILE]':'<[event:]format file>' \
--list-modules'[Print a list of available extractor modules]' \
---list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]':'<categories>' \
+--list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]':'<[categories]>' \
--write-log'[Write logging output to FILE]':'<file>':_files \
--write-unsupported'[Write URLs, which get emitted by other extractors but cannot be handled, to FILE]':'<file>':_files \
--write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index f4791df..ff83690 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-01-04" "1.28.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-01-12" "1.28.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -98,7 +98,7 @@ Append FORMAT during EVENT to FILE
.B "\-\-list\-modules"
Print a list of available extractor modules
.TP
-.B "\-\-list\-extractors" \f[I]CATEGORIES\f[]
+.B "\-\-list\-extractors" \f[I][CATEGORIES]\f[]
Print a list of extractor classes with description, (sub)category and example URL
.TP
.B "\-\-write\-log" \f[I]FILE\f[]
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 7028b7a..9ed6d97 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-01-04" "1.28.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-01-12" "1.28.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -471,13 +471,18 @@ response before \f[I]retrying\f[] the request.
\f[I]soundgasm\f[],
\f[I]urlgalleries\f[],
\f[I]vk\f[],
+\f[I]weebcentral\f[],
\f[I]zerochan\f[]
.br
* \f[I]"1.0-2.0"\f[]
\f[I]flickr\f[],
+\f[I]pexels\f[],
\f[I]weibo\f[],
\f[I][wikimedia]\f[]
.br
+* \f[I]"1.4"\f[]
+\f[I]wallhaven\f[]
+.br
* \f[I]"2.0-4.0"\f[]
\f[I]behance\f[],
\f[I]imagefap\f[],
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 3d73869..0d0c412 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -128,7 +128,7 @@
},
"bilibili":
{
- "sleep-request": "2.0-4.0"
+ "sleep-request": "3.0-6.0"
},
"bluesky":
{
@@ -435,7 +435,12 @@
{
"cookies": null,
- "files" : ["images", "image_large", "attachments", "postfile", "content"]
+ "files" : ["images", "image_large", "attachments", "postfile", "content"],
+ "format-images": "download_url"
+ },
+ "pexels":
+ {
+ "sleep-request": "1.0-2.0"
},
"pillowfort":
{
@@ -691,6 +696,8 @@
"wallhaven":
{
"api-key" : null,
+ "sleep-request": "1.4",
+
"include" : ["uploads"],
"metadata": false
},
@@ -699,6 +706,10 @@
"api-key" : null,
"metadata": false
},
+ "weebcentral":
+ {
+ "sleep-request": "0.5-1.5"
+ },
"weibo":
{
"sleep-request": "1.0-2.0",
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index ecc3fc2..2d2156a 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.28.3
+Version: 1.28.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -38,6 +38,20 @@ License-File: LICENSE
Requires-Dist: requests>=2.11.0
Provides-Extra: video
Requires-Dist: youtube-dl; extra == "video"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: download-url
+Dynamic: home-page
+Dynamic: keywords
+Dynamic: license
+Dynamic: maintainer
+Dynamic: maintainer-email
+Dynamic: provides-extra
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
==========
gallery-dl
@@ -117,9 +131,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.3/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.4/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.3/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.4/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 42dd483..2656948 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -171,6 +171,7 @@ gallery_dl/extractor/nsfwalbum.py
gallery_dl/extractor/oauth.py
gallery_dl/extractor/paheal.py
gallery_dl/extractor/patreon.py
+gallery_dl/extractor/pexels.py
gallery_dl/extractor/philomena.py
gallery_dl/extractor/photovogue.py
gallery_dl/extractor/picarto.py
@@ -239,6 +240,7 @@ gallery_dl/extractor/warosu.py
gallery_dl/extractor/weasyl.py
gallery_dl/extractor/webmshare.py
gallery_dl/extractor/webtoons.py
+gallery_dl/extractor/weebcentral.py
gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
gallery_dl/extractor/wikifeet.py
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d003a61..b582c99 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -124,6 +124,7 @@ modules = [
"nsfwalbum",
"paheal",
"patreon",
+ "pexels",
"philomena",
"photovogue",
"picarto",
@@ -190,6 +191,7 @@ modules = [
"weasyl",
"webmshare",
"webtoons",
+ "weebcentral",
"weibo",
"wikiart",
"wikifeet",
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 54aaac4..113a669 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -26,8 +26,7 @@ class BbcGalleryExtractor(GalleryExtractor):
example = "https://www.bbc.co.uk/programmes/PATH"
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, '<script type="application/ld+json">', '</script>'))
+ data = self._extract_jsonld(page)
return {
"programme": self.gallery_url.split("/")[4],
"path": list(util.unique_sequence(
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 3e12452..e1ee50d 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -80,6 +80,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
# redirect
url = response.headers["Location"]
+ if url[0] == "/":
+ url = self.root + url
+ continue
root, path = self._split(url)
if root not in CF_DOMAINS:
continue
@@ -105,37 +108,40 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"All Bunkr domains require solving a CF challenge")
# select alternative domain
- root = "https://" + random.choice(DOMAINS)
+ self.root = root = "https://" + random.choice(DOMAINS)
self.log.debug("Trying '%s' as fallback", root)
url = root + path
def fetch_album(self, album_id):
# album metadata
- page = self.request(self.root + "/a/" + album_id).text
- title, size = text.split_html(text.extr(
- page, "<h1", "</span>").partition(">")[2])
- if "&" in title:
- title = title.replace(
- "&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
+ page = self.request(
+ self.root + "/a/" + album_id, encoding="utf-8").text
+ title = text.unescape(text.unescape(text.extr(
+ page, 'property="og:title" content="', '"')))
# files
- items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->"))
+ items = list(text.extract_iter(
+ page, '<div class="grid-images_box', "</a>"))
+
return self._extract_files(items), {
"album_id" : album_id,
"album_name" : title,
- "album_size" : text.extr(size, "(", ")"),
+ "album_size" : text.extr(
+ page, '<span class="font-semibold">(', ')'),
"count" : len(items),
}
def _extract_files(self, items):
for item in items:
try:
- url = text.extr(item, ' href="', '"')
- file = self._extract_file(text.unescape(url))
+ url = text.unescape(text.extr(item, ' href="', '"'))
+ if url[0] == "/":
+ url = self.root + url
+ file = self._extract_file(url)
info = text.split_html(item)
- file["name"] = info[0]
- file["size"] = info[2]
+ file["name"] = info[-3]
+ file["size"] = info[-2]
file["date"] = text.parse_datetime(
info[-1], "%H:%M:%S %d/%m/%Y")
@@ -179,8 +185,8 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
"""Extractor for bunkr.si media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
- example = "https://bunkr.si/v/FILENAME"
+ pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)"
+ example = "https://bunkr.si/f/FILENAME"
def fetch_album(self, album_id):
try:
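A note on the relative-redirect fix above: a `Location` header beginning with `/` previously broke URL parsing with a `ValueError`. A minimal sketch of the resolution logic, with a hypothetical redirect target:

```python
# Hypothetical values; mirrors the `url[0] == "/"` check added above.
root = "https://bunkr.si"
location = "/a/abcd1234"          # relative redirect target

if location[0] == "/":
    location = root + location    # -> "https://bunkr.si/a/abcd1234"
```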
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
index 378365e..27d50e7 100644
--- a/gallery_dl/extractor/cien.py
+++ b/gallery_dl/extractor/cien.py
@@ -9,7 +9,7 @@
"""Extractors for https://ci-en.net/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
@@ -56,11 +56,8 @@ class CienArticleExtractor(CienExtractor):
self.root, self.groups[0], self.groups[1])
page = self.request(url, notfound="article").text
- post = util.json_loads(text.extr(
- page, '<script type="application/ld+json">', '</script>'))[0]
-
files = self._extract_files(page)
-
+ post = self._extract_jsonld(page)[0]
post["post_url"] = url
post["post_id"] = text.parse_int(self.groups[1])
post["count"] = len(files)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5ada030..13fd88a 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -587,6 +587,14 @@ class Extractor():
return True
return False
+ def _extract_jsonld(self, page):
+ return util.json_loads(text.extr(
+ page, '<script type="application/ld+json">', "</script>"))
+
+ def _extract_nextdata(self, page):
+ return util.json_loads(text.extr(
+ page, ' id="__NEXT_DATA__" type="application/json">', "</script>"))
+
def _prepare_ddosguard_cookies(self):
if not self.cookies.get("__ddg2", domain=self.cookies_domain):
self.cookies.set(
@@ -772,7 +780,11 @@ class MangaExtractor(Extractor):
def items(self):
self.login()
- page = self.request(self.manga_url).text
+
+ if self.manga_url:
+ page = self.request(self.manga_url, notfound=self.subcategory).text
+ else:
+ page = None
chapters = self.chapters(page)
if self.reverse:
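The two new helpers centralize a pattern previously repeated across extractors (see the bbc, cien, imagefap, mangapark, patreon, and slideshare changes in this commit). A sketch of how a subclass would use them; the class and URL are placeholders:

```python
from gallery_dl.extractor.common import Extractor

class ExampleExtractor(Extractor):
    """Hypothetical subclass for illustration only"""
    category = "example"

    def metadata_from(self, url):
        page = self.request(url).text
        # JSON-LD payload from <script type="application/ld+json">
        data = self._extract_jsonld(page)
        # Next.js payload from the __NEXT_DATA__ script tag
        props = self._extract_nextdata(page)["props"]["pageProps"]
        return data, props
```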
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 4a6624d..33e6ba8 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -90,7 +90,7 @@ BASE_PATTERN = E621Extractor.update({
class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
"""Extractor for e621 posts from tag searches"""
- pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
+ pattern = BASE_PATTERN + r"/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)"
example = "https://e621.net/posts?tags=TAG"
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 28590fc..dd5220d 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
@@ -129,13 +129,11 @@ class ImagefapImageExtractor(ImagefapExtractor):
url, pos = text.extract(
page, 'original="', '"')
- info, pos = text.extract(
- page, '<script type="application/ld+json">', '</script>', pos)
image_id, pos = text.extract(
page, 'id="imageid_input" value="', '"', pos)
gallery_id, pos = text.extract(
page, 'id="galleryid_input" value="', '"', pos)
- info = util.json_loads(info)
+ info = self._extract_jsonld(page)
return url, text.nameext_from_url(url, {
"title": text.unescape(info["name"]),
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 63aaf91..6f7a238 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -43,8 +43,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
ChapterExtractor.__init__(self, match, url)
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, 'id="__NEXT_DATA__" type="application/json">', '<'))
+ data = self._extract_nextdata(page)
chapter = (data["props"]["pageProps"]["dehydratedState"]
["queries"][0]["state"]["data"]["data"])
manga = chapter["comicNode"]["data"]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index e4a5985..866e93a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -286,15 +286,12 @@ class PatreonExtractor(Extractor):
return [genmap[ft] for ft in filetypes]
def _extract_bootstrap(self, page):
- data = text.extr(
- page, 'id="__NEXT_DATA__" type="application/json">', '</script')
- if data:
- try:
- data = util.json_loads(data)
- env = data["props"]["pageProps"]["bootstrapEnvelope"]
- return env.get("pageBootstrap") or env["bootstrap"]
- except Exception as exc:
- self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ try:
+ data = self._extract_nextdata(page)
+ env = data["props"]["pageProps"]["bootstrapEnvelope"]
+ return env.get("pageBootstrap") or env["bootstrap"]
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
bootstrap = text.extr(
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
diff --git a/gallery_dl/extractor/pexels.py b/gallery_dl/extractor/pexels.py
new file mode 100644
index 0000000..804623b
--- /dev/null
+++ b/gallery_dl/extractor/pexels.py
@@ -0,0 +1,189 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pexels.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"
+
+
+class PexelsExtractor(Extractor):
+ """Base class for pexels extractors"""
+ category = "pexels"
+ root = "https://www.pexels.com"
+ archive_fmt = "{id}"
+ request_interval = (1.0, 2.0)
+ request_interval_min = 0.5
+
+ def _init(self):
+ self.api = PexelsAPI(self)
+
+ def items(self):
+ metadata = self.metadata()
+
+ for post in self.posts():
+ if "attributes" in post:
+ attr = post
+ post = post["attributes"]
+ post["type"] = attr["type"]
+
+ post.update(metadata)
+ post["date"] = text.parse_datetime(
+ post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
+
+ if "image" in post:
+ url, _, query = post["image"]["download_link"].partition("?")
+ name = text.extr(query, "&dl=", "&")
+ elif "video" in post:
+ video = post["video"]
+ name = video["src"]
+ url = video["download_link"]
+ else:
+ self.log.warning("%s: Unsupported post type", post.get("id"))
+ continue
+
+ yield Message.Directory, post
+ yield Message.Url, url, text.nameext_from_url(name, post)
+
+ def posts(self):
+ return ()
+
+ def metadata(self):
+ return {}
+
+
+class PexelsCollectionExtractor(PexelsExtractor):
+ """Extractor for a pexels.com collection"""
+ subcategory = "collection"
+ directory_fmt = ("{category}", "Collections", "{collection}")
+ pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))"
+ example = "https://www.pexels.com/collections/SLUG-a1b2c3/"
+
+ def metadata(self):
+ cname, cid = self.groups
+ return {"collection": cname, "collection_id": cid}
+
+ def posts(self):
+ return self.api.collections_media(self.groups[1])
+
+
+class PexelsSearchExtractor(PexelsExtractor):
+ """Extractor for pexels.com search results"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "Searches", "{search_tags}")
+ pattern = BASE_PATTERN + r"/search/([^/?#]+)"
+ example = "https://www.pexels.com/search/QUERY/"
+
+ def metadata(self):
+ return {"search_tags": self.groups[0]}
+
+ def posts(self):
+ return self.api.search_photos(self.groups[0])
+
+
+class PexelsUserExtractor(PexelsExtractor):
+ """Extractor for pexels.com user galleries"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "@{user[slug]}")
+ pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))"
+ example = "https://www.pexels.com/@USER-12345/"
+
+ def posts(self):
+ return self.api.users_media_recent(self.groups[1] or self.groups[0])
+
+
+class PexelsImageExtractor(PexelsExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)"
+ example = "https://www.pexels.com/photo/SLUG-12345/"
+
+ def posts(self):
+ url = "{}/photo/{}/".format(self.root, self.groups[0])
+ page = self.request(url).text
+ return (self._extract_nextdata(page)["props"]["pageProps"]["medium"],)
+
+
+class PexelsAPI():
+ """Interface for the Pexels Web API"""
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = "https://www.pexels.com/en-us/api"
+ self.headers = {
+ "Accept" : "*/*",
+ "Content-Type" : "application/json",
+ "secret-key" : "H2jk9uKnhRmL6WPwh89zBezWvr",
+ "Authorization" : "",
+ "X-Forwarded-CF-Connecting-IP" : "",
+ "X-Forwarded-HTTP_CF_IPCOUNTRY": "",
+ "X-Forwarded-CF-IPRegionCode" : "",
+ "X-Client-Type" : "react",
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-origin",
+ "Priority" : "u=4",
+ }
+
+ def collections_media(self, collection_id):
+ endpoint = "/v3/collections/{}/media".format(collection_id)
+ params = {
+ "page" : "1",
+ "per_page": "24",
+ }
+ return self._pagination(endpoint, params)
+
+ def search_photos(self, query):
+ endpoint = "/v3/search/photos"
+ params = {
+ "query" : query,
+ "page" : "1",
+ "per_page" : "24",
+ "orientation": "all",
+ "size" : "all",
+ "color" : "all",
+ "sort" : "popular",
+ }
+ return self._pagination(endpoint, params)
+
+ def users_media_recent(self, user_id):
+ endpoint = "/v3/users/{}/media/recent".format(user_id)
+ params = {
+ "page" : "1",
+ "per_page": "24",
+ }
+ return self._pagination(endpoint, params)
+
+ def _call(self, endpoint, params):
+ url = self.root + endpoint
+
+ while True:
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=None)
+
+ if response.status_code < 300:
+ return response.json()
+
+ elif response.status_code == 429:
+ self.extractor.wait(seconds=600)
+
+ else:
+ self.extractor.log.debug(response.text)
+ raise exception.StopExtraction("API request failed")
+
+ def _pagination(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+
+ yield from data["data"]
+
+ pagination = data["pagination"]
+ if pagination["current_page"] >= pagination["total_pages"]:
+ return
+ params["page"] = pagination["current_page"] + 1
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 6207bf7..d3e40ee 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -111,6 +111,7 @@ class PixivExtractor(Extractor):
{
"url" : img["image_urls"]["original"],
"suffix": "_p{:02}".format(num),
+ "_fallback": self._fallback_image(img),
}
for num, img in enumerate(meta_pages)
]
@@ -128,7 +129,7 @@ class PixivExtractor(Extractor):
self.log.warning("%s: 'My pixiv' locked", work["id"])
elif work["type"] != "ugoira":
- return ({"url": url},)
+ return ({"url": url, "_fallback": self._fallback_image(url)},)
elif self.load_ugoira:
try:
@@ -269,6 +270,24 @@ class PixivExtractor(Extractor):
except exception.HttpError:
pass
+ def _fallback_image(self, src):
+ if isinstance(src, str):
+ urls = None
+ orig = src
+ else:
+ urls = src["image_urls"]
+ orig = urls["original"]
+
+ base = orig.rpartition(".")[0]
+ yield base.replace("-original/", "-master/", 1) + "_master1200.jpg"
+
+ if urls is None:
+ return
+
+ for fmt in ("large", "medium", "square_medium"):
+ if fmt in urls:
+ yield urls[fmt]
+
@staticmethod
def _date_from_url(url, offset=timedelta(hours=9)):
try:
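The fallback derivation in the new `_fallback_image()` boils down to rewriting the `img-original` path to its `_master1200` JPEG preview. A worked example with a made-up image URL following pixiv's `i.pximg.net` layout:

```python
# Hypothetical original URL
orig = ("https://i.pximg.net/img-original/img/"
        "2025/01/12/00/00/00/123456789_p0.png")

base = orig.rpartition(".")[0]
fallback = base.replace("-original/", "-master/", 1) + "_master1200.jpg"
# -> ".../img-master/img/2025/01/12/00/00/00/123456789_p0_master1200.jpg"
```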
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index be0dbde..0bacd54 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -104,16 +104,16 @@ class PlurkPostExtractor(PlurkExtractor):
pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)"
example = "https://www.plurk.com/p/12345"
- def __init__(self, match):
- PlurkExtractor.__init__(self, match)
- self.plurk_id = match.group(1)
-
def plurks(self):
- url = "{}/p/{}".format(self.root, self.plurk_id)
+ url = "{}/p/{}".format(self.root, self.groups[0])
page = self.request(url).text
- user, pos = text.extract(page, " GLOBAL = ", "\n")
- data, pos = text.extract(page, "plurk = ", ";\n", pos)
+ user, pos = text.extract(page, " GLOBAL=", "\n")
+ data, pos = text.extract(page, "plurk =", ";\n", pos)
data = self._load(data)
- data["user"] = self._load(user)["page_user"]
+ try:
+ data["user"] = self._load(user)["page_user"]
+ except Exception:
+ self.log.warning("%s: Failed to extract 'user' data",
+ self.groups[0])
return (data,)
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index e5e7a6b..0722d23 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -10,7 +10,7 @@
"""Extractors for https://www.slideshare.net/"""
from .common import GalleryExtractor
-from .. import text, util
+from .. import text
class SlidesharePresentationExtractor(GalleryExtractor):
@@ -31,8 +31,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, 'id="__NEXT_DATA__" type="application/json">', '</script>'))
+ data = self._extract_nextdata(page)
self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"]
return {
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 479e8a8..e5b764a 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -54,7 +54,7 @@ class WallhavenExtractor(Extractor):
class WallhavenSearchExtractor(WallhavenExtractor):
"""Extractor for search results on wallhaven.cc"""
subcategory = "search"
- directory_fmt = ("{category}", "{search[q]}")
+ directory_fmt = ("{category}", "{search[tags]}")
archive_fmt = "s_{search[q]}_{id}"
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
example = "https://wallhaven.cc/search?q=QUERY"
@@ -64,7 +64,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
self.params = text.parse_query(match.group(1))
def wallpapers(self):
- return self.api.search(self.params.copy())
+ return self.api.search(self.params)
def metadata(self):
return {"search": self.params}
@@ -141,7 +141,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor):
def wallpapers(self):
params = {"q": "@" + self.username}
- return self.api.search(params.copy())
+ return self.api.search(params)
def metadata(self):
return {"username": self.username}
@@ -215,20 +215,35 @@ class WallhavenAPI():
def _pagination(self, endpoint, params=None, metadata=None):
if params is None:
+ params_ptr = None
params = {}
+ else:
+ params_ptr = params
+ params = params.copy()
if metadata is None:
metadata = self.extractor.config("metadata")
while True:
data = self._call(endpoint, params)
+ meta = data.get("meta")
+ if params_ptr is not None:
+ if meta and "query" in meta:
+ query = meta["query"]
+ if isinstance(query, dict):
+ params_ptr["tags"] = query.get("tag")
+ params_ptr["tag_id"] = query.get("id")
+ else:
+ params_ptr["tags"] = query
+ params_ptr["tag_id"] = 0
+ params_ptr = None
+
if metadata:
for wp in data["data"]:
yield self.info(str(wp["id"]))
else:
yield from data["data"]
- meta = data.get("meta")
if not meta or meta["current_page"] >= meta["last_page"]:
return
params["page"] = meta["current_page"] + 1
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
new file mode 100644
index 0000000..39f998a
--- /dev/null
+++ b/gallery_dl/extractor/weebcentral.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://weebcentral.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com"
+
+
+class WeebcentralBase():
+ category = "weebcentral"
+ root = "https://weebcentral.com"
+ request_interval = (0.5, 1.5)
+
+ @memcache(keyarg=1)
+ def _extract_manga_data(self, manga_id):
+ url = "{}/series/{}".format(self.root, manga_id)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ return {
+ "manga_id": manga_id,
+ "lang" : "en",
+ "language": "English",
+ "manga" : text.unescape(extr("<title>", " | Weeb Central")),
+ "author" : text.split_html(extr("<strong>Author", "</li>"))[1::2],
+ "tags" : text.split_html(extr("<strong>Tag", "</li>"))[1::2],
+ "type" : text.remove_html(extr("<strong>Type: ", "</li>")),
+ "status" : text.remove_html(extr("<strong>Status: ", "</li>")),
+ "release" : text.remove_html(extr("<strong>Released: ", "</li>")),
+ "official": ">Yes" in extr("<strong>Official Translatio", "</li>"),
+ "description": text.unescape(text.remove_html(extr(
+ "<strong>Description", "</li>"))),
+ }
+
+
+class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
+ """Extractor for manga chapters from weebcentral.com"""
+ pattern = BASE_PATTERN + r"(/chapters/(\w+))"
+ example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ manga_id = extr("'series_id': '", "'")
+
+ data = self._extract_manga_data(manga_id)
+ data["chapter_id"] = self.groups[1]
+ data["chapter_type"] = extr("'chapter_type': '", "'")
+
+ chapter, sep, minor = extr("'number': '", "'").partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+
+ return data
+
+ def images(self, page):
+ referer = self.gallery_url
+ url = referer + "/images"
+ params = {
+ "is_prev" : "False",
+ "current_page" : "1",
+ "reading_style": "long_strip",
+ }
+ headers = {
+ "Accept" : "*/*",
+ "Referer" : referer,
+ "HX-Request" : "true",
+ "HX-Current-URL": referer,
+ }
+ page = self.request(url, params=params, headers=headers).text
+ extr = text.extract_from(page)
+
+ results = []
+ while True:
+ src = extr(' src="', '"')
+ if not src:
+ break
+ results.append((src, {
+ "width" : text.parse_int(extr(' width="' , '"')),
+ "height": text.parse_int(extr(' height="', '"')),
+ }))
+ return results
+
+
+class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
+ """Extractor for manga from weebcentral.com"""
+ chapterclass = WeebcentralChapterExtractor
+ pattern = BASE_PATTERN + r"/series/(\w+)"
+ example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"
+
+ def __init__(self, match):
+ MangaExtractor.__init__(self, match, False)
+
+ def chapters(self, _):
+ manga_id = self.groups[0]
+ referer = "{}/series/{}".format(self.root, manga_id)
+ url = referer + "/full-chapter-list"
+ headers = {
+ "Accept" : "*/*",
+ "Referer" : referer,
+ "HX-Request" : "true",
+ "HX-Target" : "chapter-list",
+ "HX-Current-URL": referer,
+ }
+ page = self.request(url, headers=headers).text
+ extr = text.extract_from(page)
+ data = self._extract_manga_data(manga_id)
+ base = self.root + "/chapters/"
+
+ results = []
+ while True:
+ chapter_id = extr("/chapters/", '"')
+ if not chapter_id:
+ break
+ type, _, chapter = extr('<span class="">', "<").partition(" ")
+ chapter, sep, minor = chapter.partition(".")
+
+ chapter = {
+ "chapter_id" : chapter_id,
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor": sep + minor,
+ "chapter_type" : type,
+ "date" : text.parse_datetime(
+ extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"),
+ }
+ chapter.update(data)
+ results.append((base + chapter_id, chapter))
+ return results
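For reference, `images()` above fetches an HTML fragment of `<img>` tags from an HTMX-style endpoint. A hedged sketch of the equivalent raw request (the chapter ID is a placeholder; params and headers mirror the extractor):

```python
import requests

referer = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"
response = requests.get(
    referer + "/images",
    params={
        "is_prev"      : "False",
        "current_page" : "1",
        "reading_style": "long_strip",
    },
    headers={
        "Accept"        : "*/*",
        "Referer"       : referer,
        "HX-Request"    : "true",
        "HX-Current-URL": referer,
    },
)
# response.text is the HTML fragment that images() scans for
# src/width/height attributes
```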
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index a3f78e5..222679a 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -323,7 +323,7 @@ def build_parser():
input.add_argument(
"--no-input",
dest="input", nargs=0, action=ConfigConstAction, const=False,
- help=("Do not prompt for passwords/tokens"),
+ help="Do not prompt for passwords/tokens",
)
output = parser.add_argument_group("Output Options")
@@ -406,7 +406,7 @@ def build_parser():
)
output.add_argument(
"--list-extractors",
- dest="list_extractors", metavar="CATEGORIES", nargs="*",
+ dest="list_extractors", metavar="[CATEGORIES]", nargs="*",
help=("Print a list of extractor classes "
"with description, (sub)category and example URL"),
)
@@ -430,12 +430,12 @@ def build_parser():
output.add_argument(
"--print-traffic",
dest="print_traffic", action="store_true",
- help=("Display sent and read HTTP traffic"),
+ help="Display sent and read HTTP traffic",
)
output.add_argument(
"--no-colors",
dest="colors", action="store_false",
- help=("Do not emit ANSI color codes in output"),
+ help="Do not emit ANSI color codes in output",
)
networking = parser.add_argument_group("Networking Options")
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 72ec98e..2302088 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -83,7 +83,7 @@ def unique_sequence(iterable):
def contains(values, elements, separator=" "):
"""Returns True if at least one of 'elements' is contained in 'values'"""
- if isinstance(values, str):
+ if isinstance(values, str) and (separator or separator is None):
values = values.split(separator)
if not isinstance(elements, (tuple, list)):
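The amended condition splits string `values` for any truthy separator and for `None` (Python's whitespace split), but skips splitting for falsy non-`None` separators such as `""`, turning the check into a plain substring test. Behavior per the new cases in test/test_util.py below:

```python
from gallery_dl import util

s = "1 2 3 asd qwe y(+)c f(+)(-) bar"

util.contains(s, "qwe")          # True  - split on " ", token match
util.contains(s, "(+)")          # False - "(+)" is not a whole token
util.contains(s, "(+)", "")     # True  - no split, substring test
util.contains(s, "(+)", None)   # False - str.split(None) still tokenizes
util.contains(s, "y(+)c", None)  # True  - "y(+)c" is a whitespace token
```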
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 4b28924..6bceebd 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.28.3"
+__version__ = "1.28.4"
__variant__ = None
diff --git a/test/test_util.py b/test/test_util.py
index fa16c44..27f78ec 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -459,6 +459,15 @@ class TestOther(unittest.TestCase):
self.assertFalse(util.contains(s, "tag1"))
self.assertFalse(util.contains(s, ["tag1", "tag2", "tag3"]))
+ self.assertTrue(util.contains(s, "(+)", ""))
+ self.assertTrue(util.contains(s, ["(-)", "(+)"], ""))
+ self.assertTrue(util.contains(s, "(+)", 0))
+ self.assertTrue(util.contains(s, "(+)", False))
+
+ self.assertFalse(util.contains(s, "(+)", None))
+ self.assertTrue(util.contains(s, "y(+)c", None))
+ self.assertTrue(util.contains(s, ["(-)", "(+)", "bar"], None))
+
s = "1, 2, 3, asd, qwe, y(+)c, f(+)(-), bar"
self.assertTrue(util.contains(s, "y(+)c", ", "))
self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"], ", "))