Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/extractor/__init__.py      2
-rw-r--r--  gallery_dl/extractor/bbc.py           3
-rw-r--r--  gallery_dl/extractor/bunkr.py        36
-rw-r--r--  gallery_dl/extractor/cien.py          7
-rw-r--r--  gallery_dl/extractor/common.py       14
-rw-r--r--  gallery_dl/extractor/e621.py          2
-rw-r--r--  gallery_dl/extractor/imagefap.py      6
-rw-r--r--  gallery_dl/extractor/mangapark.py     3
-rw-r--r--  gallery_dl/extractor/patreon.py      15
-rw-r--r--  gallery_dl/extractor/pexels.py      189
-rw-r--r--  gallery_dl/extractor/pixiv.py        21
-rw-r--r--  gallery_dl/extractor/plurk.py        16
-rw-r--r--  gallery_dl/extractor/slideshare.py    5
-rw-r--r--  gallery_dl/extractor/wallhaven.py    23
-rw-r--r--  gallery_dl/extractor/weebcentral.py 136
-rw-r--r--  gallery_dl/option.py                  8
-rw-r--r--  gallery_dl/util.py                    2
-rw-r--r--  gallery_dl/version.py                 2
18 files changed, 429 insertions(+), 61 deletions(-)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d003a61..b582c99 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -124,6 +124,7 @@ modules = [
"nsfwalbum",
"paheal",
"patreon",
+ "pexels",
"philomena",
"photovogue",
"picarto",
@@ -190,6 +191,7 @@ modules = [
"weasyl",
"webmshare",
"webtoons",
+ "weebcentral",
"weibo",
"wikiart",
"wikifeet",
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 54aaac4..113a669 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -26,8 +26,7 @@ class BbcGalleryExtractor(GalleryExtractor):
example = "https://www.bbc.co.uk/programmes/PATH"
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, '<script type="application/ld+json">', '</script>'))
+ data = self._extract_jsonld(page)
return {
"programme": self.gallery_url.split("/")[4],
"path": list(util.unique_sequence(
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 3e12452..e1ee50d 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -80,6 +80,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
# redirect
url = response.headers["Location"]
+ if url[0] == "/":
+ url = self.root + url
+ continue
root, path = self._split(url)
if root not in CF_DOMAINS:
continue
@@ -105,37 +108,40 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"All Bunkr domains require solving a CF challenge")
# select alternative domain
- root = "https://" + random.choice(DOMAINS)
+ self.root = root = "https://" + random.choice(DOMAINS)
self.log.debug("Trying '%s' as fallback", root)
url = root + path
def fetch_album(self, album_id):
# album metadata
- page = self.request(self.root + "/a/" + album_id).text
- title, size = text.split_html(text.extr(
- page, "<h1", "</span>").partition(">")[2])
- if "&" in title:
- title = title.replace(
- "&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
+ page = self.request(
+ self.root + "/a/" + album_id, encoding="utf-8").text
+ title = text.unescape(text.unescape(text.extr(
+ page, 'property="og:title" content="', '"')))
# files
- items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->"))
+ items = list(text.extract_iter(
+ page, '<div class="grid-images_box', "</a>"))
+
return self._extract_files(items), {
"album_id" : album_id,
"album_name" : title,
- "album_size" : text.extr(size, "(", ")"),
+ "album_size" : text.extr(
+ page, '<span class="font-semibold">(', ')'),
"count" : len(items),
}
def _extract_files(self, items):
for item in items:
try:
- url = text.extr(item, ' href="', '"')
- file = self._extract_file(text.unescape(url))
+ url = text.unescape(text.extr(item, ' href="', '"'))
+ if url[0] == "/":
+ url = self.root + url
+ file = self._extract_file(url)
info = text.split_html(item)
- file["name"] = info[0]
- file["size"] = info[2]
+ file["name"] = info[-3]
+ file["size"] = info[-2]
file["date"] = text.parse_datetime(
info[-1], "%H:%M:%S %d/%m/%Y")
@@ -179,8 +185,8 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
"""Extractor for bunkr.si media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
- example = "https://bunkr.si/v/FILENAME"
+ pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)"
+ example = "https://bunkr.si/f/FILENAME"
def fetch_album(self, album_id):
try:
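[note] Both redirect targets and item hrefs may now be server-relative and are resolved against the extractor's root. A minimal sketch of that resolution, with hypothetical values:

    root = "https://bunkr.si"

    def resolve(url):
        # Server-relative paths ("/f/...") are joined with the current root
        return root + url if url.startswith("/") else url

    print(resolve("/f/example"))               # https://bunkr.si/f/example
    print(resolve("https://cdn.example/f/x"))  # absolute URLs pass through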
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
index 378365e..27d50e7 100644
--- a/gallery_dl/extractor/cien.py
+++ b/gallery_dl/extractor/cien.py
@@ -9,7 +9,7 @@
"""Extractors for https://ci-en.net/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text
BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
@@ -56,11 +56,8 @@ class CienArticleExtractor(CienExtractor):
self.root, self.groups[0], self.groups[1])
page = self.request(url, notfound="article").text
- post = util.json_loads(text.extr(
- page, '<script type="application/ld+json">', '</script>'))[0]
-
files = self._extract_files(page)
-
+ post = self._extract_jsonld(page)[0]
post["post_url"] = url
post["post_id"] = text.parse_int(self.groups[1])
post["count"] = len(files)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 5ada030..13fd88a 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -587,6 +587,14 @@ class Extractor():
return True
return False
+ def _extract_jsonld(self, page):
+ return util.json_loads(text.extr(
+ page, '<script type="application/ld+json">', "</script>"))
+
+ def _extract_nextdata(self, page):
+ return util.json_loads(text.extr(
+ page, ' id="__NEXT_DATA__" type="application/json">', "</script>"))
+
def _prepare_ddosguard_cookies(self):
if not self.cookies.get("__ddg2", domain=self.cookies_domain):
self.cookies.set(
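[note] The two helpers above centralize a pattern used by several extractors touched in this commit (bbc, cien, imagefap, mangapark, patreon, slideshare). A standalone sketch of what _extract_jsonld() does, assuming text.extr() returns the text between two markers:

    import json

    def extract_jsonld(page):
        # Parse the first JSON-LD <script> payload on the page,
        # mirroring Extractor._extract_jsonld() in spirit.
        marker = '<script type="application/ld+json">'
        beg = page.index(marker) + len(marker)
        end = page.index("</script>", beg)
        return json.loads(page[beg:end])

    page = ('<script type="application/ld+json">'
            '{"@type": "ImageGallery", "name": "Example"}</script>')
    print(extract_jsonld(page)["name"])  # Example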
@@ -772,7 +780,11 @@ class MangaExtractor(Extractor):
def items(self):
self.login()
- page = self.request(self.manga_url).text
+
+ if self.manga_url:
+ page = self.request(self.manga_url, notfound=self.subcategory).text
+ else:
+ page = None
chapters = self.chapters(page)
if self.reverse:
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index 4a6624d..33e6ba8 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -90,7 +90,7 @@ BASE_PATTERN = E621Extractor.update({
class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
"""Extractor for e621 posts from tag searches"""
- pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
+ pattern = BASE_PATTERN + r"/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)"
example = "https://e621.net/posts?tags=TAG"
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 28590fc..dd5220d 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, exception
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com"
@@ -129,13 +129,11 @@ class ImagefapImageExtractor(ImagefapExtractor):
url, pos = text.extract(
page, 'original="', '"')
- info, pos = text.extract(
- page, '<script type="application/ld+json">', '</script>', pos)
image_id, pos = text.extract(
page, 'id="imageid_input" value="', '"', pos)
gallery_id, pos = text.extract(
page, 'id="galleryid_input" value="', '"', pos)
- info = util.json_loads(info)
+ info = self._extract_jsonld(page)
return url, text.nameext_from_url(url, {
"title": text.unescape(info["name"]),
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 63aaf91..6f7a238 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -43,8 +43,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
ChapterExtractor.__init__(self, match, url)
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, 'id="__NEXT_DATA__" type="application/json">', '<'))
+ data = self._extract_nextdata(page)
chapter = (data["props"]["pageProps"]["dehydratedState"]
["queries"][0]["state"]["data"]["data"])
manga = chapter["comicNode"]["data"]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index e4a5985..866e93a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -286,15 +286,12 @@ class PatreonExtractor(Extractor):
return [genmap[ft] for ft in filetypes]
def _extract_bootstrap(self, page):
- data = text.extr(
- page, 'id="__NEXT_DATA__" type="application/json">', '</script')
- if data:
- try:
- data = util.json_loads(data)
- env = data["props"]["pageProps"]["bootstrapEnvelope"]
- return env.get("pageBootstrap") or env["bootstrap"]
- except Exception as exc:
- self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ try:
+ data = self._extract_nextdata(page)
+ env = data["props"]["pageProps"]["bootstrapEnvelope"]
+ return env.get("pageBootstrap") or env["bootstrap"]
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
bootstrap = text.extr(
page, 'window.patreon = {"bootstrap":', '},"apiServer"')
diff --git a/gallery_dl/extractor/pexels.py b/gallery_dl/extractor/pexels.py
new file mode 100644
index 0000000..804623b
--- /dev/null
+++ b/gallery_dl/extractor/pexels.py
@@ -0,0 +1,189 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pexels.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com"
+
+
+class PexelsExtractor(Extractor):
+ """Base class for pexels extractors"""
+ category = "pexels"
+ root = "https://www.pexels.com"
+ archive_fmt = "{id}"
+ request_interval = (1.0, 2.0)
+ request_interval_min = 0.5
+
+ def _init(self):
+ self.api = PexelsAPI(self)
+
+ def items(self):
+ metadata = self.metadata()
+
+ for post in self.posts():
+ if "attributes" in post:
+ attr = post
+ post = post["attributes"]
+ post["type"] = attr["type"]
+
+ post.update(metadata)
+ post["date"] = text.parse_datetime(
+ post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S")
+
+ if "image" in post:
+ url, _, query = post["image"]["download_link"].partition("?")
+ name = text.extr(query, "&dl=", "&")
+ elif "video" in post:
+ video = post["video"]
+ name = video["src"]
+ url = video["download_link"]
+ else:
+ self.log.warning("%s: Unsupported post type", post.get("id"))
+ continue
+
+ yield Message.Directory, post
+ yield Message.Url, url, text.nameext_from_url(name, post)
+
+ def posts(self):
+ return ()
+
+ def metadata(self):
+ return {}
+
+
+class PexelsCollectionExtractor(PexelsExtractor):
+ """Extractor for a pexels.com collection"""
+ subcategory = "collection"
+ directory_fmt = ("{category}", "Collections", "{collection}")
+ pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))"
+ example = "https://www.pexels.com/collections/SLUG-a1b2c3/"
+
+ def metadata(self):
+ cname, cid = self.groups
+ return {"collection": cname, "collection_id": cid}
+
+ def posts(self):
+ return self.api.collections_media(self.groups[1])
+
+
+class PexelsSearchExtractor(PexelsExtractor):
+ """Extractor for pexels.com search results"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "Searches", "{search_tags}")
+ pattern = BASE_PATTERN + r"/search/([^/?#]+)"
+ example = "https://www.pexels.com/search/QUERY/"
+
+ def metadata(self):
+ return {"search_tags": self.groups[0]}
+
+ def posts(self):
+ return self.api.search_photos(self.groups[0])
+
+
+class PexelsUserExtractor(PexelsExtractor):
+ """Extractor for pexels.com user galleries"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "@{user[slug]}")
+ pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))"
+ example = "https://www.pexels.com/@USER-12345/"
+
+ def posts(self):
+ return self.api.users_media_recent(self.groups[1] or self.groups[0])
+
+
+class PexelsImageExtractor(PexelsExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)"
+ example = "https://www.pexels.com/photo/SLUG-12345/"
+
+ def posts(self):
+ url = "{}/photo/{}/".format(self.root, self.groups[0])
+ page = self.request(url).text
+ return (self._extract_nextdata(page)["props"]["pageProps"]["medium"],)
+
+
+class PexelsAPI():
+ """Interface for the Pexels Web API"""
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = "https://www.pexels.com/en-us/api"
+ self.headers = {
+ "Accept" : "*/*",
+ "Content-Type" : "application/json",
+ "secret-key" : "H2jk9uKnhRmL6WPwh89zBezWvr",
+ "Authorization" : "",
+ "X-Forwarded-CF-Connecting-IP" : "",
+ "X-Forwarded-HTTP_CF_IPCOUNTRY": "",
+ "X-Forwarded-CF-IPRegionCode" : "",
+ "X-Client-Type" : "react",
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-origin",
+ "Priority" : "u=4",
+ }
+
+ def collections_media(self, collection_id):
+ endpoint = "/v3/collections/{}/media".format(collection_id)
+ params = {
+ "page" : "1",
+ "per_page": "24",
+ }
+ return self._pagination(endpoint, params)
+
+ def search_photos(self, query):
+ endpoint = "/v3/search/photos"
+ params = {
+ "query" : query,
+ "page" : "1",
+ "per_page" : "24",
+ "orientation": "all",
+ "size" : "all",
+ "color" : "all",
+ "sort" : "popular",
+ }
+ return self._pagination(endpoint, params)
+
+ def users_media_recent(self, user_id):
+ endpoint = "/v3/users/{}/media/recent".format(user_id)
+ params = {
+ "page" : "1",
+ "per_page": "24",
+ }
+ return self._pagination(endpoint, params)
+
+ def _call(self, endpoint, params):
+ url = self.root + endpoint
+
+ while True:
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=None)
+
+ if response.status_code < 300:
+ return response.json()
+
+ elif response.status_code == 429:
+ self.extractor.wait(seconds=600)
+
+ else:
+ self.extractor.log.debug(response.text)
+ raise exception.StopExtraction("API request failed")
+
+ def _pagination(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+
+ yield from data["data"]
+
+ pagination = data["pagination"]
+ if pagination["current_page"] >= pagination["total_pages"]:
+ return
+ params["page"] = pagination["current_page"] + 1
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 6207bf7..d3e40ee 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -111,6 +111,7 @@ class PixivExtractor(Extractor):
{
"url" : img["image_urls"]["original"],
"suffix": "_p{:02}".format(num),
+ "_fallback": self._fallback_image(img),
}
for num, img in enumerate(meta_pages)
]
@@ -128,7 +129,7 @@ class PixivExtractor(Extractor):
self.log.warning("%s: 'My pixiv' locked", work["id"])
elif work["type"] != "ugoira":
- return ({"url": url},)
+ return ({"url": url, "_fallback": self._fallback_image(url)},)
elif self.load_ugoira:
try:
@@ -269,6 +270,24 @@ class PixivExtractor(Extractor):
except exception.HttpError:
pass
+ def _fallback_image(self, src):
+ if isinstance(src, str):
+ urls = None
+ orig = src
+ else:
+ urls = src["image_urls"]
+ orig = urls["original"]
+
+ base = orig.rpartition(".")[0]
+ yield base.replace("-original/", "-master/", 1) + "_master1200.jpg"
+
+ if urls is None:
+ return
+
+ for fmt in ("large", "medium", "square_medium"):
+ if fmt in urls:
+ yield urls[fmt]
+
@staticmethod
def _date_from_url(url, offset=timedelta(hours=9)):
try:
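[note] The first fallback candidate rewrites the original image URL into its _master1200 preview form. A sketch of that rewrite; the URL shape here is illustrative:

    orig = ("https://i.pximg.net/img-original/img/"
            "2025/01/01/00/00/00/12345_p0.png")
    base = orig.rpartition(".")[0]  # strip the extension
    print(base.replace("-original/", "-master/", 1) + "_master1200.jpg")
    # https://i.pximg.net/img-master/img/.../12345_p0_master1200.jpg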
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index be0dbde..0bacd54 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -104,16 +104,16 @@ class PlurkPostExtractor(PlurkExtractor):
pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)"
example = "https://www.plurk.com/p/12345"
- def __init__(self, match):
- PlurkExtractor.__init__(self, match)
- self.plurk_id = match.group(1)
-
def plurks(self):
- url = "{}/p/{}".format(self.root, self.plurk_id)
+ url = "{}/p/{}".format(self.root, self.groups[0])
page = self.request(url).text
- user, pos = text.extract(page, " GLOBAL = ", "\n")
- data, pos = text.extract(page, "plurk = ", ";\n", pos)
+ user, pos = text.extract(page, " GLOBAL=", "\n")
+ data, pos = text.extract(page, "plurk =", ";\n", pos)
data = self._load(data)
- data["user"] = self._load(user)["page_user"]
+ try:
+ data["user"] = self._load(user)["page_user"]
+ except Exception:
+ self.log.warning("%s: Failed to extract 'user' data",
+ self.groups[0])
return (data,)
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index e5e7a6b..0722d23 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -10,7 +10,7 @@
"""Extractors for https://www.slideshare.net/"""
from .common import GalleryExtractor
-from .. import text, util
+from .. import text
class SlidesharePresentationExtractor(GalleryExtractor):
@@ -31,8 +31,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
- data = util.json_loads(text.extr(
- page, 'id="__NEXT_DATA__" type="application/json">', '</script>'))
+ data = self._extract_nextdata(page)
self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"]
return {
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 479e8a8..e5b764a 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -54,7 +54,7 @@ class WallhavenExtractor(Extractor):
class WallhavenSearchExtractor(WallhavenExtractor):
"""Extractor for search results on wallhaven.cc"""
subcategory = "search"
- directory_fmt = ("{category}", "{search[q]}")
+ directory_fmt = ("{category}", "{search[tags]}")
archive_fmt = "s_{search[q]}_{id}"
pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?"
example = "https://wallhaven.cc/search?q=QUERY"
@@ -64,7 +64,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
self.params = text.parse_query(match.group(1))
def wallpapers(self):
- return self.api.search(self.params.copy())
+ return self.api.search(self.params)
def metadata(self):
return {"search": self.params}
@@ -141,7 +141,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor):
def wallpapers(self):
params = {"q": "@" + self.username}
- return self.api.search(params.copy())
+ return self.api.search(params)
def metadata(self):
return {"username": self.username}
@@ -215,20 +215,35 @@ class WallhavenAPI():
def _pagination(self, endpoint, params=None, metadata=None):
if params is None:
+ params_ptr = None
params = {}
+ else:
+ params_ptr = params
+ params = params.copy()
if metadata is None:
metadata = self.extractor.config("metadata")
while True:
data = self._call(endpoint, params)
+ meta = data.get("meta")
+ if params_ptr is not None:
+ if meta and "query" in meta:
+ query = meta["query"]
+ if isinstance(query, dict):
+ params_ptr["tags"] = query.get("tag")
+ params_ptr["tag_id"] = query.get("id")
+ else:
+ params_ptr["tags"] = query
+ params_ptr["tag_id"] = 0
+ params_ptr = None
+
if metadata:
for wp in data["data"]:
yield self.info(str(wp["id"]))
else:
yield from data["data"]
- meta = data.get("meta")
if not meta or meta["current_page"] >= meta["last_page"]:
return
params["page"] = meta["current_page"] + 1
diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py
new file mode 100644
index 0000000..39f998a
--- /dev/null
+++ b/gallery_dl/extractor/weebcentral.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://weebcentral.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com"
+
+
+class WeebcentralBase():
+ category = "weebcentral"
+ root = "https://weebcentral.com"
+ request_interval = (0.5, 1.5)
+
+ @memcache(keyarg=1)
+ def _extract_manga_data(self, manga_id):
+ url = "{}/series/{}".format(self.root, manga_id)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ return {
+ "manga_id": manga_id,
+ "lang" : "en",
+ "language": "English",
+ "manga" : text.unescape(extr("<title>", " | Weeb Central")),
+ "author" : text.split_html(extr("<strong>Author", "</li>"))[1::2],
+ "tags" : text.split_html(extr("<strong>Tag", "</li>"))[1::2],
+ "type" : text.remove_html(extr("<strong>Type: ", "</li>")),
+ "status" : text.remove_html(extr("<strong>Status: ", "</li>")),
+ "release" : text.remove_html(extr("<strong>Released: ", "</li>")),
+ "official": ">Yes" in extr("<strong>Official Translatio", "</li>"),
+ "description": text.unescape(text.remove_html(extr(
+ "<strong>Description", "</li>"))),
+ }
+
+
+class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor):
+ """Extractor for manga chapters from weebcentral.com"""
+ pattern = BASE_PATTERN + r"(/chapters/(\w+))"
+ example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV"
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ manga_id = extr("'series_id': '", "'")
+
+ data = self._extract_manga_data(manga_id)
+ data["chapter_id"] = self.groups[1]
+ data["chapter_type"] = extr("'chapter_type': '", "'")
+
+ chapter, sep, minor = extr("'number': '", "'").partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+
+ return data
+
+ def images(self, page):
+ referer = self.gallery_url
+ url = referer + "/images"
+ params = {
+ "is_prev" : "False",
+ "current_page" : "1",
+ "reading_style": "long_strip",
+ }
+ headers = {
+ "Accept" : "*/*",
+ "Referer" : referer,
+ "HX-Request" : "true",
+ "HX-Current-URL": referer,
+ }
+ page = self.request(url, params=params, headers=headers).text
+ extr = text.extract_from(page)
+
+ results = []
+ while True:
+ src = extr(' src="', '"')
+ if not src:
+ break
+ results.append((src, {
+ "width" : text.parse_int(extr(' width="' , '"')),
+ "height": text.parse_int(extr(' height="', '"')),
+ }))
+ return results
+
+
+class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor):
+ """Extractor for manga from weebcentral.com"""
+ chapterclass = WeebcentralChapterExtractor
+ pattern = BASE_PATTERN + r"/series/(\w+)"
+ example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE"
+
+ def __init__(self, match):
+ MangaExtractor.__init__(self, match, False)
+
+ def chapters(self, _):
+ manga_id = self.groups[0]
+ referer = "{}/series/{}".format(self.root, manga_id)
+ url = referer + "/full-chapter-list"
+ headers = {
+ "Accept" : "*/*",
+ "Referer" : referer,
+ "HX-Request" : "true",
+ "HX-Target" : "chapter-list",
+ "HX-Current-URL": referer,
+ }
+ page = self.request(url, headers=headers).text
+ extr = text.extract_from(page)
+ data = self._extract_manga_data(manga_id)
+ base = self.root + "/chapters/"
+
+ results = []
+ while True:
+ chapter_id = extr("/chapters/", '"')
+ if not chapter_id:
+ break
+ type, _, chapter = extr('<span class="">', "<").partition(" ")
+ chapter, sep, minor = chapter.partition(".")
+
+ chapter = {
+ "chapter_id" : chapter_id,
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor": sep + minor,
+ "chapter_type" : type,
+ "date" : text.parse_datetime(
+ extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"),
+ }
+ chapter.update(data)
+ results.append((base + chapter_id, chapter))
+ return results
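[note] Both the chapter and manga extractors call _extract_manga_data(), so @memcache(keyarg=1) ensures the series page is fetched only once per manga_id. A simplified stand-in using functools (memcache is gallery-dl's own cache decorator):

    import functools

    @functools.lru_cache(maxsize=None)
    def extract_manga_data(manga_id):
        print("fetching series page for", manga_id)  # runs once per id
        return {"manga_id": manga_id, "lang": "en"}

    extract_manga_data("01J7EXAMPLE")  # performs the fetch
    extract_manga_data("01J7EXAMPLE")  # cache hit, no second fetch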
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index a3f78e5..222679a 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -323,7 +323,7 @@ def build_parser():
input.add_argument(
"--no-input",
dest="input", nargs=0, action=ConfigConstAction, const=False,
- help=("Do not prompt for passwords/tokens"),
+ help="Do not prompt for passwords/tokens",
)
output = parser.add_argument_group("Output Options")
@@ -406,7 +406,7 @@ def build_parser():
)
output.add_argument(
"--list-extractors",
- dest="list_extractors", metavar="CATEGORIES", nargs="*",
+ dest="list_extractors", metavar="[CATEGORIES]", nargs="*",
help=("Print a list of extractor classes "
"with description, (sub)category and example URL"),
)
@@ -430,12 +430,12 @@ def build_parser():
output.add_argument(
"--print-traffic",
dest="print_traffic", action="store_true",
- help=("Display sent and read HTTP traffic"),
+ help="Display sent and read HTTP traffic",
)
output.add_argument(
"--no-colors",
dest="colors", action="store_false",
- help=("Do not emit ANSI color codes in output"),
+ help="Do not emit ANSI color codes in output",
)
networking = parser.add_argument_group("Networking Options")
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 72ec98e..2302088 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -83,7 +83,7 @@ def unique_sequence(iterable):
def contains(values, elements, separator=" "):
"""Returns True if at least one of 'elements' is contained in 'values'"""
- if isinstance(values, str):
+ if isinstance(values, str) and (separator or separator is None):
values = values.split(separator)
if not isinstance(elements, (tuple, list)):
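[note] The added guard lets a falsy separator (e.g. the empty string) skip the split and fall back to a plain substring check, instead of raising ValueError from str.split(""). A sketch of the resulting behavior, with the rest of the function paraphrased:

    def contains(values, elements, separator=" "):
        # Split only with a usable separator; None still means
        # "split on any whitespace", "" now means "do not split".
        if isinstance(values, str) and (separator or separator is None):
            values = values.split(separator)
        if not isinstance(elements, (tuple, list)):
            return elements in values
        return any(e in values for e in elements)

    print(contains("a b c", "b"))    # True - split on spaces
    print(contains("abc", "b", ""))  # True - substring check, no split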
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 4b28924..6bceebd 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.28.3"
+__version__ = "1.28.4"
__variant__ = None