summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl')
-rw-r--r-- gallery_dl/extractor/8muses.py 7
-rw-r--r-- gallery_dl/extractor/batoto.py 60
-rw-r--r-- gallery_dl/extractor/boosty.py 3
-rw-r--r-- gallery_dl/extractor/civitai.py 17
-rw-r--r-- gallery_dl/extractor/deviantart.py 107
-rw-r--r-- gallery_dl/extractor/directlink.py 3
-rw-r--r-- gallery_dl/extractor/hitomi.py 27
-rw-r--r-- gallery_dl/extractor/instagram.py 18
-rw-r--r-- gallery_dl/extractor/kemonoparty.py 12
-rw-r--r-- gallery_dl/extractor/piczel.py 49
-rw-r--r-- gallery_dl/extractor/poipiku.py 10
-rw-r--r-- gallery_dl/extractor/sankaku.py 2
-rw-r--r-- gallery_dl/extractor/subscribestar.py 28
-rw-r--r-- gallery_dl/extractor/szurubooru.py 4
-rw-r--r-- gallery_dl/extractor/tapas.py 4
-rw-r--r-- gallery_dl/version.py 2
16 files changed, 260 insertions, 93 deletions
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index f88a0c6..68b906e 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -57,7 +57,12 @@ class _8musesAlbumExtractor(Extractor):
albums = data.get("albums")
if albums:
for album in albums:
- url = self.root + "/comics/album/" + album["permalink"]
+ permalink = album.get("permalink")
+ if not permalink:
+ self.log.debug("Private album")
+ continue
+
+ url = self.root + "/comics/album/" + permalink
yield Message.Queue, url, {
"url" : url,
"name" : album["name"],
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 786acd9..77c40ef 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -10,17 +10,55 @@ from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, exception
import re
-BASE_PATTERN = (r"(?:https?://)?(?:"
- r"(?:ba|d|h|m|w)to\.to|"
+BASE_PATTERN = (r"(?:https?://)?("
+ r"(?:ba|d|f|h|j|m|w)to\.to|"
r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
r"comiko\.(?:net|org)|"
r"bat(?:otoo|o?two)\.com)")
+# https://rentry.co/batoto
+DOMAINS = {
+ "dto.to",
+ "fto.to",
+ "hto.to",
+ "jto.to",
+ "mto.to",
+ "wto.to",
+ "xbato.com",
+ "xbato.net",
+ "xbato.org",
+ "zbato.com",
+ "zbato.net",
+ "zbato.org",
+ "readtoto.com",
+ "readtoto.net",
+ "readtoto.org",
+ "batocomic.com",
+ "batocomic.net",
+ "batocomic.org",
+ "batotoo.com",
+ "batotwo.com",
+ "comiko.net",
+ "comiko.org",
+ "battwo.com",
+}
+LEGACY_DOMAINS = {
+ "bato.to",
+ "mangatoto.com",
+ "mangatoto.net",
+ "mangatoto.org",
+}
+
class BatotoBase():
"""Base class for batoto extractors"""
category = "batoto"
- root = "https://bato.to"
+ root = "https://xbato.org"
+
+ def _init_root(self, match):
+ domain = match.group(1)
+ if domain not in LEGACY_DOMAINS:
+ self.root = "https://" + domain
def request(self, url, **kwargs):
kwargs["encoding"] = "utf-8"
@@ -28,13 +66,13 @@ class BatotoBase():
class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
- """Extractor for bato.to manga chapters"""
+ """Extractor for batoto manga chapters"""
pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
- example = "https://bato.to/title/12345-MANGA/54321"
+ example = "https://xbato.org/title/12345-MANGA/54321"
def __init__(self, match):
- self.root = text.root_from_url(match.group(0))
- self.chapter_id = match.group(1)
+ self._init_root(match)
+ self.chapter_id = match.group(2)
url = "{}/title/0/{}".format(self.root, self.chapter_id)
ChapterExtractor.__init__(self, match, url)
@@ -86,16 +124,16 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
class BatotoMangaExtractor(BatotoBase, MangaExtractor):
- """Extractor for bato.to manga"""
+ """Extractor for batoto manga"""
reverse = False
chapterclass = BatotoChapterExtractor
pattern = (BASE_PATTERN +
r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
- example = "https://bato.to/title/12345-MANGA/"
+ example = "https://xbato.org/title/12345-MANGA/"
def __init__(self, match):
- self.root = text.root_from_url(match.group(0))
- self.manga_id = match.group(1) or match.group(2)
+ self._init_root(match)
+ self.manga_id = match.group(2) or match.group(3)
url = "{}/title/{}".format(self.root, self.manga_id)
MangaExtractor.__init__(self, match, url)
diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py
index 33823be..c28fad9 100644
--- a/gallery_dl/extractor/boosty.py
+++ b/gallery_dl/extractor/boosty.py
@@ -124,6 +124,9 @@ class BoostyExtractor(Extractor):
elif type == "audio_file":
files.append(self._update_url(post, block))
+ elif type == "file":
+ files.append(self._update_url(post, block))
+
else:
self.log.debug("%s: Unsupported data type '%s'",
post["int_id"], type)
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 1e8cb42..36efcfe 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -338,6 +338,7 @@ class CivitaiUserExtractor(CivitaiExtractor):
(CivitaiUserModelsExtractor, base + "models"),
(CivitaiUserPostsExtractor , base + "posts"),
(CivitaiUserImagesExtractor, base + "images"),
+ (CivitaiUserVideosExtractor, base + "videos"),
), ("user-models", "user-posts"))
@@ -400,6 +401,20 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
return self.api.images(params)
+class CivitaiUserVideosExtractor(CivitaiExtractor):
+ subcategory = "user-videos"
+ directory_fmt = ("{category}", "{username|user[username]}", "videos")
+ pattern = USER_PATTERN + r"/videos/?(?:\?([^#]+))?"
+ example = "https://civitai.com/user/USER/videos"
+
+ def images(self):
+ self._image_ext = "mp4"
+ params = text.parse_query(self.groups[1])
+ params["types"] = ["video"]
+ params["username"] = text.unquote(self.groups[0])
+ return self.api.images(params)
+
+
class CivitaiRestAPI():
"""Interface for the Civitai Public REST API
@@ -484,7 +499,7 @@ class CivitaiTrpcAPI():
self.root = extractor.root + "/api/trpc/"
self.headers = {
"content-type" : "application/json",
- "x-client-version": "5.0.211",
+ "x-client-version": "5.0.394",
"x-client-date" : "",
"x-client" : "web",
"x-fingerprint" : "undefined",
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 69934b4..8172f62 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -440,7 +440,8 @@ class DeviantartExtractor(Extractor):
html.append("text-align:")
html.append(attrs["textAlign"])
html.append(";")
- html.append('margin-inline-start:0px">')
+ self._tiptap_process_indentation(html, attrs)
+ html.append('">')
for block in children:
self._tiptap_process_content(html, block)
@@ -460,17 +461,32 @@ class DeviantartExtractor(Extractor):
html.append(' style="text-align:')
html.append(attrs.get("textAlign") or "left")
html.append('">')
- html.append('<span style="margin-inline-start:0px">')
-
- children = content.get("content")
- if children:
- for block in children:
- self._tiptap_process_content(html, block)
-
+ html.append('<span style="')
+ self._tiptap_process_indentation(html, attrs)
+ html.append('">')
+ self._tiptap_process_children(html, content)
html.append("</span></h")
html.append(level)
html.append(">")
+ elif type in ("listItem", "bulletList", "orderedList", "blockquote"):
+ c = type[1]
+ tag = (
+ "li" if c == "i" else
+ "ul" if c == "u" else
+ "ol" if c == "r" else
+ "blockquote"
+ )
+ html.append("<" + tag + ">")
+ self._tiptap_process_children(html, content)
+ html.append("</" + tag + ">")
+
+ elif type == "anchor":
+ attrs = content["attrs"]
+ html.append('<a id="')
+ html.append(attrs.get("id") or "")
+ html.append('" data-testid="anchor"></a>')
+
elif type == "hardBreak":
html.append("<br/><br/>")
@@ -488,6 +504,44 @@ class DeviantartExtractor(Extractor):
html.append(user)
html.append('</a>')
+ elif type == "da-gif":
+ attrs = content["attrs"]
+ width = str(attrs.get("width") or "")
+ height = str(attrs.get("height") or "")
+ url = text.escape(attrs.get("url") or "")
+
+ html.append('<div data-da-type="da-gif" data-width="')
+ html.append(width)
+ html.append('" data-height="')
+ html.append(height)
+ html.append('" data-alignment="')
+ html.append(attrs.get("alignment") or "")
+ html.append('" data-url="')
+ html.append(url)
+ html.append('" class="t61qu"><video role="img" autoPlay="" '
+ 'muted="" loop="" style="pointer-events:none" '
+ 'controlsList="nofullscreen" playsInline="" '
+ 'aria-label="gif" data-da-type="da-gif" width="')
+ html.append(width)
+ html.append('" height="')
+ html.append(height)
+ html.append('" src="')
+ html.append(url)
+ html.append('" class="_1Fkk6"></video></div>')
+
+ elif type == "da-video":
+ src = text.escape(content["attrs"].get("src") or "")
+ html.append('<div data-testid="video" data-da-type="da-video" '
+ 'data-src="')
+ html.append(src)
+ html.append('" class="_1Uxvs"><div data-canfs="yes" data-testid="v'
+ 'ideo-inner" class="main-video" style="width:780px;hei'
+ 'ght:438px"><div style="width:780px;height:438px">'
+ '<video src="')
+ html.append(src)
+ html.append('" style="width:100%;height:100%;" preload="auto" cont'
+ 'rols=""></video></div></div></div>')
+
else:
self.log.warning("Unsupported content type '%s'", type)
@@ -501,7 +555,13 @@ class DeviantartExtractor(Extractor):
attrs = mark.get("attrs") or {}
html.append('<a href="')
html.append(text.escape(attrs.get("href") or ""))
- html.append('" rel="noopener noreferrer nofollow ugc">')
+ if "target" in attrs:
+ html.append('" target="')
+ html.append(attrs["target"])
+ html.append('" rel="')
+ html.append(attrs.get("rel") or
+ "noopener noreferrer nofollow ugc")
+ html.append('">')
close.append("</a>")
elif type == "bold":
html.append("<strong>")
@@ -525,6 +585,18 @@ class DeviantartExtractor(Extractor):
else:
html.append(text.escape(content["text"]))
+ def _tiptap_process_children(self, html, content):
+ children = content.get("content")
+ if children:
+ for block in children:
+ self._tiptap_process_content(html, block)
+
+ def _tiptap_process_indentation(self, html, attrs):
+ itype = ("text-indent" if attrs.get("indentType") == "line" else
+ "margin-inline-start")
+ isize = str((attrs.get("indentation") or 0) * 24)
+ html.append(itype + ":" + isize + "px")
+
def _tiptap_process_deviation(self, html, content):
dev = content["attrs"]["deviation"]
media = dev.get("media") or ()
@@ -758,19 +830,22 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
self.api.user_friends_unwatch(username)
def _eclipse_media(self, media, format="preview"):
- url = [media["baseUri"], ]
+ url = [media["baseUri"]]
formats = {
fmt["t"]: fmt
for fmt in media["types"]
}
- tokens = media["token"]
- if len(tokens) == 1:
- fmt = formats[format]
- url.append(fmt["c"].replace("<prettyName>", media["prettyName"]))
- url.append("?token=")
- url.append(tokens[-1])
+ tokens = media.get("token") or ()
+ if tokens:
+ if len(tokens) <= 1:
+ fmt = formats[format]
+ if "c" in fmt:
+ url.append(fmt["c"].replace(
+ "<prettyName>", media["prettyName"]))
+ url.append("?token=")
+ url.append(tokens[-1])
return "".join(url), formats
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 2f0230a..4559aff 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -25,7 +25,8 @@ class DirectlinkExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.data = match.groupdict()
+ self.data = data = match.groupdict()
+ self.subcategory = ".".join(data["domain"].rsplit(".", 2)[-2:])
def items(self):
data = self.data
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 308b42c..e15e13c 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -122,7 +122,10 @@ class HitomiTagExtractor(Extractor):
self.tag = tag
def items(self):
- data = {"_extractor": HitomiGalleryExtractor}
+ data = {
+ "_extractor": HitomiGalleryExtractor,
+ "search_tags": text.unquote(self.tag.rpartition("-")[0]),
+ }
nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format(
self.type, self.tag)
headers = {
@@ -202,12 +205,14 @@ class HitomiSearchExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.query = match.group(1)
- self.tags = text.unquote(self.query).split(" ")
+ self.tags = text.unquote(self.query)
def items(self):
- data = {"_extractor": HitomiGalleryExtractor}
-
- results = [self.get_nozomi_items(tag) for tag in self.tags]
+ data = {
+ "_extractor": HitomiGalleryExtractor,
+ "search_tags": self.tags,
+ }
+ results = [self.get_nozomi_items(tag) for tag in self.tags.split(" ")]
intersects = set.intersection(*results)
for gallery_id in sorted(intersects, reverse=True):
@@ -219,20 +224,16 @@ class HitomiSearchExtractor(Extractor):
area, tag, language = self.get_nozomi_args(full_tag)
if area:
- referer_base = "{}/n/{}/{}-{}.html".format(
- self.root, area, tag, language)
- nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
+ nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format(
area, tag, language)
else:
- referer_base = "{}/n/{}-{}.html".format(
- self.root, tag, language)
- nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
+ nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format(
tag, language)
headers = {
"Origin": self.root,
"Cache-Control": "max-age=0",
- "Referer": "{}/search.html?{}".format(referer_base, self.query),
+ "Referer": "{}/search.html?{}".format(self.root, self.query),
}
response = self.request(nozomi_url, headers=headers)
@@ -251,7 +252,7 @@ class HitomiSearchExtractor(Extractor):
language = tag
tag = "index"
- return area, tag, language
+ return area, tag.replace("_", " "), language
@memcache(maxage=1800)
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 8c5b180..e344b2f 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -172,8 +172,8 @@ class InstagramExtractor(Extractor):
"post_shortcode": post["code"],
"post_url": "{}/p/{}/".format(self.root, post["code"]),
"likes": post.get("like_count", 0),
- "pinned": post.get("timeline_pinned_user_ids", ()),
"liked": post.get("has_liked", False),
+ "pinned": self._extract_pinned(post),
}
caption = post["caption"]
@@ -385,6 +385,10 @@ class InstagramExtractor(Extractor):
"username" : user["username"],
"full_name": user["full_name"]})
+ def _extract_pinned(self, post):
+ return (post.get("timeline_pinned_user_ids") or
+ post.get("clips_tab_pinned_user_ids") or ())
+
def _init_cursor(self):
cursor = self.config("cursor", True)
if cursor is True:
@@ -451,6 +455,12 @@ class InstagramPostsExtractor(InstagramExtractor):
uid = self.api.user_id(self.item)
return self.api.user_feed(uid)
+ def _extract_pinned(self, post):
+ try:
+ return post["timeline_pinned_user_ids"]
+ except KeyError:
+ return ()
+
class InstagramReelsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's reels"""
@@ -462,6 +472,12 @@ class InstagramReelsExtractor(InstagramExtractor):
uid = self.api.user_id(self.item)
return self.api.user_clips(uid)
+ def _extract_pinned(self, post):
+ try:
+ return post["clips_tab_pinned_user_ids"]
+ except KeyError:
+ return ()
+
class InstagramTaggedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's tagged posts"""
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a7caca9..66bbab5 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -455,9 +455,15 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
reverse=(order == "desc"))
for user in users:
- user["_extractor"] = KemonopartyUserExtractor
- url = "{}/{}/user/{}".format(
- self.root, user["service"], user["id"])
+ service = user["service"]
+ if service == "discord":
+ user["_extractor"] = KemonopartyDiscordServerExtractor
+ url = "{}/discord/server/{}".format(
+ self.root, user["id"])
+ else:
+ user["_extractor"] = KemonopartyUserExtractor
+ url = "{}/{}/user/{}".format(
+ self.root, service, user["id"])
yield Message.Queue, url, user
elif type == "post":
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index fe26704..8a729f3 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -11,6 +11,8 @@
from .common import Extractor, Message
from .. import text
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?piczel\.tv"
+
class PiczelExtractor(Extractor):
"""Base class for piczel extractors"""
@@ -30,6 +32,7 @@ class PiczelExtractor(Extractor):
if post["multi"]:
images = post["images"]
del post["images"]
+ post["count"] = len(images)
yield Message.Directory, post
for post["num"], image in enumerate(images):
if "id" in image:
@@ -39,6 +42,7 @@ class PiczelExtractor(Extractor):
yield Message.Url, url, text.nameext_from_url(url, post)
else:
+ post["count"] = 1
yield Message.Directory, post
post["num"] = 0
url = post["image"]["url"]
@@ -47,35 +51,27 @@ class PiczelExtractor(Extractor):
def posts(self):
"""Return an iterable with all relevant post objects"""
- def _pagination(self, url, folder_id=None):
- params = {
- "from_id" : None,
- "folder_id": folder_id,
- }
+ def _pagination(self, url, pnum=1):
+ params = {"page": pnum}
while True:
data = self.request(url, params=params).json()
- if not data:
- return
- params["from_id"] = data[-1]["id"]
- for post in data:
- if not folder_id or folder_id == post["folder_id"]:
- yield post
+ yield from data["data"]
+
+ params["page"] = data["meta"]["next_page"]
+ if not params["page"]:
+ return
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
- pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$"
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$"
example = "https://piczel.tv/gallery/USER"
- def __init__(self, match):
- PiczelExtractor.__init__(self, match)
- self.user = match.group(1)
-
def posts(self):
- url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
+ url = "{}/api/users/{}/gallery".format(self.root_api, self.groups[0])
return self._pagination(url)
@@ -84,29 +80,20 @@ class PiczelFolderExtractor(PiczelExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
- pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
- r"/gallery/(?!image)([^/?#]+)/(\d+)")
+ pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)"
example = "https://piczel.tv/gallery/USER/12345"
- def __init__(self, match):
- PiczelExtractor.__init__(self, match)
- self.user, self.folder_id = match.groups()
-
def posts(self):
- url = "{}/api/users/{}/gallery".format(self.root_api, self.user)
- return self._pagination(url, int(self.folder_id))
+ url = "{}/api/gallery/folder/{}".format(self.root_api, self.groups[0])
+ return self._pagination(url)
class PiczelImageExtractor(PiczelExtractor):
"""Extractor for individual images"""
subcategory = "image"
- pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
+ pattern = BASE_PATTERN + r"/gallery/image/(\d+)"
example = "https://piczel.tv/gallery/image/12345"
- def __init__(self, match):
- PiczelExtractor.__init__(self, match)
- self.image_id = match.group(1)
-
def posts(self):
- url = "{}/api/gallery/{}".format(self.root_api, self.image_id)
+ url = "{}/api/gallery/{}".format(self.root_api, self.groups[0])
return (self.request(url).json(),)
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index e09a7aa..e371ee2 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -52,20 +52,23 @@ class PoipikuExtractor(Extractor):
}
yield Message.Directory, post
- post["num"] = 0
+ post["num"] = warning = 0
while True:
thumb = extr('class="IllustItemThumbImg" src="', '"')
if not thumb:
break
elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
+ if "/warning" in thumb:
+ warning = True
+ self.log.debug("%s: %s", post["post_id"], thumb)
continue
post["num"] += 1
url = text.ensure_http_scheme(thumb[:-8]).replace(
"//img.", "//img-org.", 1)
yield Message.Url, url, text.nameext_from_url(url, post)
- if not extr('ShowAppendFile', '<'):
+ if not warning and not extr('ShowAppendFile', '<'):
continue
url = self.root + "/f/ShowAppendFileF.jsp"
@@ -87,7 +90,8 @@ class PoipikuExtractor(Extractor):
page = resp["html"]
if (resp.get("result_num") or 0) < 0:
- self.log.warning("'%s'", page.replace("<br/>", " "))
+ self.log.warning("%s: '%s'",
+ post["post_id"], page.replace("<br/>", " "))
for thumb in text.extract_iter(
page, 'class="IllustItemThumbImg" src="', '"'):
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index d5309dc..5e3a958 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -131,7 +131,7 @@ class SankakuPoolExtractor(SankakuExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\d+)"
+ pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)"
example = "https://sankaku.app/books/12345"
def __init__(self, match):
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 7c760ac..8668330 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
+import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
@@ -98,9 +99,10 @@ class SubscribestarExtractor(Extractor):
media.append(item)
attachments = text.extr(
- html, 'class="uploads-docs"', 'data-role="post-edit_form"')
+ html, 'class="uploads-docs"', 'class="post-edit_form"')
if attachments:
- for att in attachments.split('class="doc_preview"')[1:]:
+ for att in re.split(
+ r'class="doc_preview[" ]', attachments)[1:]:
media.append({
"id" : text.parse_int(text.extr(
att, 'data-upload-id="', '"')),
@@ -110,6 +112,20 @@ class SubscribestarExtractor(Extractor):
"type": "attachment",
})
+ audios = text.extr(
+ html, 'class="uploads-audios"', 'class="post-edit_form"')
+ if audios:
+ for audio in re.split(
+ r'class="audio_preview-data[" ]', audios)[1:]:
+ media.append({
+ "id" : text.parse_int(text.extr(
+ audio, 'data-upload-id="', '"')),
+ "name": text.unescape(text.extr(
+ audio, 'audio_preview-title">', '<')),
+ "url" : text.unescape(text.extr(audio, 'src="', '"')),
+ "type": "audio",
+ })
+
return media
def _data_from_post(self, html):
@@ -121,9 +137,7 @@ class SubscribestarExtractor(Extractor):
"author_nick": text.unescape(extr('>', '<')),
"date" : self._parse_datetime(extr(
'class="post-date">', '</').rpartition(">")[2]),
- "content" : (extr(
- '<div class="post-content', '<div class="post-uploads')
- .partition(">")[2]),
+ "content" : extr('<body>', '</body>').strip(),
}
def _parse_datetime(self, dt):
@@ -180,7 +194,5 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"author_nick": text.unescape(extr('alt="', '"')),
"date" : self._parse_datetime(extr(
'<span class="star_link-types">', '<')),
- "content" : (extr(
- '<div class="post-content', '<div class="post-uploads')
- .partition(">")[2]),
+ "content" : extr('<body>', '</body>').strip(),
}
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index b6917cc..b122f26 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -92,6 +92,10 @@ BASE_PATTERN = SzurubooruExtractor.update({
"root": "https://snootbooru.com",
"pattern": r"snootbooru\.com",
},
+ "visuabusters": {
+ "root": "https://www.visuabusters.com/booru",
+ "pattern": r"(?:www\.)?visuabusters\.com/booru",
+ },
})
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index e756385..35a346d 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -85,8 +85,8 @@ class TapasEpisodeExtractor(TapasExtractor):
episode = data["episode"]
if not episode.get("free") and not episode.get("unlocked"):
raise exception.AuthorizationError(
- "%s: Episode '%s' not unlocked",
- episode_id, episode["title"])
+ "{}: Episode '{}' not unlocked".format(
+ episode_id, episode["title"]))
html = data["html"]
episode["series"] = self._extract_series(html)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 651745a..4b28924 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.28.2"
+__version__ = "1.28.3"
__variant__ = None