diff options
Diffstat (limited to 'gallery_dl')
| -rw-r--r-- | gallery_dl/extractor/8muses.py | 7 | ||||
| -rw-r--r-- | gallery_dl/extractor/batoto.py | 60 | ||||
| -rw-r--r-- | gallery_dl/extractor/boosty.py | 3 | ||||
| -rw-r--r-- | gallery_dl/extractor/civitai.py | 17 | ||||
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 107 | ||||
| -rw-r--r-- | gallery_dl/extractor/directlink.py | 3 | ||||
| -rw-r--r-- | gallery_dl/extractor/hitomi.py | 27 | ||||
| -rw-r--r-- | gallery_dl/extractor/instagram.py | 18 | ||||
| -rw-r--r-- | gallery_dl/extractor/kemonoparty.py | 12 | ||||
| -rw-r--r-- | gallery_dl/extractor/piczel.py | 49 | ||||
| -rw-r--r-- | gallery_dl/extractor/poipiku.py | 10 | ||||
| -rw-r--r-- | gallery_dl/extractor/sankaku.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/subscribestar.py | 28 | ||||
| -rw-r--r-- | gallery_dl/extractor/szurubooru.py | 4 | ||||
| -rw-r--r-- | gallery_dl/extractor/tapas.py | 4 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 |
16 files changed, 260 insertions, 93 deletions
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index f88a0c6..68b906e 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -57,7 +57,12 @@ class _8musesAlbumExtractor(Extractor): albums = data.get("albums") if albums: for album in albums: - url = self.root + "/comics/album/" + album["permalink"] + permalink = album.get("permalink") + if not permalink: + self.log.debug("Private album") + continue + + url = self.root + "/comics/album/" + permalink yield Message.Queue, url, { "url" : url, "name" : album["name"], diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 786acd9..77c40ef 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -10,17 +10,55 @@ from .common import Extractor, ChapterExtractor, MangaExtractor from .. import text, exception import re -BASE_PATTERN = (r"(?:https?://)?(?:" - r"(?:ba|d|h|m|w)to\.to|" +BASE_PATTERN = (r"(?:https?://)?(" + r"(?:ba|d|f|h|j|m|w)to\.to|" r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|" r"comiko\.(?:net|org)|" r"bat(?:otoo|o?two)\.com)") +# https://rentry.co/batoto +DOMAINS = { + "dto.to", + "fto.to", + "hto.to", + "jto.to", + "mto.to", + "wto.to", + "xbato.com", + "xbato.net", + "xbato.org", + "zbato.com", + "zbato.net", + "zbato.org", + "readtoto.com", + "readtoto.net", + "readtoto.org", + "batocomic.com", + "batocomic.net", + "batocomic.org", + "batotoo.com", + "batotwo.com", + "comiko.net", + "comiko.org", + "battwo.com", +} +LEGACY_DOMAINS = { + "bato.to", + "mangatoto.com", + "mangatoto.net", + "mangatoto.org", +} + class BatotoBase(): """Base class for batoto extractors""" category = "batoto" - root = "https://bato.to" + root = "https://xbato.org" + + def _init_root(self, match): + domain = match.group(1) + if domain not in LEGACY_DOMAINS: + self.root = "https://" + domain def request(self, url, **kwargs): kwargs["encoding"] = "utf-8" @@ -28,13 +66,13 @@ class BatotoBase(): class BatotoChapterExtractor(BatotoBase, ChapterExtractor): - """Extractor for bato.to manga chapters""" + """Extractor for batoto manga chapters""" pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)" - example = "https://bato.to/title/12345-MANGA/54321" + example = "https://xbato.org/title/12345-MANGA/54321" def __init__(self, match): - self.root = text.root_from_url(match.group(0)) - self.chapter_id = match.group(1) + self._init_root(match) + self.chapter_id = match.group(2) url = "{}/title/0/{}".format(self.root, self.chapter_id) ChapterExtractor.__init__(self, match, url) @@ -86,16 +124,16 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor): class BatotoMangaExtractor(BatotoBase, MangaExtractor): - """Extractor for bato.to manga""" + """Extractor for batoto manga""" reverse = False chapterclass = BatotoChapterExtractor pattern = (BASE_PATTERN + r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$") - example = "https://bato.to/title/12345-MANGA/" + example = "https://xbato.org/title/12345-MANGA/" def __init__(self, match): - self.root = text.root_from_url(match.group(0)) - self.manga_id = match.group(1) or match.group(2) + self._init_root(match) + self.manga_id = match.group(2) or match.group(3) url = "{}/title/{}".format(self.root, self.manga_id) MangaExtractor.__init__(self, match, url) diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py index 33823be..c28fad9 100644 --- a/gallery_dl/extractor/boosty.py +++ b/gallery_dl/extractor/boosty.py @@ -124,6 +124,9 @@ class BoostyExtractor(Extractor): elif type == "audio_file": files.append(self._update_url(post, block)) + elif type == "file": + files.append(self._update_url(post, block)) + else: self.log.debug("%s: Unsupported data type '%s'", post["int_id"], type) diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 1e8cb42..36efcfe 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -338,6 +338,7 @@ class CivitaiUserExtractor(CivitaiExtractor): (CivitaiUserModelsExtractor, base + "models"), (CivitaiUserPostsExtractor , base + "posts"), (CivitaiUserImagesExtractor, base + "images"), + (CivitaiUserVideosExtractor, base + "videos"), ), ("user-models", "user-posts")) @@ -400,6 +401,20 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): return self.api.images(params) +class CivitaiUserVideosExtractor(CivitaiExtractor): + subcategory = "user-videos" + directory_fmt = ("{category}", "{username|user[username]}", "videos") + pattern = USER_PATTERN + r"/videos/?(?:\?([^#]+))?" + example = "https://civitai.com/user/USER/videos" + + def images(self): + self._image_ext = "mp4" + params = text.parse_query(self.groups[1]) + params["types"] = ["video"] + params["username"] = text.unquote(self.groups[0]) + return self.api.images(params) + + class CivitaiRestAPI(): """Interface for the Civitai Public REST API @@ -484,7 +499,7 @@ class CivitaiTrpcAPI(): self.root = extractor.root + "/api/trpc/" self.headers = { "content-type" : "application/json", - "x-client-version": "5.0.211", + "x-client-version": "5.0.394", "x-client-date" : "", "x-client" : "web", "x-fingerprint" : "undefined", diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 69934b4..8172f62 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -440,7 +440,8 @@ class DeviantartExtractor(Extractor): html.append("text-align:") html.append(attrs["textAlign"]) html.append(";") - html.append('margin-inline-start:0px">') + self._tiptap_process_indentation(html, attrs) + html.append('">') for block in children: self._tiptap_process_content(html, block) @@ -460,17 +461,32 @@ class DeviantartExtractor(Extractor): html.append(' style="text-align:') html.append(attrs.get("textAlign") or "left") html.append('">') - html.append('<span style="margin-inline-start:0px">') - - children = content.get("content") - if children: - for block in children: - self._tiptap_process_content(html, block) - + html.append('<span style="') + self._tiptap_process_indentation(html, attrs) + html.append('">') + self._tiptap_process_children(html, content) html.append("</span></h") html.append(level) html.append(">") + elif type in ("listItem", "bulletList", "orderedList", "blockquote"): + c = type[1] + tag = ( + "li" if c == "i" else + "ul" if c == "u" else + "ol" if c == "r" else + "blockquote" + ) + html.append("<" + tag + ">") + self._tiptap_process_children(html, content) + html.append("</" + tag + ">") + + elif type == "anchor": + attrs = content["attrs"] + html.append('<a id="') + html.append(attrs.get("id") or "") + html.append('" data-testid="anchor"></a>') + elif type == "hardBreak": html.append("<br/><br/>") @@ -488,6 +504,44 @@ class DeviantartExtractor(Extractor): html.append(user) html.append('</a>') + elif type == "da-gif": + attrs = content["attrs"] + width = str(attrs.get("width") or "") + height = str(attrs.get("height") or "") + url = text.escape(attrs.get("url") or "") + + html.append('<div data-da-type="da-gif" data-width="') + html.append(width) + html.append('" data-height="') + html.append(height) + html.append('" data-alignment="') + html.append(attrs.get("alignment") or "") + html.append('" data-url="') + html.append(url) + html.append('" class="t61qu"><video role="img" autoPlay="" ' + 'muted="" loop="" style="pointer-events:none" ' + 'controlsList="nofullscreen" playsInline="" ' + 'aria-label="gif" data-da-type="da-gif" width="') + html.append(width) + html.append('" height="') + html.append(height) + html.append('" src="') + html.append(url) + html.append('" class="_1Fkk6"></video></div>') + + elif type == "da-video": + src = text.escape(content["attrs"].get("src") or "") + html.append('<div data-testid="video" data-da-type="da-video" ' + 'data-src="') + html.append(src) + html.append('" class="_1Uxvs"><div data-canfs="yes" data-testid="v' + 'ideo-inner" class="main-video" style="width:780px;hei' + 'ght:438px"><div style="width:780px;height:438px">' + '<video src="') + html.append(src) + html.append('" style="width:100%;height:100%;" preload="auto" cont' + 'rols=""></video></div></div></div>') + else: self.log.warning("Unsupported content type '%s'", type) @@ -501,7 +555,13 @@ class DeviantartExtractor(Extractor): attrs = mark.get("attrs") or {} html.append('<a href="') html.append(text.escape(attrs.get("href") or "")) - html.append('" rel="noopener noreferrer nofollow ugc">') + if "target" in attrs: + html.append('" target="') + html.append(attrs["target"]) + html.append('" rel="') + html.append(attrs.get("rel") or + "noopener noreferrer nofollow ugc") + html.append('">') close.append("</a>") elif type == "bold": html.append("<strong>") @@ -525,6 +585,18 @@ class DeviantartExtractor(Extractor): else: html.append(text.escape(content["text"])) + def _tiptap_process_children(self, html, content): + children = content.get("content") + if children: + for block in children: + self._tiptap_process_content(html, block) + + def _tiptap_process_indentation(self, html, attrs): + itype = ("text-indent" if attrs.get("indentType") == "line" else + "margin-inline-start") + isize = str((attrs.get("indentation") or 0) * 24) + html.append(itype + ":" + isize + "px") + def _tiptap_process_deviation(self, html, content): dev = content["attrs"]["deviation"] media = dev.get("media") or () @@ -758,19 +830,22 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ self.api.user_friends_unwatch(username) def _eclipse_media(self, media, format="preview"): - url = [media["baseUri"], ] + url = [media["baseUri"]] formats = { fmt["t"]: fmt for fmt in media["types"] } - tokens = media["token"] - if len(tokens) == 1: - fmt = formats[format] - url.append(fmt["c"].replace("<prettyName>", media["prettyName"])) - url.append("?token=") - url.append(tokens[-1]) + tokens = media.get("token") or () + if tokens: + if len(tokens) <= 1: + fmt = formats[format] + if "c" in fmt: + url.append(fmt["c"].replace( + "<prettyName>", media["prettyName"])) + url.append("?token=") + url.append(tokens[-1]) return "".join(url), formats diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 2f0230a..4559aff 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -25,7 +25,8 @@ class DirectlinkExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.data = match.groupdict() + self.data = data = match.groupdict() + self.subcategory = ".".join(data["domain"].rsplit(".", 2)[-2:]) def items(self): data = self.data diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 308b42c..e15e13c 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -122,7 +122,10 @@ class HitomiTagExtractor(Extractor): self.tag = tag def items(self): - data = {"_extractor": HitomiGalleryExtractor} + data = { + "_extractor": HitomiGalleryExtractor, + "search_tags": text.unquote(self.tag.rpartition("-")[0]), + } nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format( self.type, self.tag) headers = { @@ -202,12 +205,14 @@ class HitomiSearchExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.query = match.group(1) - self.tags = text.unquote(self.query).split(" ") + self.tags = text.unquote(self.query) def items(self): - data = {"_extractor": HitomiGalleryExtractor} - - results = [self.get_nozomi_items(tag) for tag in self.tags] + data = { + "_extractor": HitomiGalleryExtractor, + "search_tags": self.tags, + } + results = [self.get_nozomi_items(tag) for tag in self.tags.split(" ")] intersects = set.intersection(*results) for gallery_id in sorted(intersects, reverse=True): @@ -219,20 +224,16 @@ class HitomiSearchExtractor(Extractor): area, tag, language = self.get_nozomi_args(full_tag) if area: - referer_base = "{}/n/{}/{}-{}.html".format( - self.root, area, tag, language) - nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format( + nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format( area, tag, language) else: - referer_base = "{}/n/{}-{}.html".format( - self.root, tag, language) - nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format( + nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format( tag, language) headers = { "Origin": self.root, "Cache-Control": "max-age=0", - "Referer": "{}/search.html?{}".format(referer_base, self.query), + "Referer": "{}/search.html?{}".format(self.root, self.query), } response = self.request(nozomi_url, headers=headers) @@ -251,7 +252,7 @@ class HitomiSearchExtractor(Extractor): language = tag tag = "index" - return area, tag, language + return area, tag.replace("_", " "), language @memcache(maxage=1800) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8c5b180..e344b2f 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -172,8 +172,8 @@ class InstagramExtractor(Extractor): "post_shortcode": post["code"], "post_url": "{}/p/{}/".format(self.root, post["code"]), "likes": post.get("like_count", 0), - "pinned": post.get("timeline_pinned_user_ids", ()), "liked": post.get("has_liked", False), + "pinned": self._extract_pinned(post), } caption = post["caption"] @@ -385,6 +385,10 @@ class InstagramExtractor(Extractor): "username" : user["username"], "full_name": user["full_name"]}) + def _extract_pinned(self, post): + return (post.get("timeline_pinned_user_ids") or + post.get("clips_tab_pinned_user_ids") or ()) + def _init_cursor(self): cursor = self.config("cursor", True) if cursor is True: @@ -451,6 +455,12 @@ class InstagramPostsExtractor(InstagramExtractor): uid = self.api.user_id(self.item) return self.api.user_feed(uid) + def _extract_pinned(self, post): + try: + return post["timeline_pinned_user_ids"] + except KeyError: + return () + class InstagramReelsExtractor(InstagramExtractor): """Extractor for an Instagram user's reels""" @@ -462,6 +472,12 @@ class InstagramReelsExtractor(InstagramExtractor): uid = self.api.user_id(self.item) return self.api.user_clips(uid) + def _extract_pinned(self, post): + try: + return post["clips_tab_pinned_user_ids"] + except KeyError: + return () + class InstagramTaggedExtractor(InstagramExtractor): """Extractor for an Instagram user's tagged posts""" diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index a7caca9..66bbab5 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -455,9 +455,15 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): reverse=(order == "desc")) for user in users: - user["_extractor"] = KemonopartyUserExtractor - url = "{}/{}/user/{}".format( - self.root, user["service"], user["id"]) + service = user["service"] + if service == "discord": + user["_extractor"] = KemonopartyDiscordServerExtractor + url = "{}/discord/server/{}".format( + self.root, user["id"]) + else: + user["_extractor"] = KemonopartyUserExtractor + url = "{}/{}/user/{}".format( + self.root, service, user["id"]) yield Message.Queue, url, user elif type == "post": diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index fe26704..8a729f3 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -11,6 +11,8 @@ from .common import Extractor, Message from .. import text +BASE_PATTERN = r"(?:https?://)?(?:www\.)?piczel\.tv" + class PiczelExtractor(Extractor): """Base class for piczel extractors""" @@ -30,6 +32,7 @@ class PiczelExtractor(Extractor): if post["multi"]: images = post["images"] del post["images"] + post["count"] = len(images) yield Message.Directory, post for post["num"], image in enumerate(images): if "id" in image: @@ -39,6 +42,7 @@ class PiczelExtractor(Extractor): yield Message.Url, url, text.nameext_from_url(url, post) else: + post["count"] = 1 yield Message.Directory, post post["num"] = 0 url = post["image"]["url"] @@ -47,35 +51,27 @@ class PiczelExtractor(Extractor): def posts(self): """Return an iterable with all relevant post objects""" - def _pagination(self, url, folder_id=None): - params = { - "from_id" : None, - "folder_id": folder_id, - } + def _pagination(self, url, pnum=1): + params = {"page": pnum} while True: data = self.request(url, params=params).json() - if not data: - return - params["from_id"] = data[-1]["id"] - for post in data: - if not folder_id or folder_id == post["folder_id"]: - yield post + yield from data["data"] + + params["page"] = data["meta"]["next_page"] + if not params["page"]: + return class PiczelUserExtractor(PiczelExtractor): """Extractor for all images from a user's gallery""" subcategory = "user" - pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$" example = "https://piczel.tv/gallery/USER" - def __init__(self, match): - PiczelExtractor.__init__(self, match) - self.user = match.group(1) - def posts(self): - url = "{}/api/users/{}/gallery".format(self.root_api, self.user) + url = "{}/api/users/{}/gallery".format(self.root_api, self.groups[0]) return self._pagination(url) @@ -84,29 +80,20 @@ class PiczelFolderExtractor(PiczelExtractor): subcategory = "folder" directory_fmt = ("{category}", "{user[username]}", "{folder[name]}") archive_fmt = "f{folder[id]}_{id}_{num}" - pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv" - r"/gallery/(?!image)([^/?#]+)/(\d+)") + pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)" example = "https://piczel.tv/gallery/USER/12345" - def __init__(self, match): - PiczelExtractor.__init__(self, match) - self.user, self.folder_id = match.groups() - def posts(self): - url = "{}/api/users/{}/gallery".format(self.root_api, self.user) - return self._pagination(url, int(self.folder_id)) + url = "{}/api/gallery/folder/{}".format(self.root_api, self.groups[0]) + return self._pagination(url) class PiczelImageExtractor(PiczelExtractor): """Extractor for individual images""" subcategory = "image" - pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)" + pattern = BASE_PATTERN + r"/gallery/image/(\d+)" example = "https://piczel.tv/gallery/image/12345" - def __init__(self, match): - PiczelExtractor.__init__(self, match) - self.image_id = match.group(1) - def posts(self): - url = "{}/api/gallery/{}".format(self.root_api, self.image_id) + url = "{}/api/gallery/{}".format(self.root_api, self.groups[0]) return (self.request(url).json(),) diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py index e09a7aa..e371ee2 100644 --- a/gallery_dl/extractor/poipiku.py +++ b/gallery_dl/extractor/poipiku.py @@ -52,20 +52,23 @@ class PoipikuExtractor(Extractor): } yield Message.Directory, post - post["num"] = 0 + post["num"] = warning = 0 while True: thumb = extr('class="IllustItemThumbImg" src="', '"') if not thumb: break elif thumb.startswith(("//img.poipiku.com/img/", "/img/")): + if "/warning" in thumb: + warning = True + self.log.debug("%s: %s", post["post_id"], thumb) continue post["num"] += 1 url = text.ensure_http_scheme(thumb[:-8]).replace( "//img.", "//img-org.", 1) yield Message.Url, url, text.nameext_from_url(url, post) - if not extr('ShowAppendFile', '<'): + if not warning and not extr('ShowAppendFile', '<'): continue url = self.root + "/f/ShowAppendFileF.jsp" @@ -87,7 +90,8 @@ class PoipikuExtractor(Extractor): page = resp["html"] if (resp.get("result_num") or 0) < 0: - self.log.warning("'%s'", page.replace("<br/>", " ")) + self.log.warning("%s: '%s'", + post["post_id"], page.replace("<br/>", " ")) for thumb in text.extract_iter( page, 'class="IllustItemThumbImg" src="', '"'): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index d5309dc..5e3a958 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -131,7 +131,7 @@ class SankakuPoolExtractor(SankakuExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}") archive_fmt = "p_{pool}_{id}" - pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\d+)" + pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)" example = "https://sankaku.app/books/12345" def __init__(self, match): diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 7c760ac..8668330 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -11,6 +11,7 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache +import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)" @@ -98,9 +99,10 @@ class SubscribestarExtractor(Extractor): media.append(item) attachments = text.extr( - html, 'class="uploads-docs"', 'data-role="post-edit_form"') + html, 'class="uploads-docs"', 'class="post-edit_form"') if attachments: - for att in attachments.split('class="doc_preview"')[1:]: + for att in re.split( + r'class="doc_preview[" ]', attachments)[1:]: media.append({ "id" : text.parse_int(text.extr( att, 'data-upload-id="', '"')), @@ -110,6 +112,20 @@ class SubscribestarExtractor(Extractor): "type": "attachment", }) + audios = text.extr( + html, 'class="uploads-audios"', 'class="post-edit_form"') + if audios: + for audio in re.split( + r'class="audio_preview-data[" ]', audios)[1:]: + media.append({ + "id" : text.parse_int(text.extr( + audio, 'data-upload-id="', '"')), + "name": text.unescape(text.extr( + audio, 'audio_preview-title">', '<')), + "url" : text.unescape(text.extr(audio, 'src="', '"')), + "type": "audio", + }) + return media def _data_from_post(self, html): @@ -121,9 +137,7 @@ class SubscribestarExtractor(Extractor): "author_nick": text.unescape(extr('>', '<')), "date" : self._parse_datetime(extr( 'class="post-date">', '</').rpartition(">")[2]), - "content" : (extr( - '<div class="post-content', '<div class="post-uploads') - .partition(">")[2]), + "content" : extr('<body>', '</body>').strip(), } def _parse_datetime(self, dt): @@ -180,7 +194,5 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "author_nick": text.unescape(extr('alt="', '"')), "date" : self._parse_datetime(extr( '<span class="star_link-types">', '<')), - "content" : (extr( - '<div class="post-content', '<div class="post-uploads') - .partition(">")[2]), + "content" : extr('<body>', '</body>').strip(), } diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py index b6917cc..b122f26 100644 --- a/gallery_dl/extractor/szurubooru.py +++ b/gallery_dl/extractor/szurubooru.py @@ -92,6 +92,10 @@ BASE_PATTERN = SzurubooruExtractor.update({ "root": "https://snootbooru.com", "pattern": r"snootbooru\.com", }, + "visuabusters": { + "root": "https://www.visuabusters.com/booru", + "pattern": r"(?:www\.)?visuabusters\.com/booru", + }, }) diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py index e756385..35a346d 100644 --- a/gallery_dl/extractor/tapas.py +++ b/gallery_dl/extractor/tapas.py @@ -85,8 +85,8 @@ class TapasEpisodeExtractor(TapasExtractor): episode = data["episode"] if not episode.get("free") and not episode.get("unlocked"): raise exception.AuthorizationError( - "%s: Episode '%s' not unlocked", - episode_id, episode["title"]) + "{}: Episode '{}' not unlocked".format( + episode_id, episode["title"])) html = data["html"] episode["series"] = self._extract_series(html) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 651745a..4b28924 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.2" +__version__ = "1.28.3" __variant__ = None |
