diff options
| author | 2023-03-13 02:07:49 -0400 | |
|---|---|---|
| committer | 2023-03-13 02:07:49 -0400 | |
| commit | 10987f08f8b6c510ba64f4b42d95ba67eec6e5b0 (patch) | |
| tree | 1af82cad9ac859a70cafc976a980280b939cfcc7 /gallery_dl/extractor/bunkr.py | |
| parent | 919f8ba16a7b82ba1099bd25b2c61c7881a05aa2 (diff) | |
New upstream version 1.25.0. (ref: upstream/1.25.0)
Diffstat (limited to 'gallery_dl/extractor/bunkr.py')
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 92 |
1 files changed, 52 insertions, 40 deletions
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 1c339a9..17d066d 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -6,20 +6,19 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://bunkr.ru/""" +"""Extractors for https://bunkr.su/""" from .lolisafe import LolisafeAlbumExtractor from .. import text -import json class BunkrAlbumExtractor(LolisafeAlbumExtractor): - """Extractor for bunkr.ru albums""" + """Extractor for bunkr.su albums""" category = "bunkr" - root = "https://bunkr.ru" - pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:ru|is|to)/a/([^/?#]+)" + root = "https://bunkr.su" + pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u|is|to)/a/([^/?#]+)" test = ( - ("https://bunkr.ru/a/Lktg9Keq", { + ("https://bunkr.su/a/Lktg9Keq", { "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { @@ -33,7 +32,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): }, }), # mp4 (#2239) - ("https://app.bunkr.is/a/ptRHaCn2", { + ("https://app.bunkr.ru/a/ptRHaCn2", { "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4", "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471", }), @@ -41,44 +40,57 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): ("https://bunkr.is/a/iXTTc1o2", { "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/", "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8", + "keyword": { + "album_id": "iXTTc1o2", + "album_name": "test2", + "album_size": "691.1 KB", + "count": 2, + "description": "072022", + "filename": "re:video-wFO9FtxG|image-sZrQUeOx", + "id": "re:wFO9FtxG|sZrQUeOx", + "name": "re:video|image", + "num": int, + }, }), ("https://bunkr.to/a/Lktg9Keq"), ) def fetch_album(self, album_id): - root = self.root + # album metadata + page = self.request(self.root + "/a/" + self.album_id).text + info = 
text.split_html(text.extr( + page, "<h1", "</div>").partition(">")[2]) + count, _, size = info[1].split(None, 2) + + # files + cdn = None + files = [] + append = files.append + headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"} - try: - data = json.loads(text.extr( - self.request(root + "/a/" + self.album_id).text, - 'id="__NEXT_DATA__" type="application/json">', '<')) - album = data["props"]["pageProps"]["album"] - files = album["files"] - except Exception as exc: - self.log.debug("%s: %s", exc.__class__.__name__, exc) - self.log.debug("Falling back to lolisafe API") - self.root = root.replace("://", "://app.", 1) - files, data = LolisafeAlbumExtractor.fetch_album(self, album_id) - # fix file URLs (bunkr..ru -> bunkr.ru) (#3481) - for file in files: - file["file"] = file["file"].replace("bunkr..", "bunkr.", 1) - else: - for file in files: - file["file"] = file["cdn"] + "/" + file["name"] - data = { - "album_id" : self.album_id, - "album_name" : text.unescape(album["name"]), - "description": text.unescape(album["description"]), - "count" : len(files), - } + pos = page.index('class="grid-images') + for url in text.extract_iter(page, '<a href="', '"', pos): + if url.startswith("/"): + if not cdn: + # fetch cdn root from download page + durl = "{}/d/{}".format(self.root, url[3:]) + cdn = text.extr(self.request( + durl).text, 'link.href = "', '"') + cdn = cdn[:cdn.index("/", 8)] + url = cdn + url[2:] - headers = {"Referer": root.replace("://", "://stream.", 1) + "/"} - for file in files: - if file["file"].endswith( - (".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", - ".zip", ".rar", ".7z")): - file["_http_headers"] = headers - file["file"] = file["file"].replace( - "://cdn", "://media-files", 1) + url = text.unescape(url) + if url.endswith((".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", + ".zip", ".rar", ".7z")): + append({"file": url.replace("://cdn", "://media-files", 1), + "_http_headers": headers}) + else: + append({"file": url}) - return 
files, data + return files, { + "album_id" : self.album_id, + "album_name" : text.unescape(info[0]), + "album_size" : size[1:-1], + "description": text.unescape(info[2]) if len(info) > 2 else "", + "count" : len(files), + } |
