diff options
Diffstat (limited to 'gallery_dl/extractor/bunkr.py')
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 114 |
1 files changed, 40 insertions, 74 deletions
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 35b2752..5509f5a 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -10,6 +10,18 @@ from .lolisafe import LolisafeAlbumExtractor from .. import text +from urllib.parse import urlsplit, urlunsplit + +MEDIA_DOMAIN_OVERRIDES = { + "cdn9.bunkr.ru" : "c9.bunkr.ru", + "cdn12.bunkr.ru": "media-files12.bunkr.la", + "cdn-pizza.bunkr.ru": "pizza.bunkr.ru", +} + +CDN_HOSTED_EXTENSIONS = ( + ".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", ".wmv", + ".zip", ".rar", ".7z", +) class BunkrAlbumExtractor(LolisafeAlbumExtractor): @@ -17,53 +29,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): category = "bunkr" root = "https://bunkrr.su" pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)" - test = ( - ("https://bunkrr.su/a/Lktg9Keq", { - "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - "keyword": { - "album_id": "Lktg9Keq", - "album_name": 'test テスト "&>', - "count": 1, - "filename": 'test-テスト-"&>-QjgneIQv', - "id": "QjgneIQv", - "name": 'test-テスト-"&>', - "num": int, - }, - }), - # mp4 (#2239) - ("https://app.bunkr.ru/a/ptRHaCn2", { - "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4", - "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471", - }), - # cdn4 - ("https://bunkr.is/a/iXTTc1o2", { - "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/", - "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8", - "keyword": { - "album_id": "iXTTc1o2", - "album_name": "test2", - "album_size": "691.1 KB", - "count": 2, - "description": "072022", - "filename": "re:video-wFO9FtxG|image-sZrQUeOx", - "id": "re:wFO9FtxG|sZrQUeOx", - "name": "re:video|image", - "num": int, - }, - }), - # cdn12 .ru TLD (#4147) - ("https://bunkrr.su/a/j1G29CnD", { - "pattern": r"https://(cdn12.bunkr.ru|media-files12.bunkr.la)/\w+", - "count": 8, - }), - ("https://bunkrr.su/a/Lktg9Keq"), - ("https://bunkr.la/a/Lktg9Keq"), - ("https://bunkr.su/a/Lktg9Keq"), - ("https://bunkr.ru/a/Lktg9Keq"), - ("https://bunkr.is/a/Lktg9Keq"), - ("https://bunkr.to/a/Lktg9Keq"), - ) + example = "https://bunkrr.su/a/ID" def fetch_album(self, album_id): # album metadata @@ -72,37 +38,37 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): page, "<h1", "</div>").partition(">")[2]) count, _, size = info[1].split(None, 2) - # files - cdn = None - files = [] - append = files.append - headers = {"Referer": self.root + "/"} - pos = page.index('class="grid-images') - for url in text.extract_iter(page, '<a href="', '"', pos): - if url.startswith("/"): - if not cdn: - # fetch cdn root from download page - durl = "{}/d/{}".format(self.root, url[3:]) - cdn = text.extr(self.request( - durl).text, 'link.href = "', '"') - cdn = cdn[:cdn.index("/", 8)] - url = cdn + url[2:] - - url = text.unescape(url) - if url.endswith((".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", - ".zip", ".rar", ".7z")): - if url.startswith("https://cdn12."): - url = ("https://media-files12.bunkr.la" + - url[url.find("/", 14):]) - else: - url = url.replace("://cdn", "://media-files", 1) - append({"file": url, "_http_headers": headers}) + urls = list(text.extract_iter(page, '<a href="', '"', pos)) - return files, { + return self._extract_files(urls), { "album_id" : self.album_id, "album_name" : text.unescape(info[0]), "album_size" : size[1:-1], "description": text.unescape(info[2]) if len(info) > 2 else "", - "count" : len(files), + "count" : len(urls), } + + def _extract_files(self, urls): + for url in urls: + if url.startswith("/"): + try: + page = self.request(self.root + text.unescape(url)).text + if url[1] == "v": + url = text.extr(page, '<source src="', '"') + else: + url = text.extr(page, '<img src="', '"') + except Exception as exc: + self.log.error("%s: %s", exc.__class__.__name__, exc) + continue + + else: + if url.lower().endswith(CDN_HOSTED_EXTENSIONS): + scheme, domain, path, query, fragment = urlsplit(url) + if domain in MEDIA_DOMAIN_OVERRIDES: + domain = MEDIA_DOMAIN_OVERRIDES[domain] + else: + domain = domain.replace("cdn", "media-files", 1) + url = urlunsplit((scheme, domain, path, query, fragment)) + + yield {"file": text.unescape(url)} |
