about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/bunkr.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/bunkr.py')
-rw-r--r-- gallery_dl/extractor/bunkr.py 83
1 file changed, 38 insertions, 45 deletions
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index e7fc14b..1a0e47d 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,32 +6,39 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkrr.ru/"""
+"""Extractors for https://bunkr.sk/"""
from .lolisafe import LolisafeAlbumExtractor
from .. import text
-from urllib.parse import urlsplit, urlunsplit
-BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:[rs]u|la|is|to)"
+BASE_PATTERN = (
+ r"(?:https?://)?(?:app\.)?(bunkr+"
+ r"\.(?:s[kiu]|ru|la|is|to|ac|black|cat|media|red|site|ws))"
+)
-MEDIA_DOMAIN_OVERRIDES = {
- "cdn9.bunkr.ru" : "c9.bunkr.ru",
- "cdn12.bunkr.ru": "media-files12.bunkr.la",
- "cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
+LEGACY_DOMAINS = {
+ "bunkr.ru",
+ "bunkrr.ru",
+ "bunkr.su",
+ "bunkrr.su",
+ "bunkr.la",
+ "bunkr.is",
+ "bunkr.to",
}
-CDN_HOSTED_EXTENSIONS = (
- ".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", ".wmv",
- ".zip", ".rar", ".7z",
-)
-
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkrr.ru albums"""
+ """Extractor for bunkr.sk albums"""
category = "bunkr"
- root = "https://bunkrr.ru"
+ root = "https://bunkr.sk"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://bunkrr.ru/a/ID"
+ example = "https://bunkr.sk/a/ID"
+
+ def __init__(self, match):
+ LolisafeAlbumExtractor.__init__(self, match)
+ domain = match.group(match.lastindex-1)
+ if domain not in LEGACY_DOMAINS:
+ self.root = "https://" + domain
def fetch_album(self, album_id):
# album metadata
@@ -53,46 +60,32 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def _extract_files(self, urls):
for url in urls:
- if url.startswith("/"):
- try:
- url = self._extract_file(text.unescape(url))
- except Exception as exc:
- self.log.error("%s: %s", exc.__class__.__name__, exc)
- continue
-
- else:
- if url.lower().endswith(CDN_HOSTED_EXTENSIONS):
- scheme, domain, path, query, fragment = urlsplit(url)
- if domain in MEDIA_DOMAIN_OVERRIDES:
- domain = MEDIA_DOMAIN_OVERRIDES[domain]
- else:
- domain = domain.replace("cdn", "media-files", 1)
- url = urlunsplit((scheme, domain, path, query, fragment))
-
+ try:
+ url = self._extract_file(text.unescape(url))
+ except Exception as exc:
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ continue
yield {"file": text.unescape(url)}
- def _extract_file(self, path):
- page = self.request(self.root + path).text
- if path[1] == "v":
- url = text.extr(page, '<source src="', '"')
- else:
- url = text.extr(page, '<img src="', '"')
- if not url:
- url = text.rextract(
- page, ' href="', '"', page.rindex("Download"))[0]
- return url
+ def _extract_file(self, url):
+ page = self.request(url).text
+ return (
+ text.extr(page, '<source src="', '"') or
+ text.extr(page, '<img src="', '"') or
+ text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
+ )
class BunkrMediaExtractor(BunkrAlbumExtractor):
- """Extractor for bunkrr.ru media links"""
+ """Extractor for bunkr.sk media links"""
subcategory = "media"
directory_fmt = ("{category}",)
- pattern = BASE_PATTERN + r"/[vid]/([^/?#]+)"
- example = "https://bunkrr.ru/v/FILENAME"
+ pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
+ example = "https://bunkr.sk/v/FILENAME"
def fetch_album(self, album_id):
try:
- url = self._extract_file(urlsplit(self.url).path)
+ url = self._extract_file(self.root + self.album_id)
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
return (), {}