diff options
Diffstat (limited to 'gallery_dl/extractor/bunkr.py')
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 83 |
1 files changed, 38 insertions, 45 deletions
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index e7fc14b..1a0e47d 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,32 +6,39 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://bunkrr.ru/"""
+"""Extractors for https://bunkr.sk/"""
 
 from .lolisafe import LolisafeAlbumExtractor
 from .. import text
-from urllib.parse import urlsplit, urlunsplit
 
-BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:[rs]u|la|is|to)"
+BASE_PATTERN = (
+    r"(?:https?://)?(?:app\.)?(bunkr+"
+    r"\.(?:s[kiu]|ru|la|is|to|ac|black|cat|media|red|site|ws))"
+)
 
-MEDIA_DOMAIN_OVERRIDES = {
-    "cdn9.bunkr.ru" : "c9.bunkr.ru",
-    "cdn12.bunkr.ru": "media-files12.bunkr.la",
-    "cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
+LEGACY_DOMAINS = {
+    "bunkr.ru",
+    "bunkrr.ru",
+    "bunkr.su",
+    "bunkrr.su",
+    "bunkr.la",
+    "bunkr.is",
+    "bunkr.to",
 }
 
-CDN_HOSTED_EXTENSIONS = (
-    ".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", ".wmv",
-    ".zip", ".rar", ".7z",
-)
-
 
 class BunkrAlbumExtractor(LolisafeAlbumExtractor):
-    """Extractor for bunkrr.ru albums"""
+    """Extractor for bunkr.sk albums"""
     category = "bunkr"
-    root = "https://bunkrr.ru"
+    root = "https://bunkr.sk"
     pattern = BASE_PATTERN + r"/a/([^/?#]+)"
-    example = "https://bunkrr.ru/a/ID"
+    example = "https://bunkr.sk/a/ID"
+
+    def __init__(self, match):
+        LolisafeAlbumExtractor.__init__(self, match)
+        domain = match.group(match.lastindex-1)
+        if domain not in LEGACY_DOMAINS:
+            self.root = "https://" + domain
 
     def fetch_album(self, album_id):
         # album metadata
@@ -53,46 +60,32 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
 
     def _extract_files(self, urls):
         for url in urls:
-            if url.startswith("/"):
-                try:
-                    url = self._extract_file(text.unescape(url))
-                except Exception as exc:
-                    self.log.error("%s: %s", exc.__class__.__name__, exc)
-                    continue
-
-            else:
-                if url.lower().endswith(CDN_HOSTED_EXTENSIONS):
-                    scheme, domain, path, query, fragment = urlsplit(url)
-                    if domain in MEDIA_DOMAIN_OVERRIDES:
-                        domain = MEDIA_DOMAIN_OVERRIDES[domain]
-                    else:
-                        domain = domain.replace("cdn", "media-files", 1)
-                    url = urlunsplit((scheme, domain, path, query, fragment))
-
+            try:
+                url = self._extract_file(text.unescape(url))
+            except Exception as exc:
+                self.log.error("%s: %s", exc.__class__.__name__, exc)
+                continue
             yield {"file": text.unescape(url)}
 
-    def _extract_file(self, path):
-        page = self.request(self.root + path).text
-        if path[1] == "v":
-            url = text.extr(page, '<source src="', '"')
-        else:
-            url = text.extr(page, '<img src="', '"')
-        if not url:
-            url = text.rextract(
-                page, ' href="', '"', page.rindex("Download"))[0]
-        return url
+    def _extract_file(self, url):
+        page = self.request(url).text
+        return (
+            text.extr(page, '<source src="', '"') or
+            text.extr(page, '<img src="', '"') or
+            text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
+        )
 
 
 class BunkrMediaExtractor(BunkrAlbumExtractor):
-    """Extractor for bunkrr.ru media links"""
+    """Extractor for bunkr.sk media links"""
     subcategory = "media"
     directory_fmt = ("{category}",)
-    pattern = BASE_PATTERN + r"/[vid]/([^/?#]+)"
-    example = "https://bunkrr.ru/v/FILENAME"
+    pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
+    example = "https://bunkr.sk/v/FILENAME"
 
     def fetch_album(self, album_id):
         try:
-            url = self._extract_file(urlsplit(self.url).path)
+            url = self._extract_file(self.root + self.album_id)
         except Exception as exc:
             self.log.error("%s: %s", exc.__class__.__name__, exc)
             return (), {}
