diff options
Diffstat (limited to 'gallery_dl/extractor/bunkr.py')
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 50 |
1 files changed, 32 insertions, 18 deletions
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 240bbd3..780bdf1 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -6,15 +6,24 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://bunkr.sk/""" +"""Extractors for https://bunkr.si/""" from .lolisafe import LolisafeAlbumExtractor -from .. import text - -BASE_PATTERN = ( - r"(?:https?://)?(?:app\.)?(bunkr+" - r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))" -) +from .. import text, config + + +if config.get(("extractor", "bunkr"), "tlds"): + BASE_PATTERN = ( + r"(?:bunkr:(?:https?://)?([^/?#]+)|" + r"(?:https?://)?(?:app\.)?(bunkr+\.\w+))" + ) +else: + BASE_PATTERN = ( + r"(?:bunkr:(?:https?://)?([^/?#]+)|" + r"(?:https?://)?(?:app\.)?(bunkr+" + r"\.(?:s[kiu]|[cf]i|ru|la|is|to|a[cx]" + r"|black|cat|media|red|site|ws|org)))" + ) LEGACY_DOMAINS = { "bunkr.ru", @@ -28,15 +37,15 @@ LEGACY_DOMAINS = { class BunkrAlbumExtractor(LolisafeAlbumExtractor): - """Extractor for bunkr.sk albums""" + """Extractor for bunkr.si albums""" category = "bunkr" - root = "https://bunkr.sk" + root = "https://bunkr.si" pattern = BASE_PATTERN + r"/a/([^/?#]+)" - example = "https://bunkr.sk/a/ID" + example = "https://bunkr.si/a/ID" def __init__(self, match): LolisafeAlbumExtractor.__init__(self, match) - domain = match.group(match.lastindex-1) + domain = self.groups[0] or self.groups[1] if domain not in LEGACY_DOMAINS: self.root = "https://" + domain @@ -69,11 +78,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): def _extract_file(self, url): page = self.request(url).text - return ( - text.extr(page, '<source src="', '"') or - text.extr(page, '<img src="', '"') or - text.rextract(page, ' href="', '"', page.rindex("Download"))[0] - ) + url = (text.extr(page, '<source src="', '"') or + text.extr(page, '<img src="', '"')) + + if not url: + url_download = text.rextract( + page, ' href="', '"', page.rindex("Download"))[0] + page = self.request(text.unescape(url_download)).text + url = text.unescape(text.rextract(page, ' href="', '"')[0]) + + return url def _validate(self, response): if response.history and response.url.endswith("/maintenance-vid.mp4"): @@ -83,11 +97,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): class BunkrMediaExtractor(BunkrAlbumExtractor): - """Extractor for bunkr.sk media links""" + """Extractor for bunkr.si media links""" subcategory = "media" directory_fmt = ("{category}",) pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)" - example = "https://bunkr.sk/v/FILENAME" + example = "https://bunkr.si/v/FILENAME" def fetch_album(self, album_id): try: |
