aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/bunkr.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/bunkr.py')
-rw-r--r--gallery_dl/extractor/bunkr.py50
1 files changed, 32 insertions, 18 deletions
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 240bbd3..780bdf1 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,15 +6,24 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.sk/"""
+"""Extractors for https://bunkr.si/"""
from .lolisafe import LolisafeAlbumExtractor
-from .. import text
-
-BASE_PATTERN = (
- r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))"
-)
+from .. import text, config
+
+
+if config.get(("extractor", "bunkr"), "tlds"):
+ BASE_PATTERN = (
+ r"(?:bunkr:(?:https?://)?([^/?#]+)|"
+ r"(?:https?://)?(?:app\.)?(bunkr+\.\w+))"
+ )
+else:
+ BASE_PATTERN = (
+ r"(?:bunkr:(?:https?://)?([^/?#]+)|"
+ r"(?:https?://)?(?:app\.)?(bunkr+"
+ r"\.(?:s[kiu]|[cf]i|ru|la|is|to|a[cx]"
+ r"|black|cat|media|red|site|ws|org)))"
+ )
LEGACY_DOMAINS = {
"bunkr.ru",
@@ -28,15 +37,15 @@ LEGACY_DOMAINS = {
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.sk albums"""
+ """Extractor for bunkr.si albums"""
category = "bunkr"
- root = "https://bunkr.sk"
+ root = "https://bunkr.si"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://bunkr.sk/a/ID"
+ example = "https://bunkr.si/a/ID"
def __init__(self, match):
LolisafeAlbumExtractor.__init__(self, match)
- domain = match.group(match.lastindex-1)
+ domain = self.groups[0] or self.groups[1]
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
@@ -69,11 +78,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def _extract_file(self, url):
page = self.request(url).text
- return (
- text.extr(page, '<source src="', '"') or
- text.extr(page, '<img src="', '"') or
- text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
- )
+ url = (text.extr(page, '<source src="', '"') or
+ text.extr(page, '<img src="', '"'))
+
+ if not url:
+ url_download = text.rextract(
+ page, ' href="', '"', page.rindex("Download"))[0]
+ page = self.request(text.unescape(url_download)).text
+ url = text.unescape(text.rextract(page, ' href="', '"')[0])
+
+ return url
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
@@ -83,11 +97,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
class BunkrMediaExtractor(BunkrAlbumExtractor):
- """Extractor for bunkr.sk media links"""
+ """Extractor for bunkr.si media links"""
subcategory = "media"
directory_fmt = ("{category}",)
pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
- example = "https://bunkr.sk/v/FILENAME"
+ example = "https://bunkr.si/v/FILENAME"
def fetch_album(self, album_id):
try: