# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://redbust.com/"""
from .common import GalleryExtractor, Extractor, Message
from .. import text
# Regex prefix matching the redbust.com host with an optional http(s)://
# scheme; extractor URL patterns in this module are built on top of it.
BASE_PATTERN = r"(?:https?://)?redbust\.com"
class RedbustExtractor(Extractor):
    """Base class for RedBust extractors

    items() relies on a galleries() method that is not defined on this
    class; subclasses using the default items() have to provide it.
    """
    category = "redbust"
    root = "https://redbust.com"
    filename_fmt = "{filename}.{extension}"

    def items(self):
        """Yield a (Message.Queue, url, data) tuple for every gallery URL

        Each URL produced by self.galleries() is queued together with a
        shared data dict naming RedbustGalleryExtractor as '_extractor'.
        """
        # RedbustGalleryExtractor is defined further down in this module;
        # the name is only looked up when the generator actually runs.
        data = {"_extractor": RedbustGalleryExtractor}
        for url in self.galleries():
            yield Message.Queue, url, data

    def _pagination(self, path, page=None):
        """Yield post URLs from a paginated listing

        Arguments:
            path: site-relative path of the listing; it is appended to
                  self.root and followed by '/' (first page) or
                  '/page/<N>/' (subsequent pages)
            page: HTML text of the first listing page, if the caller
                  already has it; when None, the first page is requested

        Yields the text between 'href="' and the next '"' of every post
        block found on each page. Iteration ends as soon as the URL of
        the next page does not occur in the current page's HTML.
        """
        base = f"{self.root}{path}/page/"
        if page is None:
            page = self.request(f"{self.root}{path}/").text

        pnum = 1
        while True:
            # NOTE(review): the opening marker was mangled in this copy of
            # the file (its HTML content was stripped, leaving an
            # unterminated string). '<h2 class="post-title">' is a
            # reconstruction - confirm against upstream / live page markup.
            for post in text.extract_iter(
                    page, '<h2 class="post-title">', "rel="):
                yield text.extr(post, 'href="', '"')

            # a link to the next page must be present in the current
            # page's HTML; otherwise this was the last page
            pnum += 1
            url = f"{base}{pnum}/"
            if url not in page:
                return
            page = self.request(url).text
class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor):
"""Extractor for RedBust galleries"""
pattern = BASE_PATTERN + r"/([\w-]+)/?$"
example = "https://redbust.com/TITLE/"
def items(self):
url = f"{self.root}/{self.groups[0]}/"
self.page = page = self.request(url).text
self.gallery_id = gid = text.extr(
page, "', "rel="):
url = text.extr(post, 'href="', '"')
yield Message.Queue, url, data
pnum += 1
url = f"{base}{pnum}/"
if url not in page:
return
page = self.request(url).text
def metadata(self, _):
extr = text.extract_from(self.page)
return {
"gallery_id" : self.gallery_id,
"gallery_slug": self.groups[0],
"categories" : text.split_html(extr(
'
', "
"))[::2],
"title" : text.unescape(extr('class="post-title">', "<")),
"date" : text.parse_datetime(
extr('class="post-byline">', "<").strip(), "%B %d, %Y"),
"views" : text.parse_int(extr("", "v").replace(",", "")),
"tags" : text.split_html(extr(
'class="post-tags">', ""):
if src := text.extr(img, 'src="', '"'):
path, _, end = src.rpartition("-")
if "x" in end:
url = f"{path}.{end.rpartition('.')[2]}"
data = None if src == url else {"_fallback": (src,)}
else:
url = src
data = None
results.append((url, data))
if not results:
# fallback for older galleries
for path in text.extract_iter(
self.page, '