# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://redbust.com/"""

from .common import GalleryExtractor, Extractor, Message
from .. import text

# URL prefix (scheme optional) that RedbustGalleryExtractor.pattern is
# built from.
BASE_PATTERN = r"(?:https?://)?redbust\.com"


class RedbustExtractor(Extractor):
    """Base class for RedBust extractors"""
    category = "redbust"
    # Prefix of every URL assembled in _pagination().
    root = "https://redbust.com"
    filename_fmt = "{filename}.{extension}"

    def items(self):
        """Yield one Message.Queue entry per URL from self.galleries().

        Every entry carries the same data dict, whose "_extractor" key
        is RedbustGalleryExtractor.  galleries() is not defined on this
        class -- presumably concrete subclasses provide it (TODO confirm).
        """
        data = {"_extractor": RedbustGalleryExtractor}
        for url in self.galleries():
            yield Message.Queue, url, data

    def _pagination(self, path, page=None):
        """Generate the href values found on a listing and its later pages.

        path: string appended to self.root to form the listing URL.
        page: text of the first listing page; when None, it is requested
            from "{root}{path}/".

        Iteration ends as soon as the URL of the next page number does
        not occur in the text of the current page.
        """
        if page is None:
            url = f"{self.root}{path}/"
            base = url + "page/"
            page = self.request(url).text
        else:
            base = f"{self.root}{path}/page/"
        pnum = 1
        while True:
            # NOTE(review): the first marker literal below contains only
            # line breaks in this copy of the file, which is not a valid
            # single-quoted string.  Its original content looks lost
            # (possibly stripped markup); restore it from the upstream
            # source before relying on this loop.
            # Presumably each `post` is the text between the two markers
            # (TODO confirm against text.extract_iter).
            for post in text.extract_iter(
                    page, '

', "rel="):
                yield text.extr(post, 'href="', '"')
            pnum += 1
            url = f"{base}{pnum}/"
            # The presence of the next page's URL in the current page is
            # the only "has more pages" signal used here.
            if url not in page:
                return
            page = self.request(url).text


# NOTE(review): the remainder of this span is the beginning of
# RedbustGalleryExtractor.  That class continues beyond this span and text
# is missing from it (e.g. after `text.extr( page, "`), so it is left
# exactly as found rather than reformatted.
class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor): """Extractor for RedBust galleries""" pattern = BASE_PATTERN + r"/([\w-]+)/?$" example = "https://redbust.com/TITLE/" def items(self): url = f"{self.root}/{self.groups[0]}/" self.page = page = self.request(url).text self.gallery_id = gid = text.extr( page, "', "rel="): url = text.extr(post, 'href="', '"') yield Message.Queue, url, data pnum += 1 url = f"{base}{pnum}/" if url not in page: return page = self.request(url).text def metadata(self, _): extr = text.extract_from(self.page) return { "gallery_id" : self.gallery_id, "gallery_slug": self.groups[0], "categories" : text.split_html(extr( '
  • ', "
  • "))[::2], "title" : text.unescape(extr('class="post-title">', "<")), "date" : text.parse_datetime( extr('class="post-byline">', "<").strip(), "%B %d, %Y"), "views" : text.parse_int(extr("", "v").replace(",", "")), "tags" : text.split_html(extr( 'class="post-tags">', ""): if src := text.extr(img, 'src="', '"'): path, _, end = src.rpartition("-") if "x" in end: url = f"{path}.{end.rpartition('.')[2]}" data = None if src == url else {"_fallback": (src,)} else: url = src data = None results.append((url, data)) if not results: # fallback for older galleries for path in text.extract_iter( self.page, '