about summary refs log tree commit diff stats
path: root/gallery_dl/extractor/redbust.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/redbust.py')
-rw-r--r-- gallery_dl/extractor/redbust.py 186
1 files changed, 0 insertions, 186 deletions
diff --git a/gallery_dl/extractor/redbust.py b/gallery_dl/extractor/redbust.py
deleted file mode 100644
index d00ed52..0000000
--- a/gallery_dl/extractor/redbust.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://redbust.com/"""
-
-from .common import GalleryExtractor, Extractor, Message
-from .. import text
-
-# URL prefix shared by every extractor pattern below; the scheme is optional
-BASE_PATTERN = r"(?:https?://)?redbust\.com"
-
-
-class RedbustExtractor(Extractor):
-    """Common base for all RedBust extractors"""
-    category = "redbust"
-    root = "https://redbust.com"
-    filename_fmt = "{filename}.{extension}"
-
-    def items(self):
-        """Queue every URL from self.galleries() for the gallery extractor
-
-        galleries() is not defined here; subclasses relying on this
-        method have to provide it.
-        """
-        queue_data = {"_extractor": RedbustGalleryExtractor}
-        for gallery_url in self.galleries():
-            yield Message.Queue, gallery_url, queue_data
-
-    def _pagination(self, path, page=None):
-        """Yield post URLs from the listing at 'path' and its numbered pages
-
-        'page' may hold the already downloaded HTML of the first listing
-        page; when it is None, that page gets requested here.
-        """
-        first_url = f"{self.root}{path}/"
-        page_base = f"{first_url}page/"
-        if page is None:
-            page = self.request(first_url).text
-
-        pnum = 2
-        while True:
-            headings = text.extract_iter(
-                page, '<h2 class="post-title">', "rel=")
-            for heading in headings:
-                yield text.extr(heading, 'href="', '"')
-
-            # continue only while the current page links to the next one
-            next_url = f"{page_base}{pnum}/"
-            if next_url not in page:
-                break
-            page = self.request(next_url).text
-            pnum += 1
-
-
-class RedbustGalleryExtractor(GalleryExtractor, RedbustExtractor):
-    """Extractor for RedBust galleries"""
-    pattern = BASE_PATTERN + r"/([\w-]+)/?$"
-    example = "https://redbust.com/TITLE/"
-
-    def items(self):
-        """Handle the URL as a gallery or as a category listing
-
-        A page containing a '?p=<id>' shortlink is processed as a gallery
-        by GalleryExtractor.items(); any other page is treated as a
-        category listing whose posts get queued.
-        """
-        url = f"{self.root}/{self.groups[0]}/"
-        self.page = page = self.request(url).text
-
-        self.gallery_id = gid = text.extr(
-            page, "<link rel='shortlink' href='https://redbust.com/?p=", "'")
-
-        if gid:
-            self.page_url = False
-            return GalleryExtractor.items(self)
-
-        self.subcategory = "category"
-        return self._items_category(page)
-
-    def _items_category(self, _):
-        """Queue all posts of a category listing
-
-        The argument is ignored; the HTML stored in self.page is used.
-        """
-        data = {"_extractor": RedbustGalleryExtractor}
-        # reuse the shared paginator and hand it the already fetched page
-        # instead of repeating its loop here
-        for url in self._pagination("/" + self.groups[0], self.page):
-            yield Message.Queue, url, data
-
-    def metadata(self, _):
-        """Collect gallery metadata from the HTML stored in self.page"""
-        # NOTE(review): presumably extract_from() scans forward through the
-        # page, making the order of the extr() calls significant - confirm
-        # before reordering any of the entries below
-        extr = text.extract_from(self.page)
-
-        return {
-            "gallery_id"  : self.gallery_id,
-            "gallery_slug": self.groups[0],
-            "categories"  : text.split_html(extr(
-                '<li class="category">', "</li>"))[::2],
-            "title"       : text.unescape(extr('class="post-title">', "<")),
-            "date"        : text.parse_datetime(
-                extr('class="post-byline">', "<").strip(), "%B %d, %Y"),
-            "views"       : text.parse_int(extr("</b>", "v").replace(",", "")),
-            "tags"        : text.split_html(extr(
-                'class="post-tags">', "</p"))[1:],
-        }
-
-    def images(self, _):
-        """Return a list of (url, data) tuples for the images in self.page
-
-        For a 'src' carrying a size suffix, the URL without that suffix
-        is returned and the original 'src' is attached as '_fallback'.
-        """
-        results = []
-
-        for img in text.extract_iter(self.page, "'><img ", ">"):
-            if src := text.extr(img, 'src="', '"'):
-                url = self._strip_size_suffix(src)
-                data = None if src == url else {"_fallback": (src,)}
-                results.append((url, data))
-
-        if not results:
-            # fallback for older galleries
-            results = [
-                (f"{self.root}/wp-content/uploads/{path}", None)
-                for path in text.extract_iter(
-                    self.page, '<img src="/wp-content/uploads/', '"')
-            ]
-
-        return results
-
-    @staticmethod
-    def _strip_size_suffix(src):
-        """Return 'src' without a trailing '-<width>x<height>' size suffix
-
-        Only a suffix made of two decimal numbers around an 'x' directly
-        before the file extension is removed. A bare "'x' in the last
-        dash-separated part" test also matches unrelated names, for
-        example everything after the dash of 'wp-content', and would turn
-        such a 'src' into a meaningless URL.
-        """
-        path, dash, end = src.rpartition("-")
-        size, dot, ext = end.rpartition(".")
-        width, _, height = size.partition("x")
-        if dash and dot and width.isdecimal() and height.isdecimal():
-            return f"{path}.{ext}"
-        return src
-
-
-class RedbustTagExtractor(RedbustExtractor):
-    """Extractor for the posts listed under a RedBust tag"""
-    subcategory = "tag"
-    pattern = BASE_PATTERN + r"/tag/([\w-]+)"
-    example = "https://redbust.com/tag/TAG/"
-
-    def galleries(self):
-        """Return an iterator over the post URLs of the matched tag"""
-        tag = self.groups[0]
-        return self._pagination(f"/tag/{tag}")
-
-
-class RedbustArchiveExtractor(RedbustExtractor):
-    """Extractor for the posts of a RedBust monthly archive"""
-    subcategory = "archive"
-    pattern = BASE_PATTERN + r"(/\d{4}/\d{2})"
-    example = "https://redbust.com/2010/01/"
-
-    def galleries(self):
-        """Return an iterator over the post URLs of the matched month"""
-        # the captured group already is the '/YYYY/MM' path
-        archive_path = self.groups[0]
-        return self._pagination(archive_path)
-
-
-class RedbustImageExtractor(RedbustExtractor):
-    """Extractor for RedBust images"""
-    subcategory = "image"
-    directory_fmt = ("{category}", "{title}")
-    pattern = BASE_PATTERN + r"/(?!tag/|\d{4}/)([\w-]+)/([\w-]+)/?$"
-    example = "https://redbust.com/TITLE/SLUG/"
-
-    def items(self):
-        """Yield a directory and a URL message for a single image page
-
-        Nothing is yielded when no image URL can be found on the page.
-        """
-        gallery_slug, image_slug = self.groups
-        url = f"{self.root}/{gallery_slug}/{image_slug}/"
-        page = self.request(url).text
-
-        img_url = None
-
-        # Prefer the largest candidate of the first srcset attribute
-        if srcset := text.extr(page, 'srcset="', '"'):
-            img_url = self._largest_from_srcset(srcset)
-
-        # Fallback: first 'img src=' between 'entry-inner ' and 'alt='
-        if not img_url:
-            if entry := text.extr(page, "entry-inner ", "alt="):
-                img_url = text.extr(entry, "img src=", " ").strip("\"'")
-
-        if not img_url:
-            return
-
-        # text after the last '-' of the URL; its part before the first
-        # '.' is parsed as the image number
-        end = img_url.rpartition("-")[2]
-        data = text.nameext_from_url(img_url, {
-            "title"       : text.unescape(text.extr(
-                page, 'title="Return to ', '"')),
-            "image_id"    : text.extr(
-                page, "rel='shortlink' href='https://redbust.com/?p=", "'"),
-            "gallery_slug": gallery_slug,
-            "image_slug"  : image_slug,
-            "num"         : text.parse_int(end.partition(".")[0]),
-            "count"       : 1,
-            "url"         : img_url,
-        })
-
-        yield Message.Directory, data
-        yield Message.Url, img_url, data
-
-    @staticmethod
-    def _largest_from_srcset(srcset):
-        """Return the URL with the greatest width descriptor in 'srcset'
-
-        Candidates without a '<number>w' descriptor count as width 0.
-        On equal widths the later candidate wins, so a srcset without any
-        width descriptors still yields its last entry. Returns None when
-        no candidate has a URL.
-        """
-        best_url = None
-        best_width = -1
-        for candidate in srcset.split(", "):
-            url, _, descriptor = candidate.strip().partition(" ")
-            descriptor = descriptor.strip()
-            number = descriptor[:-1]
-            if descriptor.endswith("w") and number.isdecimal():
-                width = int(number)
-            else:
-                width = 0
-            if url and width >= best_width:
-                best_url, best_width = url, width
-        return best_url