# -*- coding: utf-8 -*-

# Copyright 2016-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Collection of extractors for various imagehosts"""

from .common import Extractor, Message
from .. import text, exception
from ..cache import memcache


class ImagehostImageExtractor(Extractor):
    """Base class for single-image extractors for various imagehosts

    Subclasses supply 'pattern' (group 1: page location, group 2: token)
    and override get_info(); metadata() is an optional hook.
    """
    basecategory = "imagehost"
    subcategory = "image"
    archive_fmt = "{token}"
    parent = True

    # Per-site knobs, overridden by subclasses as needed:
    #   _params   - None, a preset name ("simple" / "complex"), or form data
    #   _cookies  - passed as 'cookies' to the page request
    #   _encoding - passed as 'encoding' to the page request
    #   _validate - stored as "_http_validate" in the metadata when set
    _params = None
    _cookies = None
    _encoding = None
    _validate = None

    def __init__(self, match):
        """Store page URL and token; expand a '_params' preset name"""
        Extractor.__init__(self, match)
        self.page_url = (self.root or "https://") + match[1]
        self.token = match[2]

        # A preset name is replaced by the form fields it stands for;
        # any other value (None or a ready-made dict) is left untouched.
        preset = self._params
        if preset == "simple":
            self._params = {"imgContinue": "Continue+to+image+...+"}
        elif preset == "complex":
            self._params = {
                "op"  : "view",
                "id"  : self.token,
                "pre" : "1",
                "adb" : "1",
                "next": "Continue+to+image+...+",
            }

    def items(self):
        """Request the image page and yield its Directory and Url messages"""
        form = self._params
        response = self.request(
            self.page_url,
            # form data present -> submit it via POST, otherwise plain GET
            method="POST" if form else "GET",
            data=form,
            cookies=self._cookies,
            encoding=self._encoding,
        )
        page = response.text

        url, filename = self.get_info(page)
        if not url:
            # nothing to download
            return

        if not filename:
            data = text.nameext_from_url(url)
        else:
            data = text.nameext_from_name(filename)
            # fall back to the URL when the name carries no extension
            if not data["extension"]:
                data["extension"] = text.ext_from_url(url)

        data["token"] = self.token
        data["post_url"] = self.page_url
        # applied last, so metadata() values take precedence on key clashes
        data.update(self.metadata(page))

        # rewrite plain-HTTP image URLs to HTTPS
        if url.startswith("http:"):
            url = f"https:{url[5:]}"
        if self._validate is not None:
            data["_http_validate"] = self._validate

        yield Message.Directory, "", data
        yield Message.Url, url, data

    def get_info(self, page):
        """Find image-url and string to get filename from"""

    def metadata(self, page):
        """Return additional metadata"""
        return ()

    def not_found(self, resource=None):
        """Raise NotFoundError for 'resource' (default: own subcategory)"""
        what = resource or self.__class__.subcategory
        raise exception.NotFoundError(what)
"""Extractor for single images from imx.to""" category = "imxto" pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)" r"/(?:i/|img-)(\w+)(\.html)?)") example = "https://imx.to/i/ID" _params = "simple" _encoding = "utf-8" def __init__(self, match): ImagehostImageExtractor.__init__(self, match) if "/img-" in self.page_url: self.page_url = self.page_url.replace("img.yt", "imx.to") def get_info(self, page): url, pos = text.extract( page, '
", "").replace(" ", "")[:-1] width, _, height = extr(">", " px").partition("x") return { "size" : text.parse_bytes(size), "width" : text.parse_int(width), "height": text.parse_int(height), "hash" : extr(">", ""), } class ImxtoGalleryExtractor(ImagehostImageExtractor): """Extractor for image galleries from imx.to""" category = "imxto" subcategory = "gallery" pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))" example = "https://imx.to/g/ID" def items(self): page = self.request(self.page_url).text title, pos = text.extract(page, '
")[2]).strip(), } params = {"page": 1} while True: for url in text.extract_iter(page, "Last' in page: return params["page"] += 1 page = self.request(self.page_url, params=params).text class AcidimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from acidimg.cc""" category = "acidimg" pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)" example = "https://acidimg.cc/img-abc123.html" _params = "simple" _encoding = "utf-8" def get_info(self, page): url, pos = text.extract(page, "', '") data = {"_extractor": PixhostImageExtractor} for url in text.extract_iter(page, '', '<', pos) return url, text.unescape(filename) if filename else None class PostimgGalleryExtractor(ImagehostImageExtractor): """Extractor for images galleries from postimages.org""" category = "postimg" subcategory = "gallery" root = "https://postimg.cc" pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)" r"\.(?:cc|org)(/gallery/([^/?#]+))") example = "https://postimg.cc/gallery/ID" def items(self): page = self.request(self.page_url).text title = text.extr( page, 'property="og:title" content="', ' — Postimages"') data = { "_extractor" : PostimgImageExtractor, "gallery_title": text.unescape(title), } for token in text.extract_iter(page, 'data-image="', '"'): url = f"{self.root}/{token}" yield Message.Queue, url, data class TurboimagehostImageExtractor(ImagehostImageExtractor): """Extractor for single images from www.turboimagehost.com""" category = "turboimagehost" pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com" r"/p/(\d+)/[^/?#]+\.html)") example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html" def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("') date, pos = text.extract(page, '', 'by', pos) user, pos = text.extract(page, '>', '<', pos) date = date.split() return { "date": self.parse_datetime_iso(f"{date[0][:10]} {date[1]}"), "user": text.unescape(user), }