# -*- coding: utf-8 -*- # Copyright 2016-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Collection of extractors for various imagehosts""" from .common import Extractor, Message from .. import text, exception from ..cache import memcache from os.path import splitext class ImagehostImageExtractor(Extractor): """Base class for single-image extractors for various imagehosts""" basecategory = "imagehost" subcategory = "image" archive_fmt = "{token}" parent = True _https = True _params = None _cookies = None _encoding = None _validate = None def __init__(self, match): Extractor.__init__(self, match) if self.root: self.page_url = f"{self.root}{match[1]}" else: self.page_url = f"http{'s' if self._https else ''}://{match[1]}" self.token = match[2] if self._params == "simple": self._params = { "imgContinue": "Continue+to+image+...+", } elif self._params == "complex": self._params = { "op": "view", "id": self.token, "pre": "1", "adb": "1", "next": "Continue+to+image+...+", } def items(self): page = self.request( self.page_url, method=("POST" if self._params else "GET"), data=self._params, cookies=self._cookies, encoding=self._encoding, ).text url, filename = self.get_info(page) if not url: return if filename: data = text.nameext_from_name(filename) if not data["extension"]: data["extension"] = text.ext_from_url(url) else: data = text.nameext_from_url(url) data["token"] = self.token data["post_url"] = self.page_url data.update(self.metadata(page)) if self._https and url.startswith("http:"): url = "https:" + url[5:] if self._validate is not None: data["_http_validate"] = self._validate yield Message.Directory, "", data yield Message.Url, url, data def get_info(self, page): """Find image-url and string to get filename from""" def metadata(self, page): """Return additional metadata""" return () def not_found(self, resource=None): raise exception.NotFoundError(resource or self.__class__.subcategory) class ImxtoImageExtractor(ImagehostImageExtractor): """Extractor for single images from imx.to""" category = "imxto" pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)" r"/(?:i/|img-)(\w+)(\.html)?)") example = "https://imx.to/i/ID" _params = "simple" _encoding = "utf-8" def __init__(self, match): ImagehostImageExtractor.__init__(self, match) if "/img-" in self.page_url: self.page_url = self.page_url.replace("img.yt", "imx.to") self.url_ext = True else: self.url_ext = False def get_info(self, page): url, pos = text.extract( page, '
", "").replace(" ", "")[:-1] width, _, height = extr(">", " px").partition("x") return { "size" : text.parse_bytes(size), "width" : text.parse_int(width), "height": text.parse_int(height), "hash" : extr(">", ""), } class ImxtoGalleryExtractor(ImagehostImageExtractor): """Extractor for image galleries from imx.to""" category = "imxto" subcategory = "gallery" pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))" example = "https://imx.to/g/ID" def items(self): page = self.request(self.page_url).text title, pos = text.extract(page, '
")[2]).strip(), } params = {"page": 1} while True: for url in text.extract_iter(page, "Last' in page: return params["page"] += 1 page = self.request(self.page_url, params=params).text class AcidimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from acidimg.cc""" category = "acidimg" pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)" example = "https://acidimg.cc/img-abc123.html" _params = "simple" _encoding = "utf-8" def get_info(self, page): url, pos = text.extract(page, "', '") data = {"_extractor": PixhostImageExtractor} for url in text.extract_iter(page, '', '<', pos) return url, text.unescape(filename) if filename else None class PostimgGalleryExtractor(ImagehostImageExtractor): """Extractor for images galleries from postimages.org""" category = "postimg" subcategory = "gallery" root = "https://postimg.cc" pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)" r"\.(?:cc|org)(/gallery/([^/?#]+))") example = "https://postimg.cc/gallery/ID" def items(self): page = self.request(self.page_url).text title = text.extr( page, 'property="og:title" content="', ' — Postimages"') data = { "_extractor" : PostimgImageExtractor, "gallery_title": text.unescape(title), } for token in text.extract_iter(page, 'data-image="', '"'): url = f"{self.root}/{token}" yield Message.Queue, url, data class TurboimagehostImageExtractor(ImagehostImageExtractor): """Extractor for single images from www.turboimagehost.com""" category = "turboimagehost" pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com" r"/p/(\d+)/[^/?#]+\.html)") example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html" def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("