# -*- coding: utf-8 -*-
# Copyright 2016-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Collection of extractors for various imagehosts"""
from .common import Extractor, Message
from .. import text, exception
from ..cache import memcache
class ImagehostImageExtractor(Extractor):
    """Shared base for extractors that handle one image page of an imagehost

    Subclasses supply 'get_info()' (and optionally 'metadata()');
    'items()' then fetches the page and emits a single file.
    """
    basecategory = "imagehost"
    subcategory = "image"
    archive_fmt = "{token}"
    parent = True
    # Request body for the page request. A truthy value switches the
    # request to POST; the presets "simple" and "complex" are expanded
    # into form dictionaries by __init__().
    _params = None
    # Forwarded unchanged as 'cookies=' / 'encoding=' to self.request()
    _cookies = None
    _encoding = None
    # When not None, stored in the file metadata as "_http_validate"
    _validate = None

    def __init__(self, match):
        """Store page URL and token from 'match' and expand '_params'

        Group 1 of 'match' is appended to 'self.root' (or to "https://"
        when 'root' is falsy); group 2 becomes 'self.token'.
        """
        Extractor.__init__(self, match)
        self.page_url = (self.root or "https://") + match[1]
        self.token = match[2]

        preset = self._params
        if preset == "simple":
            self._params = {"imgContinue": "Continue+to+image+...+"}
        elif preset == "complex":
            self._params = {
                "op"  : "view",
                "id"  : self.token,
                "pre" : "1",
                "adb" : "1",
                "next": "Continue+to+image+...+",
            }

    def items(self):
        """Request the image page and yield its Directory and Url messages

        Nothing is yielded when 'get_info()' reports no URL.
        """
        form = self._params
        page = self.request(
            self.page_url,
            method="POST" if form else "GET",
            data=form,
            cookies=self._cookies,
            encoding=self._encoding,
        ).text

        url, filename = self.get_info(page)
        if not url:
            return

        if not filename:
            # no separate filename: derive name and extension from the URL
            data = text.nameext_from_url(url)
        else:
            data = text.nameext_from_name(filename)
            if not data["extension"]:
                # filename had no extension; take it from the URL instead
                data["extension"] = text.ext_from_url(url)

        data["token"] = self.token
        data["post_url"] = self.page_url
        # applied after token/post_url, so metadata() may override them
        data.update(self.metadata(page))

        if self._validate is not None:
            data["_http_validate"] = self._validate
        if url.startswith("http:"):
            # rewrite the scheme to https
            url = "https:" + url[5:]

        yield Message.Directory, "", data
        yield Message.Url, url, data

    def get_info(self, page):
        """Locate the image URL and the string to take the filename from

        'items()' unpacks the result as '(url, filename)'. This base
        implementation has no body besides this docstring and therefore
        returns None.
        """

    def metadata(self, page):
        """Return extra metadata to merge into the file's data dict"""
        return ()

    def not_found(self, resource=None):
        """Raise NotFoundError for 'resource' or this class' subcategory"""
        if not resource:
            resource = self.__class__.subcategory
        raise exception.NotFoundError(resource)
class ImxtoImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imx.to"""
category = "imxto"
pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/(?:i/|img-)(\w+)(\.html)?)")
example = "https://imx.to/i/ID"
_params = "simple"
_encoding = "utf-8"
def __init__(self, match):
    """Run the base initializer, then adjust '/img-' page URLs

    For page URLs containing "/img-", every "img.yt" in the URL is
    replaced with "imx.to".
    """
    ImagehostImageExtractor.__init__(self, match)
    page_url = self.page_url
    if "/img-" in page_url:
        self.page_url = page_url.replace("img.yt", "imx.to")
def get_info(self, page):
url, pos = text.extract(
page, '
", "").replace(" ", "")[:-1]
width, _, height = extr(">", " px").partition("x")
return {
"size" : text.parse_bytes(size),
"width" : text.parse_int(width),
"height": text.parse_int(height),
"hash" : extr(">", ""),
}
class ImxtoGalleryExtractor(ImagehostImageExtractor):
"""Extractor for image galleries from imx.to"""
category = "imxto"
subcategory = "gallery"
pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))"
example = "https://imx.to/g/ID"
def items(self):
page = self.request(self.page_url).text
title, pos = text.extract(page, '")[2]).strip(),
}
params = {"page": 1}
while True:
for url in text.extract_iter(page, "
Last' in page:
return
params["page"] += 1
page = self.request(self.page_url, params=params).text
class AcidimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from acidimg.cc"""
category = "acidimg"
pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"
example = "https://acidimg.cc/img-abc123.html"
_params = "simple"
_encoding = "utf-8"
def get_info(self, page):
url, pos = text.extract(page, "
")
data = {"_extractor": PixhostImageExtractor}
for url in text.extract_iter(page, '', '<', pos)
return url, text.unescape(filename) if filename else None
class PostimgGalleryExtractor(ImagehostImageExtractor):
    """Extractor for image galleries from postimages.org"""
    category = "postimg"
    subcategory = "gallery"
    root = "https://postimg.cc"
    pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)"
               r"\.(?:cc|org)(/gallery/([^/?#]+))")
    example = "https://postimg.cc/gallery/ID"

    def items(self):
        """Yield one Queue message per 'data-image' token of the gallery

        All messages share one metadata dict, which carries the
        unescaped gallery title and names PostimgImageExtractor as the
        extractor for the queued URLs.
        """
        html = self.request(self.page_url).text

        gallery_title = text.extr(
            html, 'property="og:title" content="', ' — Postimages"')
        shared = {
            "_extractor"   : PostimgImageExtractor,
            "gallery_title": text.unescape(gallery_title),
        }

        base = self.root
        for image_token in text.extract_iter(html, 'data-image="', '"'):
            yield Message.Queue, f"{base}/{image_token}", shared
class TurboimagehostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from www.turboimagehost.com"""
category = "turboimagehost"
pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
r"/p/(\d+)/[^/?#]+\.html)")
example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html"
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("![]()

')
date, pos = text.extract(page, '', 'by', pos)
user, pos = text.extract(page, '>', '<', pos)
date = date.split()
return {
"date": self.parse_datetime_iso(f"{date[0][:10]} {date[1]}"),
"user": text.unescape(user),
}