summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/imagehosts.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/imagehosts.py')
-rw-r--r--gallery_dl/extractor/imagehosts.py101
1 files changed, 70 insertions, 31 deletions
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 817d2c4..21e6cf8 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -19,6 +19,7 @@ class ImagehostImageExtractor(Extractor):
basecategory = "imagehost"
subcategory = "image"
archive_fmt = "{token}"
+ parent = True
_https = True
_params = None
_cookies = None
@@ -27,7 +28,10 @@ class ImagehostImageExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- self.page_url = f"http{'s' if self._https else ''}://{match[1]}"
+ if self.root:
+ self.page_url = f"{self.root}{match[1]}"
+ else:
+ self.page_url = f"http{'s' if self._https else ''}://{match[1]}"
self.token = match[2]
if self._params == "simple":
@@ -53,14 +57,25 @@ class ImagehostImageExtractor(Extractor):
).text
url, filename = self.get_info(page)
- data = text.nameext_from_url(filename, {"token": self.token})
+ if not url:
+ return
+
+ if filename:
+ data = text.nameext_from_name(filename)
+ if not data["extension"]:
+ data["extension"] = text.ext_from_url(url)
+ else:
+ data = text.nameext_from_url(url)
+ data["token"] = self.token
+ data["post_url"] = self.page_url
data.update(self.metadata(page))
+
if self._https and url.startswith("http:"):
url = "https:" + url[5:]
if self._validate is not None:
data["_http_validate"] = self._validate
- yield Message.Directory, data
+ yield Message.Directory, "", data
yield Message.Url, url, data
def get_info(self, page):
@@ -70,6 +85,9 @@ class ImagehostImageExtractor(Extractor):
"""Return additional metadata"""
return ()
+ def not_found(self, resource=None):
+ raise exception.NotFoundError(resource or self.__class__.subcategory)
+
class ImxtoImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imx.to"""
@@ -92,7 +110,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
url, pos = text.extract(
page, '<div style="text-align:center;"><a href="', '"')
if not url:
- raise exception.NotFoundError("image")
+ self.not_found()
filename, pos = text.extract(page, ' title="', '"', pos)
if self.url_ext and filename:
filename += splitext(url)[1]
@@ -152,7 +170,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
if not url:
url, pos = text.extract(page, '<img class="centred" src="', '"')
if not url:
- raise exception.NotFoundError("image")
+ self.not_found()
filename, pos = text.extract(page, "alt='", "'", pos)
if not filename:
@@ -169,7 +187,11 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
example = "https://www.imagevenue.com/ME123456789"
def get_info(self, page):
- pos = page.index('class="card-body')
+ try:
+ pos = page.index('class="card-body')
+ except ValueError:
+ self.not_found()
+
url, pos = text.extract(page, '<img src="', '"', pos)
if url.endswith("/loader.svg"):
url, pos = text.extract(page, '<img src="', '"', pos)
@@ -199,6 +221,8 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url , pos = text.extract(page, '<img src="', '"')
+ if url and url.startswith("/imgs/"):
+ self.not_found()
filename, pos = text.extract(page, ' alt="', '"', pos)
return url, filename
@@ -249,7 +273,7 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
pos = page.find('id="imgpreview"')
if pos < 0:
- raise exception.NotFoundError("image")
+ self.not_found()
url , pos = text.extract(page, 'src="', '"', pos)
name, pos = text.extract(page, 'alt="', '"', pos)
return url, text.unescape(name)
@@ -258,23 +282,26 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
class PixhostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from pixhost.to"""
category = "pixhost"
- pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
- r"/show/\d+/(\d+)_[^/?#]+)")
+ root = "https://pixhost.to"
+ pattern = (r"(?:https?://)?(?:www\.)?pixhost\.(?:to|org)"
+ r"(/show/\d+/(\d+)_[^/?#]+)")
example = "https://pixhost.to/show/123/12345_NAME.EXT"
_cookies = {"pixhostads": "1", "pixhosttest": "1"}
def get_info(self, page):
- url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"")
- filename, pos = text.extract(page, "alt=\"", "\"", pos)
- return url, filename
+ self.kwdict["directory"] = self.page_url.rsplit("/")[-2]
+ url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"")
+ name, pos = text.extract(page, "alt=\"", "\"", pos)
+ return url, text.unescape(name) if name else None
class PixhostGalleryExtractor(ImagehostImageExtractor):
"""Extractor for image galleries from pixhost.to"""
category = "pixhost"
subcategory = "gallery"
- pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
- r"/gallery/([^/?#]+))")
+ root = "https://pixhost.to"
+ pattern = (r"(?:https?://)?(?:www\.)?pixhost\.(?:to|org)"
+ r"(/gallery/([^/?#]+))")
example = "https://pixhost.to/gallery/ID"
def items(self):
@@ -288,29 +315,39 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
- pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
- r"\.(?:cc|org)/(?!gallery/)(?:image/)?([^/?#]+)/?)")
- example = "https://postimages.org/ID"
+ root = "https://postimg.cc"
+ pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)"
+ r"\.(?:cc|org)(/(?!gallery/)(?:image/)?([^/?#]+)/?)")
+ example = "https://postimg.cc/ID"
def get_info(self, page):
pos = page.index(' id="download"')
url , pos = text.rextract(page, ' href="', '"', pos)
- filename, pos = text.extract(page, 'class="imagename">', '<', pos)
- return url, text.unescape(filename)
+ filename, pos = text.extract(page, ' class="my-4">', '<', pos)
+ return url, text.unescape(filename) if filename else None
class PostimgGalleryExtractor(ImagehostImageExtractor):
"""Extractor for images galleries from postimages.org"""
category = "postimg"
subcategory = "gallery"
- pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
- r"\.(?:cc|org)/gallery/([^/?#]+))")
- example = "https://postimages.org/gallery/ID"
+ root = "https://postimg.cc"
+ pattern = (r"(?:https?://)?(?:www\.)?(?:postim(?:ages|g)|pixxxels)"
+ r"\.(?:cc|org)(/gallery/([^/?#]+))")
+ example = "https://postimg.cc/gallery/ID"
def items(self):
page = self.request(self.page_url).text
- data = {"_extractor": PostimgImageExtractor}
- for url in text.extract_iter(page, ' class="thumb"><a href="', '"'):
+ title = text.extr(
+ page, 'property="og:title" content="', ' — Postimages"')
+
+ data = {
+ "_extractor" : PostimgImageExtractor,
+ "gallery_title": text.unescape(title),
+ }
+
+ for token in text.extract_iter(page, 'data-image="', '"'):
+ url = f"{self.root}/{token}"
yield Message.Queue, url, data
@@ -323,7 +360,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
- return url, url
+ return url, None
class TurboimagehostGalleryExtractor(ImagehostImageExtractor):
@@ -343,7 +380,7 @@ class TurboimagehostGalleryExtractor(ImagehostImageExtractor):
if params["p"] == 1 and \
"Requested gallery don`t exist on our website." in page:
- raise exception.NotFoundError("gallery")
+ self.not_found()
thumb_url = None
for thumb_url in text.extract_iter(page, '"><a href="', '"'):
@@ -362,7 +399,7 @@ class ViprImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url = text.extr(page, '<img src="', '"')
- return url, url
+ return url, None
class ImgclickImageExtractor(ImagehostImageExtractor):
@@ -439,14 +476,16 @@ class ImgdriveImageExtractor(ImagehostImageExtractor):
class SilverpicImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from silverpic.com"""
category = "silverpic"
- pattern = (r"(?:https?://)?((?:www\.)?silverpic\.com"
- r"/([a-z0-9]{10,})/[\S]+\.html)")
- example = "https://silverpic.com/a1b2c3d4f5g6/NAME.EXT.html"
+ root = "https://silverpic.net"
+ _params = "complex"
+ pattern = (r"(?:https?://)?(?:www\.)?silverpic\.(?:net|com)"
+ r"(/([a-z0-9]{10,})/[\S]+\.html)")
+ example = "https://silverpic.net/a1b2c3d4f5g6/NAME.EXT.html"
def get_info(self, page):
url, pos = text.extract(page, '<img src="/img/', '"')
alt, pos = text.extract(page, 'alt="', '"', pos)
- return f"https://silverpic.com/img/{url}", alt
+ return f"{self.root}/img/{url}", alt
def metadata(self, page):
pos = page.find('<img src="/img/')