diff options
Diffstat (limited to 'gallery_dl/extractor/imagehosts.py')
| -rw-r--r-- | gallery_dl/extractor/imagehosts.py | 38 |
1 files changed, 34 insertions, 4 deletions
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index d6b36cb..0e5ce7e 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2023 Mike Fährmann +# Copyright 2016-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -23,12 +23,12 @@ class ImagehostImageExtractor(Extractor): _params = None _cookies = None _encoding = None + _validate = None def __init__(self, match): Extractor.__init__(self, match) - self.page_url = "http{}://{}".format( - "s" if self._https else "", match.group(1)) - self.token = match.group(2) + self.page_url = f"http{'s' if self._https else ''}://{match[1]}" + self.token = match[2] if self._params == "simple": self._params = { @@ -57,6 +57,8 @@ class ImagehostImageExtractor(Extractor): data.update(self.metadata(page)) if self._https and url.startswith("http:"): url = "https:" + url[5:] + if self._validate is not None: + data["_http_validate"] = self._validate yield Message.Directory, data yield Message.Url, url, data @@ -164,6 +166,14 @@ class ImagevenueImageExtractor(ImagehostImageExtractor): filename, pos = text.extract(page, 'alt="', '"', pos) return url, text.unescape(filename) + def _validate(self, response): + hget = response.headers.get + return not ( + hget("content-length") == "14396" and + hget("content-type") == "image/jpeg" and + hget("last-modified") == "Mon, 04 May 2020 07:19:52 GMT" + ) + class ImagetwistImageExtractor(ImagehostImageExtractor): """Extractor for single images from imagetwist.com""" @@ -200,6 +210,26 @@ class ImagetwistGalleryExtractor(ImagehostImageExtractor): yield Message.Queue, root + path, data +class ImgadultImageExtractor(ImagehostImageExtractor): + """Extractor for single images from imgadult.com""" + category = "imgadult" + _cookies = {"img_i_d": "1"} + pattern = r"(?:https?://)?((?:www\.)?imgadult\.com/img-([0-9a-f]+)\.html)" + example = "https://imgadult.com/img-0123456789abc.html" + + def get_info(self, page): + url , pos = text.extract(page, "' src='", "'") + name, pos = text.extract(page, "alt='", "'", pos) + + if name: + name, _, rhs = name.rpartition(" image hosted at ImgAdult.com") + if not name: + name = rhs + name = text.unescape(name) + + return url, name + + class ImgspiceImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgspice.com""" category = "imgspice" |
