diff options
Diffstat (limited to 'gallery_dl/extractor/imagehosts.py')
| -rw-r--r-- | gallery_dl/extractor/imagehosts.py | 48 |
1 files changed, 31 insertions, 17 deletions
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 21e6cf8..d1abe01 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. import text, exception from ..cache import memcache -from os.path import splitext class ImagehostImageExtractor(Extractor): @@ -20,7 +19,6 @@ class ImagehostImageExtractor(Extractor): subcategory = "image" archive_fmt = "{token}" parent = True - _https = True _params = None _cookies = None _encoding = None @@ -28,10 +26,7 @@ class ImagehostImageExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - if self.root: - self.page_url = f"{self.root}{match[1]}" - else: - self.page_url = f"http{'s' if self._https else ''}://{match[1]}" + self.page_url = (self.root or "https://") + match[1] self.token = match[2] if self._params == "simple": @@ -70,7 +65,7 @@ class ImagehostImageExtractor(Extractor): data["post_url"] = self.page_url data.update(self.metadata(page)) - if self._https and url.startswith("http:"): + if url.startswith("http:"): url = "https:" + url[5:] if self._validate is not None: data["_http_validate"] = self._validate @@ -102,9 +97,6 @@ class ImxtoImageExtractor(ImagehostImageExtractor): ImagehostImageExtractor.__init__(self, match) if "/img-" in self.page_url: self.page_url = self.page_url.replace("img.yt", "imx.to") - self.url_ext = True - else: - self.url_ext = False def get_info(self, page): url, pos = text.extract( @@ -112,9 +104,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor): if not url: self.not_found() filename, pos = text.extract(page, ' title="', '"', pos) - if self.url_ext and filename: - filename += splitext(url)[1] - return url, filename or url + return url, filename or None def metadata(self, page): extr = text.extract_from(page, page.index("[ FILESIZE <")) @@ -176,7 +166,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor): if not filename: filename, pos = text.extract(page, 'alt="', '"', pos) - return url, (filename + splitext(url)[1]) if filename else url + return url, filename or None class ImagevenueImageExtractor(ImagehostImageExtractor): @@ -407,7 +397,6 @@ class ImgclickImageExtractor(ImagehostImageExtractor): category = "imgclick" pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))" example = "http://imgclick.net/abc123/NAME.EXT.html" - _https = False _params = "complex" def get_info(self, page): @@ -461,8 +450,8 @@ class ImgdriveImageExtractor(ImagehostImageExtractor): def __init__(self, match): path, category, self.token = match.groups() - self.page_url = f"https://{path}" - self.category = f"img{category}" + self.page_url = "https://" + path + self.category = "img" + category Extractor.__init__(self, match) def get_info(self, page): @@ -496,3 +485,28 @@ class SilverpicImageExtractor(ImagehostImageExtractor): "width" : text.parse_int(width), "height": text.parse_int(height), } + + +class ImgpvImageExtractor(ImagehostImageExtractor): + """Extractor for imgpv.com images""" + category = "imgpv" + root = "https://imgpv.com" + pattern = (r"(?:https?://)?(?:www\.)?imgpv\.com" + r"(/([a-z0-9]{10,})/[\S]+\.html)") + example = "https://www.imgpv.com/a1b2c3d4f5g6/NAME.EXT.html" + + def get_info(self, page): + url, pos = text.extract(page, 'id="img-preview" src="', '"') + alt, pos = text.extract(page, 'alt="', '"', pos) + return url, text.unescape(alt) + + def metadata(self, page): + pos = page.find('class="upinfo">') + date, pos = text.extract(page, '<b>', 'by', pos) + user, pos = text.extract(page, '>', '<', pos) + + date = date.split() + return { + "date": self.parse_datetime_iso(f"{date[0][:10]} {date[1]}"), + "user": text.unescape(user), + } |
