aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/imagehosts.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/imagehosts.py')
-rw-r--r--gallery_dl/extractor/imagehosts.py48
1 files changed, 31 insertions, 17 deletions
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 21e6cf8..d1abe01 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, exception
from ..cache import memcache
-from os.path import splitext
class ImagehostImageExtractor(Extractor):
@@ -20,7 +19,6 @@ class ImagehostImageExtractor(Extractor):
subcategory = "image"
archive_fmt = "{token}"
parent = True
- _https = True
_params = None
_cookies = None
_encoding = None
@@ -28,10 +26,7 @@ class ImagehostImageExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
- if self.root:
- self.page_url = f"{self.root}{match[1]}"
- else:
- self.page_url = f"http{'s' if self._https else ''}://{match[1]}"
+ self.page_url = (self.root or "https://") + match[1]
self.token = match[2]
if self._params == "simple":
@@ -70,7 +65,7 @@ class ImagehostImageExtractor(Extractor):
data["post_url"] = self.page_url
data.update(self.metadata(page))
- if self._https and url.startswith("http:"):
+ if url.startswith("http:"):
url = "https:" + url[5:]
if self._validate is not None:
data["_http_validate"] = self._validate
@@ -102,9 +97,6 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
ImagehostImageExtractor.__init__(self, match)
if "/img-" in self.page_url:
self.page_url = self.page_url.replace("img.yt", "imx.to")
- self.url_ext = True
- else:
- self.url_ext = False
def get_info(self, page):
url, pos = text.extract(
@@ -112,9 +104,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
if not url:
self.not_found()
filename, pos = text.extract(page, ' title="', '"', pos)
- if self.url_ext and filename:
- filename += splitext(url)[1]
- return url, filename or url
+ return url, filename or None
def metadata(self, page):
extr = text.extract_from(page, page.index("[ FILESIZE <"))
@@ -176,7 +166,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
if not filename:
filename, pos = text.extract(page, 'alt="', '"', pos)
- return url, (filename + splitext(url)[1]) if filename else url
+ return url, filename or None
class ImagevenueImageExtractor(ImagehostImageExtractor):
@@ -407,7 +397,6 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
category = "imgclick"
pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))"
example = "http://imgclick.net/abc123/NAME.EXT.html"
- _https = False
_params = "complex"
def get_info(self, page):
@@ -461,8 +450,8 @@ class ImgdriveImageExtractor(ImagehostImageExtractor):
def __init__(self, match):
path, category, self.token = match.groups()
- self.page_url = f"https://{path}"
- self.category = f"img{category}"
+ self.page_url = "https://" + path
+ self.category = "img" + category
Extractor.__init__(self, match)
def get_info(self, page):
@@ -496,3 +485,28 @@ class SilverpicImageExtractor(ImagehostImageExtractor):
"width" : text.parse_int(width),
"height": text.parse_int(height),
}
+
+
+class ImgpvImageExtractor(ImagehostImageExtractor):
+ """Extractor for imgpv.com images"""
+ category = "imgpv"
+ root = "https://imgpv.com"
+ pattern = (r"(?:https?://)?(?:www\.)?imgpv\.com"
+ r"(/([a-z0-9]{10,})/[\S]+\.html)")
+ example = "https://www.imgpv.com/a1b2c3d4f5g6/NAME.EXT.html"
+
+ def get_info(self, page):
+ url, pos = text.extract(page, 'id="img-preview" src="', '"')
+ alt, pos = text.extract(page, 'alt="', '"', pos)
+ return url, text.unescape(alt)
+
+ def metadata(self, page):
+ pos = page.find('class="upinfo">')
+ date, pos = text.extract(page, '<b>', 'by', pos)
+ user, pos = text.extract(page, '>', '<', pos)
+
+ date = date.split()
+ return {
+ "date": self.parse_datetime_iso(f"{date[0][:10]} {date[1]}"),
+ "user": text.unescape(user),
+ }