diff options
Diffstat (limited to 'gallery_dl/extractor/imagehosts.py')
| -rw-r--r-- | gallery_dl/extractor/imagehosts.py | 72 |
1 files changed, 68 insertions, 4 deletions
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 0e5ce7e..fccc466 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -372,14 +372,78 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
 class FappicImageExtractor(ImagehostImageExtractor):
     """Extractor for single images from fappic.com"""
     category = "fappic"
-    pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
-    example = "https://fappic.com/abc123/NAME.EXT"
+    pattern = (r"(?:https?://)?(?:www\.|img\d+\.)?fappic\.com"
+               r"/(?:i/\d+/())?(\w{10,})(?:/|\.)\w+")
+    example = "https://fappic.com/abcde12345/NAME.EXT"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+
+        thumb, token = self.groups
+        if thumb is not None and token.endswith("_t"):
+            self.token = token = token[:-2]
+        else:
+            self.token = token
+        self.page_url = f"https://fappic.com/{token}/pic.jpg"
 
     def get_info(self, page):
         url     , pos = text.extract(page, '<a href="#"><img src="', '"')
         filename, pos = text.extract(page, 'alt="', '"', pos)
+        return url, text.re(r"^Porn[ -]Pic(?:s|ture)[ -]").sub("", filename)
+
 
-        if filename.startswith("Porn-Picture-"):
-            filename = filename[13:]
+class PicstateImageExtractor(ImagehostImageExtractor):
+    """Extractor for single images from picstate.com"""
+    category = "picstate"
+    pattern = r"(?:https?://)?((?:www\.)?picstate\.com/view/full/([^/?#]+))"
+    example = "https://picstate.com/view/full/123"
 
+    def get_info(self, page):
+        pos = page.index(' id="image_container"')
+        url     , pos = text.extract(page, '<img src="', '"', pos)
+        filename, pos = text.extract(page, 'alt="', '"', pos)
         return url, filename
+
+
+class ImgdriveImageExtractor(ImagehostImageExtractor):
+    """Extractor for single images from imgdrive.net"""
+    category = "imgdrive"
+    pattern = (r"(?:https?://)?(?:www\.)?(img(drive|taxi|wallet)\.(?:com|net)"
+               r"/img-(\w+)\.html)")
+    example = "https://imgdrive.net/img-0123456789abc.html"
+
+    def __init__(self, match):
+        path, category, self.token = match.groups()
+        self.page_url = f"https://{path}"
+        self.category = f"img{category}"
+        Extractor.__init__(self, match)
+
+    def get_info(self, page):
+        title, pos = text.extract(
+            page, 'property="og:title" content="', '"')
+        image, pos = text.extract(
+            page, 'property="og:image" content="', '"', pos)
+        return image.replace("/small/", "/big/"), title.rsplit(" | ", 2)[0]
+
+
+class SilverpicImageExtractor(ImagehostImageExtractor):
+    """Extractor for single images from silverpic.com"""
+    category = "silverpic"
+    pattern = (r"(?:https?://)?((?:www\.)?silverpic\.com"
+               r"/([a-z0-9]{10,})/[\S]+\.html)")
+    example = "https://silverpic.com/a1b2c3d4f5g6/NAME.EXT.html"
+
+    def get_info(self, page):
+        url, pos = text.extract(page, '<img src="/img/', '"')
+        alt, pos = text.extract(page, 'alt="', '"', pos)
+        return f"https://silverpic.com/img/{url}", alt
+
+    def metadata(self, page):
+        pos = page.find('<img src="/img/')
+        width = text.extract(page, 'width="', '"', pos)[0]
+        height = text.extract(page, 'height="', '"', pos)[0]
+
+        return {
+            "width" : text.parse_int(width),
+            "height": text.parse_int(height),
+        }
