diff options
Diffstat (limited to 'gallery_dl/extractor/imagehosts.py')
| -rw-r--r-- | gallery_dl/extractor/imagehosts.py | 189 |
1 files changed, 52 insertions, 137 deletions
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index a6e848c..94019bd 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2022 Mike Fährmann +# Copyright 2016-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,23 +19,23 @@ class ImagehostImageExtractor(Extractor): basecategory = "imagehost" subcategory = "image" archive_fmt = "{token}" - https = True - params = None - cookies = None - encoding = None + _https = True + _params = None + _cookies = None + _encoding = None def __init__(self, match): Extractor.__init__(self, match) self.page_url = "http{}://{}".format( - "s" if self.https else "", match.group(1)) + "s" if self._https else "", match.group(1)) self.token = match.group(2) - if self.params == "simple": - self.params = { + if self._params == "simple": + self._params = { "imgContinue": "Continue+to+image+...+", } - elif self.params == "complex": - self.params = { + elif self._params == "complex": + self._params = { "op": "view", "id": self.token, "pre": "1", @@ -46,16 +46,16 @@ class ImagehostImageExtractor(Extractor): def items(self): page = self.request( self.page_url, - method=("POST" if self.params else "GET"), - data=self.params, - cookies=self.cookies, - encoding=self.encoding, + method=("POST" if self._params else "GET"), + data=self._params, + cookies=self._cookies, + encoding=self._encoding, ).text url, filename = self.get_info(page) data = text.nameext_from_url(filename, {"token": self.token}) data.update(self.metadata(page)) - if self.https and url.startswith("http:"): + if self._https and url.startswith("http:"): url = "https:" + url[5:] yield Message.Directory, data @@ -74,36 +74,9 @@ class ImxtoImageExtractor(ImagehostImageExtractor): category = "imxto" pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)" r"/(?:i/|img-)(\w+)(\.html)?)") - test = ( - ("https://imx.to/i/1qdeva", { # new-style URL - "url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - "keyword": { - "size" : 18, - "width" : 64, - "height": 32, - "hash" : "94d56c599223c59f3feb71ea603484d1", - }, - }), - ("https://imx.to/img-57a2050547b97.html", { # old-style URL - "url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204", - "content": "54592f2635674c25677c6872db3709d343cdf92f", - "keyword": { - "size" : 5284, - "width" : 320, - "height": 160, - "hash" : "40da6aaa7b8c42b18ef74309bbc713fc", - }, - }), - ("https://img.yt/img-57a2050547b97.html", { # img.yt domain - "url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204", - }), - ("https://imx.to/img-57a2050547b98.html", { - "exception": exception.NotFoundError, - }), - ) - params = "simple" - encoding = "utf-8" + example = "https://imx.to/i/ID" + _params = "simple" + _encoding = "utf-8" def __init__(self, match): ImagehostImageExtractor.__init__(self, match) @@ -140,11 +113,7 @@ class ImxtoGalleryExtractor(ImagehostImageExtractor): category = "imxto" subcategory = "gallery" pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))" - test = ("https://imx.to/g/ozdy", { - "pattern": ImxtoImageExtractor.pattern, - "keyword": {"title": "untitled gallery"}, - "count": 40, - }) + example = "https://imx.to/g/ID" def items(self): page = self.request(self.page_url).text @@ -162,19 +131,21 @@ class AcidimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from acidimg.cc""" category = "acidimg" pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)" - test = ("https://acidimg.cc/img-5acb6b9de4640.html", { - "url": "f132a630006e8d84f52d59555191ed82b3b64c04", - "keyword": "135347ab4345002fc013863c0d9419ba32d98f78", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) - params = "simple" - encoding = "utf-8" + example = "https://acidimg.cc/img-abc123.html" + _params = "simple" + _encoding = "utf-8" def get_info(self, page): - url, pos = text.extract(page, '<img class="centred" src="', '"') + url, pos = text.extract(page, "<img class='centred' src='", "'") if not url: - raise exception.NotFoundError("image") - filename, pos = text.extract(page, ' alt="', '"', pos) + url, pos = text.extract(page, '<img class="centred" src="', '"') + if not url: + raise exception.NotFoundError("image") + + filename, pos = text.extract(page, "alt='", "'", pos) + if not filename: + filename, pos = text.extract(page, 'alt="', '"', pos) + return url, (filename + splitext(url)[1]) if filename else url @@ -183,26 +154,13 @@ class ImagevenueImageExtractor(ImagehostImageExtractor): category = "imagevenue" pattern = (r"(?:https?://)?((?:www|img\d+)\.imagevenue\.com" r"/([A-Z0-9]{8,10}|view/.*|img\.php\?.*))") - test = ( - ("https://www.imagevenue.com/ME13LS07", { - "pattern": r"https://cdn-images\.imagevenue\.com" - r"/10/ac/05/ME13LS07_o\.png", - "keyword": "ae15d6e3b2095f019eee84cd896700cd34b09c36", - "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee", - }), - (("https://www.imagevenue.com/view/o?i=92518_13732377" - "annakarina424200712535AM_122_486lo.jpg&h=img150&l=loc486"), { - "url": "8bf0254e29250d8f5026c0105bbdda3ee3d84980", - }), - (("http://img28116.imagevenue.com/img.php" - "?image=th_52709_test_122_64lo.jpg"), { - "url": "f98e3091df7f48a05fb60fbd86f789fc5ec56331", - }), - ) + example = "https://www.imagevenue.com/ME123456789" def get_info(self, page): pos = page.index('class="card-body') url, pos = text.extract(page, '<img src="', '"', pos) + if url.endswith("/loader.svg"): + url, pos = text.extract(page, '<img src="', '"', pos) filename, pos = text.extract(page, 'alt="', '"', pos) return url, text.unescape(filename) @@ -212,21 +170,11 @@ class ImagetwistImageExtractor(ImagehostImageExtractor): category = "imagetwist" pattern = (r"(?:https?://)?((?:www\.|phun\.)?" r"image(?:twist|haha)\.com/([a-z0-9]{12}))") - test = ( - ("https://imagetwist.com/f1i2s4vhvbrq/test.png", { - "url": "8d5e168c0bee30211f821c6f3b2116e419d42671", - "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }), - ("https://www.imagetwist.com/f1i2s4vhvbrq/test.png"), - ("https://phun.imagetwist.com/f1i2s4vhvbrq/test.png"), - ("https://imagehaha.com/f1i2s4vhvbrq/test.png"), - ("https://www.imagehaha.com/f1i2s4vhvbrq/test.png"), - ) + example = "https://imagetwist.com/123456abcdef/NAME.EXT" @property @memcache(maxage=3*3600) - def cookies(self): + def _cookies(self): return self.request(self.page_url).cookies def get_info(self, page): @@ -239,11 +187,7 @@ class ImgspiceImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgspice.com""" category = "imgspice" pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))" - test = ("https://imgspice.com/nwfwtpyog50y/test.png.html", { - "url": "b8c30a8f51ee1012959a4cfd46197fabf14de984", - "keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) + example = "https://imgspice.com/ID/NAME.EXT.html" def get_info(self, page): pos = page.find('id="imgpreview"') @@ -259,12 +203,8 @@ class PixhostImageExtractor(ImagehostImageExtractor): category = "pixhost" pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)" r"/show/\d+/(\d+)_[^/?#]+)") - test = ("http://pixhost.to/show/190/130327671_test-.png", { - "url": "4e5470dcf6513944773044d40d883221bbc46cff", - "keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) - cookies = {"pixhostads": "1", "pixhosttest": "1"} + example = "https://pixhost.to/show/123/12345_NAME.EXT" + _cookies = {"pixhostads": "1", "pixhosttest": "1"} def get_info(self, page): url , pos = text.extract(page, "class=\"image-img\" src=\"", "\"") @@ -278,10 +218,7 @@ class PixhostGalleryExtractor(ImagehostImageExtractor): subcategory = "gallery" pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)" r"/gallery/([^/?#]+))") - test = ("https://pixhost.to/gallery/jSMFq", { - "pattern": PixhostImageExtractor.pattern, - "count": 3, - }) + example = "https://pixhost.to/gallery/ID" def items(self): page = text.extr(self.request( @@ -294,13 +231,9 @@ class PixhostGalleryExtractor(ImagehostImageExtractor): class PostimgImageExtractor(ImagehostImageExtractor): """Extractor for single images from postimages.org""" category = "postimg" - pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)" - r"/(?!gallery/)(?:image/)?([^/?#]+)/?)") - test = ("https://postimg.cc/Wtn2b3hC", { - "url": "72f3c8b1d6c6601a20ad58f35635494b4891a99e", - "keyword": "2d05808d04e4e83e33200db83521af06e3147a84", - "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee", - }) + pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)" + r"\.(?:cc|org)/(?!gallery/)(?:image/)?([^/?#]+)/?)") + example = "https://postimages.org/ID" def get_info(self, page): pos = page.index(' id="download"') @@ -313,12 +246,9 @@ class PostimgGalleryExtractor(ImagehostImageExtractor): """Extractor for images galleries from postimages.org""" category = "postimg" subcategory = "gallery" - pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)" - r"/(?:gallery/)([^/?#]+)/?)") - test = ("https://postimg.cc/gallery/wxpDLgX", { - "pattern": PostimgImageExtractor.pattern, - "count": 22, - }) + pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)" + r"\.(?:cc|org)/gallery/([^/?#]+))") + example = "https://postimages.org/gallery/ID" def items(self): page = self.request(self.page_url).text @@ -332,11 +262,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor): category = "turboimagehost" pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com" r"/p/(\d+)/[^/?#]+\.html)") - test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", { - "url": "b94de43612318771ced924cb5085976f13b3b90e", - "keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) + example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html" def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("<img "))[0] @@ -347,10 +273,7 @@ class ViprImageExtractor(ImagehostImageExtractor): """Extractor for single images from vipr.im""" category = "vipr" pattern = r"(?:https?://)?(vipr\.im/(\w+))" - test = ("https://vipr.im/kcd5jcuhgs3v.html", { - "url": "88f6a3ecbf3356a11ae0868b518c60800e070202", - "keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771", - }) + example = "https://vipr.im/abc123.html" def get_info(self, page): url = text.extr(page, '<img src="', '"') @@ -361,13 +284,9 @@ class ImgclickImageExtractor(ImagehostImageExtractor): """Extractor for single images from imgclick.net""" category = "imgclick" pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))" - test = ("http://imgclick.net/4tbrre1oxew9/test-_-_.png.html", { - "url": "140dcb250a325f2d26b2d918c18b8ac6a2a0f6ab", - "keyword": "6895256143eab955622fc149aa367777a8815ba3", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) - https = False - params = "complex" + example = "http://imgclick.net/abc123/NAME.EXT.html" + _https = False + _params = "complex" def get_info(self, page): url , pos = text.extract(page, '<br><img src="', '"') @@ -379,11 +298,7 @@ class FappicImageExtractor(ImagehostImageExtractor): """Extractor for single images from fappic.com""" category = "fappic" pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)" - test = ("https://www.fappic.com/98wxqcklyh8k/test.png", { - "pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png", - "keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - }) + example = "https://fappic.com/abc123/NAME.EXT" def get_info(self, page): url , pos = text.extract(page, '<a href="#"><img src="', '"') |
