diff options
| author | 2024-09-28 20:01:25 -0400 | |
|---|---|---|
| committer | 2024-09-28 20:01:25 -0400 | |
| commit | 1a457ed68769880ab7760e0746f0cbbd9ca00487 (patch) | |
| tree | a5e2f36fa6537e24a7a8851dab900ea03efdbd00 /gallery_dl/extractor/zzup.py | |
| parent | 1f3ffe32342852fd9ea9e7704022488f3a1222bd (diff) | |
New upstream version 1.27.5. (tag: upstream/1.27.5)
Diffstat (limited to 'gallery_dl/extractor/zzup.py')
| -rw-r--r-- | gallery_dl/extractor/zzup.py | 30 |
1 file changed, 26 insertions, 4 deletions
```diff
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
index 45b0cd8..05b12b4 100644
--- a/gallery_dl/extractor/zzup.py
+++ b/gallery_dl/extractor/zzup.py
@@ -4,6 +4,8 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
+"""Extractors for https://zzup.com/"""
+
 from .common import GalleryExtractor
 from .. import text
 
@@ -11,17 +13,20 @@ from .. import text
 class ZzupGalleryExtractor(GalleryExtractor):
     category = "zzup"
     directory_fmt = ("{category}", "{title}")
-    filename_fmt = "{slug}_{num:>03}.{extension}"
+    filename_fmt = "{num:>03}.{extension}"
     archive_fmt = "{slug}_{num}"
     root = "https://zzup.com"
-    pattern = (r"(?:https?://)?(?:www\.)?zzup\.com(/content"
+    pattern = (r"(?:https?://)?(up\.|www\.)?zzup\.com(/(?:viewalbum|content)"
                r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
     example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
 
     def __init__(self, match):
-        url = "{}/{}/index.html".format(self.root, match.group(1))
+        subdomain, path, self.slug = match.groups()
+        if subdomain == "up.":
+            self.root = "https://up.zzup.com"
+            self.images = self.images_v2
+        url = "{}{}/index.html".format(self.root, path)
         GalleryExtractor.__init__(self, match, url)
-        self.slug = match.group(2)
 
     def metadata(self, page):
         return {
@@ -38,3 +43,20 @@ class ZzupGalleryExtractor(GalleryExtractor):
         p1, _, p2 = url.partition("/image0")
         ufmt = p1 + "/image{:>05}" + p2[4:]
         return [(ufmt.format(num), None) for num in range(1, count + 1)]
+
+    def images_v2(self, page):
+        results = []
+
+        while True:
+            for path in text.extract_iter(
+                    page, ' class="picbox"><a target="_blank" href="', '"'):
+                results.append(("{}/showimage/{}/zzup.com.jpg".format(
+                    self.root, "/".join(path.split("/")[2:-2])), None))
+
+            pos = page.find("glyphicon-arrow-right")
+            if pos < 0:
+                break
+            path = text.rextract(page, ' href="', '"', pos)[0]
+            page = self.request(text.urljoin(self.gallery_url, path)).text
+
+        return results
```
