diff options
| author | 2022-08-29 02:17:16 -0400 | |
|---|---|---|
| committer | 2022-08-29 02:17:16 -0400 | |
| commit | a768930761f7f20587ae40a8cacca0e55c85290a (patch) | |
| tree | 5a4163db912b93fc45f717e5e43fd5be3e66f16c /gallery_dl/extractor/catbox.py | |
| parent | ae2a0f5622beaa6f402526f8a7b939419283a090 (diff) | |
New upstream version 1.23.0. (ref: upstream/1.23.0)
Diffstat (limited to 'gallery_dl/extractor/catbox.py')
| -rw-r--r-- | gallery_dl/extractor/catbox.py | 56 |
1 files changed, 56 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

# Provenance (from the diff header this module was recovered from):
#   diff --git a/gallery_dl/extractor/catbox.py
#              b/gallery_dl/extractor/catbox.py
#   new file mode 100644
#   index 0000000..509108f
#   --- /dev/null
#   +++ b/gallery_dl/extractor/catbox.py
#   @@ -0,0 +1,56 @@

"""Extractors for https://catbox.moe/"""

from .common import GalleryExtractor
from .. import text


class CatboxAlbumExtractor(GalleryExtractor):
    """Extractor for catbox albums"""
    category = "catbox"
    subcategory = "album"
    root = "https://catbox.moe"
    filename_fmt = "{filename}.{extension}"
    directory_fmt = ("{category}", "{album_name} ({album_id})")
    archive_fmt = "{album_id}_{filename}"
    # Captures the "/c/<album id>" path of an album URL; the scheme and a
    # leading "www." are both optional.
    pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
    test = (
        ("https://catbox.moe/c/1igcbe", {
            "url": "35866a88c29462814f103bc22ec031eaeb380f8a",
            "content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
            "pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
            "count": 3,
            "keyword": {
                "album_id": "1igcbe",
                "album_name": "test",
                "date": "dt:2022-08-18 00:00:00",
                "description": "album test &>",
            },
        }),
        ("https://www.catbox.moe/c/cd90s1"),
        ("https://catbox.moe/c/w7tm47#"),
    )

    def metadata(self, page):
        """Build the album metadata dict from the album's HTML *page*.

        Returns a dict with the keys "album_id", "album_name", "date"
        and "description".
        """
        # The album ID is whatever follows the last "/" of the gallery URL.
        _, _, album_id = self.gallery_url.rpartition("/")

        # NOTE(review): the extractor returned by text.extract_from() is
        # presumably positional (each call continues after the previous
        # match) -- keep these three calls in exactly this order.
        extract = text.extract_from(page)
        raw_name = extract("<h1>", "<")
        raw_created = extract("<p>Created ", "<")
        raw_description = extract("<p>", "<")

        return {
            "album_id": album_id,
            "album_name": text.unescape(raw_name),
            # Creation date text is parsed with the "%B %d %Y" format.
            "date": text.parse_datetime(raw_created, "%B %d %Y"),
            "description": text.unescape(raw_description),
        }

    def images(self, page):
        """Return a list of ``(file URL, None)`` tuples found in *page*.

        Every text fragment between ">https://files.catbox.moe/" and the
        next "<" is taken as a file path and turned back into a full URL.
        """
        paths = text.extract_iter(page, ">https://files.catbox.moe/", "<")
        # Second tuple element is always None.
        # NOTE(review): presumably the per-file metadata slot expected by
        # GalleryExtractor -- confirm against the base class.
        return [("https://files.catbox.moe/" + path, None) for path in paths]
