diff options
Diffstat (limited to 'gallery_dl/extractor/mememuseum.py')
| -rw-r--r-- | gallery_dl/extractor/mememuseum.py | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/gallery_dl/extractor/mememuseum.py b/gallery_dl/extractor/mememuseum.py new file mode 100644 index 0000000..1de0d76 --- /dev/null +++ b/gallery_dl/extractor/mememuseum.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://meme.museum/""" + +from .common import Extractor, Message +from .. import text + + +class MememuseumExtractor(Extractor): + """Base class for meme.museum extractors""" + basecategory = "booru" + category = "mememuseum" + filename_fmt = "{category}_{id}_{md5}.{extension}" + archive_fmt = "{id}" + root = "https://meme.museum" + + def items(self): + data = self.metadata() + + for post in self.posts(): + url = post["file_url"] + for key in ("id", "width", "height"): + post[key] = text.parse_int(post[key]) + post["tags"] = text.unquote(post["tags"]) + post.update(data) + yield Message.Directory, post + yield Message.Url, url, text.nameext_from_url(url, post) + + def metadata(self): + """Return general metadata""" + return () + + def posts(self): + """Return an iterable containing data of all relevant posts""" + return () + + +class MememuseumTagExtractor(MememuseumExtractor): + """Extractor for images from meme.museum by search-tags""" + subcategory = "tag" + directory_fmt = ("{category}", "{search_tags}") + pattern = r"(?:https?://)?meme\.museum/post/list/([^/?#]+)" + test = ("https://meme.museum/post/list/animated/1", { + "pattern": r"https://meme\.museum/_images/\w+/\d+%20-%20", + "count": ">= 30" + }) + per_page = 25 + + def __init__(self, match): + MememuseumExtractor.__init__(self, match) + self.tags = text.unquote(match.group(1)) + + def metadata(self): + return {"search_tags": self.tags} + + def posts(self): + pnum = 1 + while True: + url = "{}/post/list/{}/{}".format(self.root, self.tags, pnum) + extr = text.extract_from(self.request(url).text) + + while True: + mime = extr("data-mime='", "'") + if not mime: + break + + pid = extr("data-post-id='", "'") + tags, dimensions, size = extr("title='", "'").split(" // ") + md5 = extr("/_thumbs/", "/") + width, _, height = dimensions.partition("x") + + yield { + "file_url": "{}/_images/{}/{}%20-%20{}.{}".format( + self.root, md5, pid, text.quote(tags), + mime.rpartition("/")[2]), + "id": pid, "md5": md5, "tags": tags, + "width": width, "height": height, + "size": text.parse_bytes(size[:-1]), + } + + if not extr(">Next<", ">"): + return + pnum += 1 + + +class MememuseumPostExtractor(MememuseumExtractor): + """Extractor for single images from meme.museum""" + subcategory = "post" + pattern = r"(?:https?://)?meme\.museum/post/view/(\d+)" + test = ("https://meme.museum/post/view/10243", { + "pattern": r"https://meme\.museum/_images/105febebcd5ca791ee332adc4997" + r"1f78/10243%20-%20g%20beard%20open_source%20richard_stallm" + r"an%20stallman%20tagme%20text\.jpg", + "keyword": "3c8009251480cf17248c08b2b194dc0c4d59580e", + "content": "45565f3f141fc960a8ae1168b80e718a494c52d2", + }) + + def __init__(self, match): + MememuseumExtractor.__init__(self, match) + self.post_id = match.group(1) + + def posts(self): + url = "{}/post/view/{}".format(self.root, self.post_id) + extr = text.extract_from(self.request(url).text) + + return ({ + "id" : self.post_id, + "tags" : extr(": ", "<"), + "md5" : extr("/_thumbs/", "/"), + "file_url": self.root + extr("id='main_image' src='", "'"), + "width" : extr("data-width=", " ").strip("'\""), + "height" : extr("data-height=", " ").strip("'\""), + "size" : 0, + },) |
