diff options
Diffstat (limited to 'gallery_dl/extractor/__init__.py')
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 189 |
1 files changed, 189 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Registry of extractor classes and URL -> extractor lookup"""

import re
import importlib

# Names of the extractor submodules of this package.  They are imported
# lazily, in this order, by _list_classes().
modules = [
    "2chan",
    "35photo",
    "3dbooru",
    "4chan",
    "500px",
    "8chan",
    "8muses",
    "artstation",
    "behance",
    "bobx",
    "danbooru",
    "deviantart",
    "dynastyscans",
    "e621",
    "exhentai",
    "fallenangels",
    "flickr",
    "gelbooru",
    "gfycat",
    "hbrowse",
    "hentai2read",
    "hentaicafe",
    "hentaifoundry",
    "hentaifox",
    "hentaihere",
    "hentainexus",
    "hitomi",
    "hypnohub",
    "idolcomplex",
    "imagebam",
    "imagefap",
    "imgbox",
    "imgth",
    "imgur",
    "instagram",
    "keenspot",
    "khinsider",
    "kissmanga",
    "komikcast",
    "konachan",
    "livedoor",
    "luscious",
    "mangadex",
    "mangafox",
    "mangahere",
    "mangapanda",
    "mangapark",
    "mangareader",
    "mangastream",
    "mangoxo",
    "myportfolio",
    "newgrounds",
    "ngomik",
    "nhentai",
    "nijie",
    "nsfwalbum",
    "paheal",
    "patreon",
    "photobucket",
    "piczel",
    "pinterest",
    "pixiv",
    "pixnet",
    "plurk",
    "pornhub",
    "pururin",
    "reactor",
    "readcomiconline",
    "reddit",
    "rule34",
    "safebooru",
    "sankaku",
    "sankakucomplex",
    "seiga",
    "senmanga",
    "sexcom",
    "simplyhentai",
    "slickpic",
    "slideshare",
    "smugmug",
    "tsumino",
    "tumblr",
    "twitter",
    "vanillarock",
    "wallhaven",
    "warosu",
    "weibo",
    "wikiart",
    "xhamster",
    "xvideos",
    "yandere",
    "yaplog",
    "yuki",
    "foolfuuka",
    "foolslide",
    "mastodon",
    "shopify",
    "imagehosts",
    "directlink",
    "recursive",
    "oauth",
    "test",
]


def find(url):
    """Find a suitable extractor for the given URL

    Walks all known extractor classes (importing extractor modules on
    demand) and returns an instance of the first class whose pattern
    matches 'url' and which is not currently blacklisted.  The instance
    is created by calling the class with the match object.
    Returns None if no class matches.
    """
    for cls in _list_classes():
        match = cls.pattern.match(url)
        if match and cls not in _blacklist:
            return cls(match)
    return None


def add(cls):
    """Add 'cls' to the list of available extractors

    The class' 'pattern' attribute is replaced by its compiled form.
    Returns 'cls' itself, so this can also be applied as a decorator.
    """
    cls.pattern = re.compile(cls.pattern)
    _cache.append(cls)
    return cls


def add_module(module):
    """Add all extractors in 'module' to the list of available extractors

    Compiles the 'pattern' of every extractor class defined in 'module',
    registers those classes and returns them as a list.
    """
    classes = _get_classes(module)
    for cls in classes:
        cls.pattern = re.compile(cls.pattern)
    _cache.extend(classes)
    return classes


def extractors():
    """Return a list of all available extractor classes, sorted by name"""
    return sorted(
        _list_classes(),
        key=lambda x: x.__name__
    )


class blacklist():
    """Context Manager to blacklist extractor modules

    While the context is active, find() skips every extractor class
    whose 'category' is in 'categories' as well as every class listed
    in 'extractors'.
    """

    def __init__(self, categories, extractors=None):
        # Work on a copy: appending to the caller's own list would
        # silently modify an object this class does not own.
        self.extractors = list(extractors) if extractors else []
        for cls in _list_classes():
            if cls.category in categories:
                self.extractors.append(cls)
        # Blacklist states saved by __enter__, newest last.
        self._saved = []

    def __enter__(self):
        # Remember the current state so that __exit__ can restore it
        # instead of discarding entries of an enclosing blacklist.
        self._saved.append(set(_blacklist))
        _blacklist.update(self.extractors)

    def __exit__(self, etype, value, traceback):
        previous = self._saved.pop()
        # Modify the set in place; find() refers to this very object.
        _blacklist.clear()
        _blacklist.update(previous)


# --------------------------------------------------------------------
# internals

_cache = []                    # extractor classes registered so far
_blacklist = set()             # classes find() must currently skip
_module_iter = iter(modules)   # shared cursor over not-yet-imported modules


def _list_classes():
    """Yield all available extractor classes

    Classes that are already registered come first.  After that, the
    remaining modules are imported one at a time and their classes are
    registered and yielded.  '_module_iter' is shared between calls, so
    each module gets imported at most once.
    """
    yield from _cache

    for module_name in _module_iter:
        module = importlib.import_module("."+module_name, __package__)
        yield from add_module(module)


def _get_classes(module):
    """Return a list of all extractor classes in a module

    An object counts as extractor class if it has a 'pattern' attribute
    and was defined in 'module' itself (not merely imported into it).
    """
    return [
        cls for cls in module.__dict__.values() if (
            hasattr(cls, "pattern") and
            # not every object carrying 'pattern' has a '__module__'
            getattr(cls, "__module__", None) == module.__name__
        )
    ]
