diff options
Diffstat (limited to 'test/test_extractor.py')
| -rw-r--r-- | test/test_extractor.py | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/test/test_extractor.py b/test/test_extractor.py new file mode 100644 index 0000000..fa0709b --- /dev/null +++ b/test/test_extractor.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Copyright 2018-2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import sys +import unittest +import string + +from gallery_dl import extractor +from gallery_dl.extractor.common import Extractor, Message +from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor + + +class FakeExtractor(Extractor): + category = "fake" + subcategory = "test" + pattern = "fake:" + + def items(self): + yield Message.Version, 1 + yield Message.Url, "text:foobar", {} + + +class TestExtractor(unittest.TestCase): + VALID_URIS = ( + "https://example.org/file.jpg", + "tumblr:foobar", + "oauth:flickr", + "test:pixiv:", + "recursive:https://example.org/document.html", + ) + + def setUp(self): + extractor._cache.clear() + extractor._module_iter = iter(extractor.modules) + + def test_find(self): + for uri in self.VALID_URIS: + result = extractor.find(uri) + self.assertIsInstance(result, Extractor, uri) + + for not_found in ("", "/tmp/file.ext"): + self.assertIsNone(extractor.find(not_found)) + + for invalid in (None, [], {}, 123, b"test:"): + with self.assertRaises(TypeError): + extractor.find(invalid) + + def test_add(self): + uri = "fake:foobar" + self.assertIsNone(extractor.find(uri)) + + extractor.add(FakeExtractor) + self.assertIsInstance(extractor.find(uri), FakeExtractor) + + def test_add_module(self): + uri = "fake:foobar" + self.assertIsNone(extractor.find(uri)) + + classes = extractor.add_module(sys.modules[__name__]) + self.assertEqual(len(classes), 1) + self.assertEqual(classes[0].pattern, FakeExtractor.pattern) + self.assertEqual(classes[0], FakeExtractor) + self.assertIsInstance(extractor.find(uri), FakeExtractor) + + def test_blacklist(self): + link_uri = "https://example.org/file.jpg" + test_uri = "test:" + fake_uri = "fake:" + + self.assertIsInstance(extractor.find(link_uri), DLExtractor) + self.assertIsInstance(extractor.find(test_uri), Extractor) + self.assertIsNone(extractor.find(fake_uri)) + + with extractor.blacklist(["directlink"]): + self.assertIsNone(extractor.find(link_uri)) + self.assertIsInstance(extractor.find(test_uri), Extractor) + self.assertIsNone(extractor.find(fake_uri)) + + with extractor.blacklist([], [DLExtractor, FakeExtractor]): + self.assertIsNone(extractor.find(link_uri)) + self.assertIsInstance(extractor.find(test_uri), Extractor) + self.assertIsNone(extractor.find(fake_uri)) + + with extractor.blacklist(["test"], [DLExtractor]): + self.assertIsNone(extractor.find(link_uri)) + self.assertIsNone(extractor.find(test_uri)) + self.assertIsNone(extractor.find(fake_uri)) + + def test_from_url(self): + for uri in self.VALID_URIS: + cls = extractor.find(uri).__class__ + extr = cls.from_url(uri) + self.assertIs(type(extr), cls) + self.assertIsInstance(extr, Extractor) + + for not_found in ("", "/tmp/file.ext"): + self.assertIsNone(FakeExtractor.from_url(not_found)) + + for invalid in (None, [], {}, 123, b"test:"): + with self.assertRaises(TypeError): + FakeExtractor.from_url(invalid) + + def test_unique_pattern_matches(self): + test_urls = [] + + # collect testcase URLs + for extr in extractor.extractors(): + for testcase in extr._get_tests(): + test_urls.append((testcase[0], extr)) + + # iterate over all testcase URLs + for url, extr1 in test_urls: + matches = [] + + # ... and apply all regex patterns to each one + for extr2 in extractor._cache: + + # skip DirectlinkExtractor pattern if it isn't tested + if extr1 != DLExtractor and extr2 == DLExtractor: + continue + + match = extr2.pattern.match(url) + if match: + matches.append(match) + + # fail if more or less than 1 match happened + if len(matches) > 1: + msg = "'{}' gets matched by more than one pattern:".format(url) + for match in matches: + msg += "\n- " + msg += match.re.pattern + self.fail(msg) + + if len(matches) < 1: + msg = "'{}' isn't matched by any pattern".format(url) + self.fail(msg) + + def test_docstrings(self): + """ensure docstring uniqueness""" + for extr1 in extractor.extractors(): + for extr2 in extractor.extractors(): + if extr1 != extr2 and extr1.__doc__ and extr2.__doc__: + self.assertNotEqual( + extr1.__doc__, + extr2.__doc__, + "{} <-> {}".format(extr1, extr2), + ) + + def test_names(self): + """Ensure extractor classes are named CategorySubcategoryExtractor""" + def capitalize(c): + if "-" in c: + return string.capwords(c.replace("-", " ")).replace(" ", "") + if "." in c: + c = c.replace(".", "") + return c.capitalize() + + mapping = { + "2chan" : "futaba", + "3dbooru": "threedeebooru", + "4chan" : "fourchan", + "4plebs" : "fourplebs", + "8chan" : "infinitychan", + "oauth" : None, + } + + for extr in extractor.extractors(): + category = mapping.get(extr.category, extr.category) + if category: + expected = "{}{}Extractor".format( + capitalize(category), + capitalize(extr.subcategory), + ) + if expected[0].isdigit(): + expected = "_" + expected + self.assertEqual(expected, extr.__name__) + + +if __name__ == "__main__": + unittest.main() |
