diff options
| author | 2024-10-14 03:02:05 -0400 | |
|---|---|---|
| committer | 2024-10-14 03:02:05 -0400 | |
| commit | 0db541f524e1774865efebcbe5653e9ad76ea2e8 (patch) | |
| tree | b0fc6ce19628931f61c43f2dc9ebfd88a4332388 /gallery_dl/extractor/generic.py | |
| parent | 1a457ed68769880ab7760e0746f0cbbd9ca00487 (diff) | |
New upstream version 1.27.6. (upstream/1.27.6)
Diffstat (limited to 'gallery_dl/extractor/generic.py')
| -rw-r--r-- | gallery_dl/extractor/generic.py | 51 |
1 files changed, 27 insertions, 24 deletions
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py index a6c1d5a..370cd43 100644 --- a/gallery_dl/extractor/generic.py +++ b/gallery_dl/extractor/generic.py @@ -89,30 +89,33 @@ class GenericExtractor(Extractor): def metadata(self, page): """Extract generic webpage metadata, return them in a dict.""" - data = {} - data['path'] = self.path.replace("/", "") - data['pageurl'] = self.url - data['title'] = text.extr(page, '<title>', "</title>") - data['description'] = text.extr( - page, '<meta name="description" content="', '"') - data['keywords'] = text.extr( - page, '<meta name="keywords" content="', '"') - data['language'] = text.extr( - page, '<meta name="language" content="', '"') - data['name'] = text.extr( - page, '<meta itemprop="name" content="', '"') - data['copyright'] = text.extr( - page, '<meta name="copyright" content="', '"') - data['og_site'] = text.extr( - page, '<meta property="og:site" content="', '"') - data['og_site_name'] = text.extr( - page, '<meta property="og:site_name" content="', '"') - data['og_title'] = text.extr( - page, '<meta property="og:title" content="', '"') - data['og_description'] = text.extr( - page, '<meta property="og:description" content="', '"') - - data = {k: text.unescape(data[k]) for k in data if data[k] != ""} + data = { + "title" : text.extr( + page, "<title>", "</title>"), + "description" : text.extr( + page, '<meta name="description" content="', '"'), + "keywords" : text.extr( + page, '<meta name="keywords" content="', '"'), + "language" : text.extr( + page, '<meta name="language" content="', '"'), + "name" : text.extr( + page, '<meta itemprop="name" content="', '"'), + "copyright" : text.extr( + page, '<meta name="copyright" content="', '"'), + "og_site" : text.extr( + page, '<meta property="og:site" content="', '"'), + "og_site_name" : text.extr( + page, '<meta property="og:site_name" content="', '"'), + "og_title" : text.extr( + page, '<meta property="og:title" content="', '"'), + 
"og_description": text.extr( + page, '<meta property="og:description" content="', '"'), + + } + + data = {k: text.unescape(v) for k, v in data.items() if v} + data["path"] = self.path.replace("/", "") + data["pageurl"] = self.url return data |
