diff options
Diffstat (limited to 'gallery_dl/extractor/architizer.py')
| -rw-r--r-- | gallery_dl/extractor/architizer.py | 39 |
1 files changed, 10 insertions, 29 deletions
diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py index dbc197e..8064e78 100644 --- a/gallery_dl/extractor/architizer.py +++ b/gallery_dl/extractor/architizer.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -21,25 +21,7 @@ class ArchitizerProjectExtractor(GalleryExtractor): filename_fmt = "{filename}.{extension}" archive_fmt = "{gid}_{num}" pattern = r"(?:https?://)?architizer\.com/projects/([^/?#]+)" - test = ("https://architizer.com/projects/house-lo/", { - "pattern": r"https://architizer-prod\.imgix\.net/media/mediadata" - r"/uploads/.+\.jpg$", - "keyword": { - "count": 27, - "description": str, - "firm": "Atelier Lina Bellovicova", - "gid": "225496", - "location": "Czechia", - "num": int, - "size": "1000 sqft - 3000 sqft", - "slug": "house-lo", - "status": "Built", - "subcategory": "project", - "title": "House LO", - "type": "Residential › Private House", - "year": "2020", - }, - }) + example = "https://architizer.com/projects/NAME/" def __init__(self, match): url = "{}/projects/{}/".format(self.root, match.group(1)) @@ -47,11 +29,13 @@ class ArchitizerProjectExtractor(GalleryExtractor): def metadata(self, page): extr = text.extract_from(page) + extr('id="Pages"', "") + return { - "title" : extr("data-name='", "'"), - "slug" : extr("data-slug='", "'"), - "gid" : extr("data-gid='", "'").rpartition(".")[2], - "firm" : extr("data-firm-leaders-str='", "'"), + "title" : extr('data-name="', '"'), + "slug" : extr('data-slug="', '"'), + "gid" : extr('data-gid="', '"').rpartition(".")[2], + "firm" : extr('data-firm-leaders-str="', '"'), "location" : extr("<h2>", "<").strip(), "type" : text.unescape(text.remove_html(extr( '<div class="title">Type</div>', '<br'))), @@ -70,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor): return [ (url, None) for url in text.extract_iter( - page, "property='og:image:secure_url' content='", "?") + page, 'property="og:image:secure_url" content="', "?") ] @@ -80,10 +64,7 @@ class ArchitizerFirmExtractor(Extractor): subcategory = "firm" root = "https://architizer.com" pattern = r"(?:https?://)?architizer\.com/firms/([^/?#]+)" - test = ("https://architizer.com/firms/olson-kundig/", { - "pattern": ArchitizerProjectExtractor.pattern, - "count": ">= 90", - }) + example = "https://architizer.com/firms/NAME/" def __init__(self, match): Extractor.__init__(self, match) |
