diff options
| author | 2020-08-15 17:48:11 -0400 | |
|---|---|---|
| committer | 2020-08-15 17:48:11 -0400 | |
| commit | 7cf59dc17c3607e096292462ed15d391be4e3dfd (patch) | |
| tree | 50d2750e958f43271dc6cc5310211cf8f8bbd9d0 /gallery_dl/extractor/shopify.py | |
| parent | ba039cfb2e1ba2522ee0a0fa2a84a1a6579e4877 (diff) | |
New upstream version 1.14.4. (upstream/1.14.4)
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
| -rw-r--r-- | gallery_dl/extractor/shopify.py | 43 |
1 files changed, 27 insertions, 16 deletions
```diff
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 28ee46c..9d1df18 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -74,21 +74,33 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
     def products(self):
         params = text.parse_query(self.params)
         params["page"] = text.parse_int(params.get("page"), 1)
-        search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+")
-
-        while True:
-            page = self.request(self.item_url, params=params).text
-            urls = search_re.findall(page)
-            last = None
-
-            if not urls:
-                return
-            for path in urls:
-                if last == path:
-                    continue
-                last = path
-                yield self.root + path
-            params["page"] += 1
+        fetch = True
+        last = None
+
+        for pattern in (
+            r"/collections/[\w-]+/products/[\w-]+",
+            r"href=[\"'](/products/[\w-]+)",
+        ):
+            search_re = re.compile(pattern)
+
+            while True:
+                if fetch:
+                    page = self.request(self.item_url, params=params).text
+                urls = search_re.findall(page)
+
+                if len(urls) < 3:
+                    if last:
+                        return
+                    fetch = False
+                    break
+                fetch = True
+
+                for path in urls:
+                    if last == path:
+                        continue
+                    last = path
+                    yield self.root + path
+                params["page"] += 1
 
 
 class ShopifyProductExtractor(ShopifyExtractor):
@@ -121,7 +133,6 @@ EXTRACTORS = {
             ("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
             ("https://www.fashionnova.com/collections/mini-dresses#1"),
         ),
-
     },
 }
 
```
