diff options
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
| -rw-r--r-- | gallery_dl/extractor/shopify.py | 28 |
1 files changed, 11 insertions, 17 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 35895bb..b2498a0 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -10,7 +10,6 @@
 
 from .common import Extractor, Message, SharedConfigMixin, generate_extractors
 from .. import text
-import time
 import re
 
 
@@ -24,19 +23,9 @@ class ShopifyExtractor(SharedConfigMixin, Extractor):
         Extractor.__init__(self, match)
         self.item_url = self.root + match.group(1)
 
-    def request(self, url, method="GET", expect=range(400, 500), **kwargs):
-        tries = 0
-        kwargs["expect"] = expect
-        while True:
-            response = Extractor.request(self, url, method, **kwargs)
-            if response.status_code not in (429, 430):
-                return response
-            tries += 1
-            waittime = 2 ** (tries + 2)
-            self.log.warning(
-                "HTTP status %s: %s - Waiting for %d seconds",
-                response.status_code, response.reason, waittime)
-            time.sleep(waittime)
+    def request(self, url, **kwargs):
+        kwargs["retries"] = float("inf")
+        return Extractor.request(self, url, **kwargs)
 
     def items(self):
         data = self.metadata()
@@ -45,9 +34,10 @@ class ShopifyExtractor(SharedConfigMixin, Extractor):
 
         headers = {"X-Requested-With": "XMLHttpRequest"}
         for url in self.products():
-            response = self.request(url + ".json", headers=headers)
+            response = self.request(
+                url + ".json", headers=headers, fatal=False)
             if response.status_code >= 400:
-                self.log.warning('Skipping %s ("%d: %s")',
+                self.log.warning('Skipping %s ("%s: %s")',
                                  url, response.status_code, response.reason)
                 continue
             product = response.json()["product"]
@@ -89,10 +79,14 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
         while True:
             page = self.request(self.item_url, params=params).text
             urls = search_re.findall(page)
+            last = None
 
             if not urls:
                 return
             for path in urls:
+                if last == path:
+                    continue
+                last = path
                 yield self.root + path
             params["page"] += 1
 
@@ -113,7 +107,7 @@ EXTRACTORS = {
         "pattern": r"(?:www\.)?fashionnova\.com",
         "test-product": (
             ("https://www.fashionnova.com/products/essential-slide-red", {
-                "pattern": r"https?://cdn\.shopify.com/",
+                "pattern": r"https?://cdn\d*\.shopify.com/",
                 "count": 3,
             }),
             ("https://www.fashionnova.com/collections/flats/products/name"),
