diff options
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
| -rw-r--r-- | gallery_dl/extractor/shopify.py | 64 |
1 files changed, 21 insertions, 43 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py index 1bc353a..6d924de 100644 --- a/gallery_dl/extractor/shopify.py +++ b/gallery_dl/extractor/shopify.py @@ -10,7 +10,6 @@ from .common import BaseExtractor, Message from .. import text -import re class ShopifyExtractor(BaseExtractor): @@ -27,17 +26,7 @@ class ShopifyExtractor(BaseExtractor): data = self.metadata() yield Message.Directory, data - headers = {"X-Requested-With": "XMLHttpRequest"} - for url in self.products(): - response = self.request( - url + ".json", headers=headers, fatal=False) - if response.status_code >= 400: - self.log.warning('Skipping %s ("%s: %s")', - url, response.status_code, response.reason) - continue - product = response.json()["product"] - del product["image"] - + for product in self.products(): for num, image in enumerate(product.pop("images"), 1): text.nameext_from_url(image["src"], image) image.update(data) @@ -59,7 +48,10 @@ BASE_PATTERN = ShopifyExtractor.update({ "pattern": r"(?:www\.)?fashionnova\.com", }, "omgmiamiswimwear": { - "root": "https://www.omgmiamiswimwear.com" + "root": "https://www.omgmiamiswimwear.com", + }, + "windsorstore": { + "root": "https://www.windsorstore.com", }, }) @@ -73,45 +65,27 @@ class ShopifyCollectionExtractor(ShopifyExtractor): ("https://www.fashionnova.com/collections/mini-dresses", { "range": "1-20", "count": 20, - "archive": False, }), ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), ("https://www.fashionnova.com/collections/mini-dresses#1"), ("https://www.omgmiamiswimwear.com/collections/fajas"), + ("https://www.windsorstore.com/collections/dresses-ball-gowns"), ) def metadata(self): return self.request(self.item_url + ".json").json() def products(self): - params = {"page": 1} - fetch = True - last = None - - for pattern in ( - r"/collections/[\w-]+/products/[\w-]+", - r"href=[\"'](/products/[\w-]+)", - ): - search_re = re.compile(pattern) - - while True: - if fetch: - page = self.request(self.item_url, params=params).text - urls = search_re.findall(page) - - if len(urls) < 3: - if last: - return - fetch = False - break - fetch = True - - for path in urls: - if last == path: - continue - last = path - yield self.root + path - params["page"] += 1 + url = self.item_url + "/products.json" + + while url: + response = self.request(url) + yield from response.json()["products"] + + url = response.links.get("next") + if not url: + return + url = url["url"] class ShopifyProductExtractor(ShopifyExtractor): @@ -129,7 +103,11 @@ class ShopifyProductExtractor(ShopifyExtractor): "count": 5, }), ("https://www.fashionnova.com/collections/flats/products/name"), + ("https://www.windsorstore.com/collections/accessories-belts/products" + "/rhine-buckle-dbl-o-ring-pu-strap-belt-073010158001"), ) def products(self): - return (self.item_url,) + product = self.request(self.item_url + ".json").json()["product"] + del product["image"] + return (product,) |
