summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/shopify.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
-rw-r--r-- gallery_dl/extractor/shopify.py 64
1 file changed, 21 insertions, 43 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 1bc353a..6d924de 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -10,7 +10,6 @@
from .common import BaseExtractor, Message
from .. import text
-import re
class ShopifyExtractor(BaseExtractor):
@@ -27,17 +26,7 @@ class ShopifyExtractor(BaseExtractor):
data = self.metadata()
yield Message.Directory, data
- headers = {"X-Requested-With": "XMLHttpRequest"}
- for url in self.products():
- response = self.request(
- url + ".json", headers=headers, fatal=False)
- if response.status_code >= 400:
- self.log.warning('Skipping %s ("%s: %s")',
- url, response.status_code, response.reason)
- continue
- product = response.json()["product"]
- del product["image"]
-
+ for product in self.products():
for num, image in enumerate(product.pop("images"), 1):
text.nameext_from_url(image["src"], image)
image.update(data)
@@ -59,7 +48,10 @@ BASE_PATTERN = ShopifyExtractor.update({
"pattern": r"(?:www\.)?fashionnova\.com",
},
"omgmiamiswimwear": {
- "root": "https://www.omgmiamiswimwear.com"
+ "root": "https://www.omgmiamiswimwear.com",
+ },
+ "windsorstore": {
+ "root": "https://www.windsorstore.com",
},
})
@@ -73,45 +65,27 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
("https://www.fashionnova.com/collections/mini-dresses", {
"range": "1-20",
"count": 20,
- "archive": False,
}),
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
("https://www.omgmiamiswimwear.com/collections/fajas"),
+ ("https://www.windsorstore.com/collections/dresses-ball-gowns"),
)
def metadata(self):
return self.request(self.item_url + ".json").json()
def products(self):
- params = {"page": 1}
- fetch = True
- last = None
-
- for pattern in (
- r"/collections/[\w-]+/products/[\w-]+",
- r"href=[\"'](/products/[\w-]+)",
- ):
- search_re = re.compile(pattern)
-
- while True:
- if fetch:
- page = self.request(self.item_url, params=params).text
- urls = search_re.findall(page)
-
- if len(urls) < 3:
- if last:
- return
- fetch = False
- break
- fetch = True
-
- for path in urls:
- if last == path:
- continue
- last = path
- yield self.root + path
- params["page"] += 1
+ url = self.item_url + "/products.json"
+
+ while url:
+ response = self.request(url)
+ yield from response.json()["products"]
+
+ url = response.links.get("next")
+ if not url:
+ return
+ url = url["url"]
class ShopifyProductExtractor(ShopifyExtractor):
@@ -129,7 +103,11 @@ class ShopifyProductExtractor(ShopifyExtractor):
"count": 5,
}),
("https://www.fashionnova.com/collections/flats/products/name"),
+ ("https://www.windsorstore.com/collections/accessories-belts/products"
+ "/rhine-buckle-dbl-o-ring-pu-strap-belt-073010158001"),
)
def products(self):
- return (self.item_url,)
+ product = self.request(self.item_url + ".json").json()["product"]
+ del product["image"]
+ return (product,)