summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/shopify.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
-rw-r--r-- gallery_dl/extractor/shopify.py 43
1 files changed, 27 insertions, 16 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 28ee46c..9d1df18 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -74,21 +74,33 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
def products(self):
params = text.parse_query(self.params)
params["page"] = text.parse_int(params.get("page"), 1)
- search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+")
-
- while True:
- page = self.request(self.item_url, params=params).text
- urls = search_re.findall(page)
- last = None
-
- if not urls:
- return
- for path in urls:
- if last == path:
- continue
- last = path
- yield self.root + path
- params["page"] += 1
+ fetch = True
+ last = None
+
+ for pattern in (
+ r"/collections/[\w-]+/products/[\w-]+",
+ r"href=[\"'](/products/[\w-]+)",
+ ):
+ search_re = re.compile(pattern)
+
+ while True:
+ if fetch:
+ page = self.request(self.item_url, params=params).text
+ urls = search_re.findall(page)
+
+ if len(urls) < 3:
+ if last:
+ return
+ fetch = False
+ break
+ fetch = True
+
+ for path in urls:
+ if last == path:
+ continue
+ last = path
+ yield self.root + path
+ params["page"] += 1
class ShopifyProductExtractor(ShopifyExtractor):
@@ -121,7 +133,6 @@ EXTRACTORS = {
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
),
-
},
}