summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/shopify.py
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net>, 2020-08-15 17:48:11 -0400
committer: Unit 193 <unit193@unit193.net>, 2020-08-15 17:48:11 -0400
commit: 7cf59dc17c3607e096292462ed15d391be4e3dfd (patch)
tree: 50d2750e958f43271dc6cc5310211cf8f8bbd9d0 /gallery_dl/extractor/shopify.py
parent: ba039cfb2e1ba2522ee0a0fa2a84a1a6579e4877 (diff)
New upstream version 1.14.4. upstream/1.14.4
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
-rw-r--r-- gallery_dl/extractor/shopify.py 43
1 files changed, 27 insertions, 16 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 28ee46c..9d1df18 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -74,21 +74,33 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
def products(self):
params = text.parse_query(self.params)
params["page"] = text.parse_int(params.get("page"), 1)
- search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+")
-
- while True:
- page = self.request(self.item_url, params=params).text
- urls = search_re.findall(page)
- last = None
-
- if not urls:
- return
- for path in urls:
- if last == path:
- continue
- last = path
- yield self.root + path
- params["page"] += 1
+ fetch = True
+ last = None
+
+ for pattern in (
+ r"/collections/[\w-]+/products/[\w-]+",
+ r"href=[\"'](/products/[\w-]+)",
+ ):
+ search_re = re.compile(pattern)
+
+ while True:
+ if fetch:
+ page = self.request(self.item_url, params=params).text
+ urls = search_re.findall(page)
+
+ if len(urls) < 3:
+ if last:
+ return
+ fetch = False
+ break
+ fetch = True
+
+ for path in urls:
+ if last == path:
+ continue
+ last = path
+ yield self.root + path
+ params["page"] += 1
class ShopifyProductExtractor(ShopifyExtractor):
@@ -121,7 +133,6 @@ EXTRACTORS = {
("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
("https://www.fashionnova.com/collections/mini-dresses#1"),
),
-
},
}