summary refs log tree commit diff stats
path: root/gallery_dl/extractor/shopify.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
-rw-r--r-- gallery_dl/extractor/shopify.py 28
1 files changed, 11 insertions, 17 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 35895bb..b2498a0 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message, SharedConfigMixin, generate_extractors
from .. import text
-import time
import re
@@ -24,19 +23,9 @@ class ShopifyExtractor(SharedConfigMixin, Extractor):
Extractor.__init__(self, match)
self.item_url = self.root + match.group(1)
- def request(self, url, method="GET", expect=range(400, 500), **kwargs):
- tries = 0
- kwargs["expect"] = expect
- while True:
- response = Extractor.request(self, url, method, **kwargs)
- if response.status_code not in (429, 430):
- return response
- tries += 1
- waittime = 2 ** (tries + 2)
- self.log.warning(
- "HTTP status %s: %s - Waiting for %d seconds",
- response.status_code, response.reason, waittime)
- time.sleep(waittime)
+ def request(self, url, **kwargs):
+ kwargs["retries"] = float("inf")
+ return Extractor.request(self, url, **kwargs)
def items(self):
data = self.metadata()
@@ -45,9 +34,10 @@ class ShopifyExtractor(SharedConfigMixin, Extractor):
headers = {"X-Requested-With": "XMLHttpRequest"}
for url in self.products():
- response = self.request(url + ".json", headers=headers)
+ response = self.request(
+ url + ".json", headers=headers, fatal=False)
if response.status_code >= 400:
- self.log.warning('Skipping %s ("%d: %s")',
+ self.log.warning('Skipping %s ("%s: %s")',
url, response.status_code, response.reason)
continue
product = response.json()["product"]
@@ -89,10 +79,14 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
while True:
page = self.request(self.item_url, params=params).text
urls = search_re.findall(page)
+ last = None
if not urls:
return
for path in urls:
+ if last == path:
+ continue
+ last = path
yield self.root + path
params["page"] += 1
@@ -113,7 +107,7 @@ EXTRACTORS = {
"pattern": r"(?:www\.)?fashionnova\.com",
"test-product": (
("https://www.fashionnova.com/products/essential-slide-red", {
- "pattern": r"https?://cdn\.shopify.com/",
+ "pattern": r"https?://cdn\d*\.shopify.com/",
"count": 3,
}),
("https://www.fashionnova.com/collections/flats/products/name"),