diff options
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
| -rw-r--r-- | gallery_dl/extractor/shopify.py | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py new file mode 100644 index 0000000..35895bb --- /dev/null +++ b/gallery_dl/extractor/shopify.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for Shopify instances""" + +from .common import Extractor, Message, SharedConfigMixin, generate_extractors +from .. import text +import time +import re + + +class ShopifyExtractor(SharedConfigMixin, Extractor): + """Base class for Shopify extractors""" + basecategory = "shopify" + filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}" + archive_fmt = "{id}" + + def __init__(self, match): + Extractor.__init__(self, match) + self.item_url = self.root + match.group(1) + + def request(self, url, method="GET", expect=range(400, 500), **kwargs): + tries = 0 + kwargs["expect"] = expect + while True: + response = Extractor.request(self, url, method, **kwargs) + if response.status_code not in (429, 430): + return response + tries += 1 + waittime = 2 ** (tries + 2) + self.log.warning( + "HTTP status %s: %s - Waiting for %d seconds", + response.status_code, response.reason, waittime) + time.sleep(waittime) + + def items(self): + data = self.metadata() + yield Message.Version, 1 + yield Message.Directory, data + + headers = {"X-Requested-With": "XMLHttpRequest"} + for url in self.products(): + response = self.request(url + ".json", headers=headers) + if response.status_code >= 400: + self.log.warning('Skipping %s ("%d: %s")', + url, response.status_code, response.reason) + continue + product = response.json()["product"] + del product["image"] + + for num, image in enumerate(product.pop("images"), 1): + text.nameext_from_url(image["src"], image) + image.update(data) + image["product"] = product + image["num"] = num + yield Message.Url, image["src"], image + + def metadata(self): + """Return general metadata""" + return {} + + def products(self): + """Return an iterable with all relevant product URLs""" + + +class ShopifyCollectionExtractor(ShopifyExtractor): + """Base class for collection extractors for Shopify based sites""" + subcategory = "collection" + directory_fmt = ("{category}", "{collection[title]}") + pattern_fmt = r"(/collections/[\w-]+)/?(?:\?([^#]+))?(?:$|#)" + + def __init__(self, match): + ShopifyExtractor.__init__(self, match) + self.params = match.group(2) + + def metadata(self): + return self.request(self.item_url + ".json").json() + + def products(self): + params = text.parse_query(self.params) + params["page"] = text.parse_int(params.get("page"), 1) + search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+") + + while True: + page = self.request(self.item_url, params=params).text + urls = search_re.findall(page) + + if not urls: + return + for path in urls: + yield self.root + path + params["page"] += 1 + + +class ShopifyProductExtractor(ShopifyExtractor): + """Base class for product extractors for Shopify based sites""" + subcategory = "product" + directory_fmt = ("{category}", "Products") + pattern_fmt = r"((?:/collections/[\w-]+)?/products/[\w-]+)" + + def products(self): + return (self.item_url,) + + +EXTRACTORS = { + "fashionnova": { + "root": "https://www.fashionnova.com", + "pattern": r"(?:www\.)?fashionnova\.com", + "test-product": ( + ("https://www.fashionnova.com/products/essential-slide-red", { + "pattern": r"https?://cdn\.shopify.com/", + "count": 3, + }), + ("https://www.fashionnova.com/collections/flats/products/name"), + ), + "test-collection": ( + ("https://www.fashionnova.com/collections/mini-dresses", { + "range": "1-20", + "count": 20, + }), + ("https://www.fashionnova.com/collections/mini-dresses/?page=1"), + ("https://www.fashionnova.com/collections/mini-dresses#1"), + ), + + }, +} + +generate_extractors(EXTRACTORS, globals(), ( + ShopifyProductExtractor, + ShopifyCollectionExtractor, +)) |
