summaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/shopify.py
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@ubuntu.com>2019-07-02 04:33:45 -0400
committerLibravatarUnit 193 <unit193@ubuntu.com>2019-07-02 04:33:45 -0400
commit195c45911e79c33cf0bb986721365fb06df5a153 (patch)
treeac0c9b6ef40bea7aa7ab0c5c3cb500eb510668fa /gallery_dl/extractor/shopify.py
Import Upstream version 1.8.7upstream/1.8.7
Diffstat (limited to 'gallery_dl/extractor/shopify.py')
-rw-r--r--gallery_dl/extractor/shopify.py136
1 files changed, 136 insertions, 0 deletions
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
new file mode 100644
index 0000000..35895bb
--- /dev/null
+++ b/gallery_dl/extractor/shopify.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Shopify instances"""
+
+from .common import Extractor, Message, SharedConfigMixin, generate_extractors
+from .. import text
+import time
+import re
+
+
+class ShopifyExtractor(SharedConfigMixin, Extractor):
+ """Base class for Shopify extractors"""
+ basecategory = "shopify"
+ filename_fmt = "{product[title]}_{num:>02}_{id}.{extension}"
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.item_url = self.root + match.group(1)
+
+ def request(self, url, method="GET", expect=range(400, 500), **kwargs):
+ tries = 0
+ kwargs["expect"] = expect
+ while True:
+ response = Extractor.request(self, url, method, **kwargs)
+ if response.status_code not in (429, 430):
+ return response
+ tries += 1
+ waittime = 2 ** (tries + 2)
+ self.log.warning(
+ "HTTP status %s: %s - Waiting for %d seconds",
+ response.status_code, response.reason, waittime)
+ time.sleep(waittime)
+
+ def items(self):
+ data = self.metadata()
+ yield Message.Version, 1
+ yield Message.Directory, data
+
+ headers = {"X-Requested-With": "XMLHttpRequest"}
+ for url in self.products():
+ response = self.request(url + ".json", headers=headers)
+ if response.status_code >= 400:
+ self.log.warning('Skipping %s ("%d: %s")',
+ url, response.status_code, response.reason)
+ continue
+ product = response.json()["product"]
+ del product["image"]
+
+ for num, image in enumerate(product.pop("images"), 1):
+ text.nameext_from_url(image["src"], image)
+ image.update(data)
+ image["product"] = product
+ image["num"] = num
+ yield Message.Url, image["src"], image
+
+ def metadata(self):
+ """Return general metadata"""
+ return {}
+
+ def products(self):
+ """Return an iterable with all relevant product URLs"""
+
+
+class ShopifyCollectionExtractor(ShopifyExtractor):
+ """Base class for collection extractors for Shopify based sites"""
+ subcategory = "collection"
+ directory_fmt = ("{category}", "{collection[title]}")
+ pattern_fmt = r"(/collections/[\w-]+)/?(?:\?([^#]+))?(?:$|#)"
+
+ def __init__(self, match):
+ ShopifyExtractor.__init__(self, match)
+ self.params = match.group(2)
+
+ def metadata(self):
+ return self.request(self.item_url + ".json").json()
+
+ def products(self):
+ params = text.parse_query(self.params)
+ params["page"] = text.parse_int(params.get("page"), 1)
+ search_re = re.compile(r"/collections/[\w-]+/products/[\w-]+")
+
+ while True:
+ page = self.request(self.item_url, params=params).text
+ urls = search_re.findall(page)
+
+ if not urls:
+ return
+ for path in urls:
+ yield self.root + path
+ params["page"] += 1
+
+
+class ShopifyProductExtractor(ShopifyExtractor):
+ """Base class for product extractors for Shopify based sites"""
+ subcategory = "product"
+ directory_fmt = ("{category}", "Products")
+ pattern_fmt = r"((?:/collections/[\w-]+)?/products/[\w-]+)"
+
+ def products(self):
+ return (self.item_url,)
+
+
+EXTRACTORS = {
+ "fashionnova": {
+ "root": "https://www.fashionnova.com",
+ "pattern": r"(?:www\.)?fashionnova\.com",
+ "test-product": (
+ ("https://www.fashionnova.com/products/essential-slide-red", {
+ "pattern": r"https?://cdn\.shopify.com/",
+ "count": 3,
+ }),
+ ("https://www.fashionnova.com/collections/flats/products/name"),
+ ),
+ "test-collection": (
+ ("https://www.fashionnova.com/collections/mini-dresses", {
+ "range": "1-20",
+ "count": 20,
+ }),
+ ("https://www.fashionnova.com/collections/mini-dresses/?page=1"),
+ ("https://www.fashionnova.com/collections/mini-dresses#1"),
+ ),
+
+ },
+}
+
+generate_extractors(EXTRACTORS, globals(), (
+ ShopifyProductExtractor,
+ ShopifyCollectionExtractor,
+))