aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl/extractor/lensdump.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/lensdump.py')
-rw-r--r--gallery_dl/extractor/lensdump.py109
1 files changed, 51 insertions, 58 deletions
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index 12e8860..72a6453 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -17,42 +17,30 @@ class LensdumpBase():
category = "lensdump"
root = "https://lensdump.com"
- def nodes(self, page=None):
- if page is None:
- page = self.request(self.url).text
-
- # go through all pages starting from the oldest
- page_url = text.urljoin(self.root, text.extr(
- text.extr(page, ' id="list-most-oldest-link"', '>'),
- 'href="', '"'))
- while page_url is not None:
- if page_url == self.url:
- current_page = page
- else:
- current_page = self.request(page_url).text
-
- for node in text.extract_iter(
- current_page, ' class="list-item ', '>'):
- yield node
-
- # find url of next page
- page_url = text.extr(
- text.extr(current_page, ' data-pagination="next"', '>'),
- 'href="', '"')
- if page_url is not None and len(page_url) > 0:
- page_url = text.urljoin(self.root, page_url)
- else:
- page_url = None
+ def _pagination(self, page, begin, end):
+ while True:
+ yield from text.extract_iter(page, begin, end)
+
+ next = text.extr(page, ' data-pagination="next"', '>')
+ if not next:
+ return
+
+ url = text.urljoin(self.root, text.extr(next, 'href="', '"'))
+ page = self.request(url).text
class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
subcategory = "album"
- pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
+ pattern = BASE_PATTERN + r"/a/(\w+)(?:/?\?([^#]+))?"
example = "https://lensdump.com/a/ID"
def __init__(self, match):
- GalleryExtractor.__init__(self, match, match.string)
- self.gallery_id = match.group(1) or match.group(2)
+ self.gallery_id, query = match.groups()
+ if query:
+ url = "{}/a/{}/?{}".format(self.root, self.gallery_id, query)
+ else:
+ url = "{}/a/{}".format(self.root, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
return {
@@ -62,40 +50,48 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
}
def images(self, page):
- for node in self.nodes(page):
- # get urls and filenames of images in current page
- json_data = util.json_loads(text.unquote(
- text.extr(node, "data-object='", "'") or
- text.extr(node, 'data-object="', '"')))
- image_id = json_data.get('name')
- image_url = json_data.get('url')
- image_title = json_data.get('title')
+ for image in self._pagination(page, ' class="list-item ', '>'):
+
+ data = util.json_loads(text.unquote(
+ text.extr(image, "data-object='", "'") or
+ text.extr(image, 'data-object="', '"')))
+ image_id = data.get("name")
+ image_url = data.get("url")
+ image_title = data.get("title")
if image_title is not None:
image_title = text.unescape(image_title)
+
yield (image_url, {
- 'id': image_id,
- 'url': image_url,
- 'title': image_title,
- 'name': json_data.get('filename'),
- 'filename': image_id,
- 'extension': json_data.get('extension'),
- 'height': text.parse_int(json_data.get('height')),
- 'width': text.parse_int(json_data.get('width')),
+ "id" : image_id,
+ "url" : image_url,
+ "title" : image_title,
+ "name" : data.get("filename"),
+ "filename" : image_id,
+ "extension": data.get("extension"),
+ "width" : text.parse_int(data.get("width")),
+ "height" : text.parse_int(data.get("height")),
})
class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
"""Extractor for album list from lensdump.com"""
subcategory = "albums"
- pattern = BASE_PATTERN + r"/\w+/albums"
- example = "https://lensdump.com/USER/albums"
+ pattern = BASE_PATTERN + r"/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?"
+ example = "https://lensdump.com/USER"
def items(self):
- for node in self.nodes():
- album_url = text.urljoin(self.root, text.extr(
- node, 'data-url-short="', '"'))
- yield Message.Queue, album_url, {
- "_extractor": LensdumpAlbumExtractor}
+ user, query = self.groups
+ url = "{}/{}/".format(self.root, user)
+ if query:
+ params = text.parse_query(query)
+ else:
+ params = {"sort": "date_asc", "page": "1"}
+ page = self.request(url, params=params).text
+
+ data = {"_extractor": LensdumpAlbumExtractor}
+ for album_path in self._pagination(page, 'data-url-short="', '"'):
+ album_url = text.urljoin(self.root, album_path)
+ yield Message.Queue, album_url, data
class LensdumpImageExtractor(LensdumpBase, Extractor):
@@ -107,16 +103,13 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
example = "https://lensdump.com/i/ID"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.key = match.group(1)
-
def items(self):
- url = "{}/i/{}".format(self.root, self.key)
+ key = self.groups[0]
+ url = "{}/i/{}".format(self.root, key)
extr = text.extract_from(self.request(url).text)
data = {
- "id" : self.key,
+ "id" : key,
"title" : text.unescape(extr(
'property="og:title" content="', '"')),
"url" : extr(