diff options
Diffstat (limited to 'gallery_dl/extractor/lensdump.py')
| -rw-r--r-- | gallery_dl/extractor/lensdump.py | 109 |
1 files changed, 51 insertions, 58 deletions
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py index 12e8860..72a6453 100644 --- a/gallery_dl/extractor/lensdump.py +++ b/gallery_dl/extractor/lensdump.py @@ -17,42 +17,30 @@ class LensdumpBase(): category = "lensdump" root = "https://lensdump.com" - def nodes(self, page=None): - if page is None: - page = self.request(self.url).text - - # go through all pages starting from the oldest - page_url = text.urljoin(self.root, text.extr( - text.extr(page, ' id="list-most-oldest-link"', '>'), - 'href="', '"')) - while page_url is not None: - if page_url == self.url: - current_page = page - else: - current_page = self.request(page_url).text - - for node in text.extract_iter( - current_page, ' class="list-item ', '>'): - yield node - - # find url of next page - page_url = text.extr( - text.extr(current_page, ' data-pagination="next"', '>'), - 'href="', '"') - if page_url is not None and len(page_url) > 0: - page_url = text.urljoin(self.root, page_url) - else: - page_url = None + def _pagination(self, page, begin, end): + while True: + yield from text.extract_iter(page, begin, end) + + next = text.extr(page, ' data-pagination="next"', '>') + if not next: + return + + url = text.urljoin(self.root, text.extr(next, 'href="', '"')) + page = self.request(url).text class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor): subcategory = "album" - pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))" + pattern = BASE_PATTERN + r"/a/(\w+)(?:/?\?([^#]+))?" example = "https://lensdump.com/a/ID" def __init__(self, match): - GalleryExtractor.__init__(self, match, match.string) - self.gallery_id = match.group(1) or match.group(2) + self.gallery_id, query = match.groups() + if query: + url = "{}/a/{}/?{}".format(self.root, self.gallery_id, query) + else: + url = "{}/a/{}".format(self.root, self.gallery_id) + GalleryExtractor.__init__(self, match, url) def metadata(self, page): return { @@ -62,40 +50,48 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor): } def images(self, page): - for node in self.nodes(page): - # get urls and filenames of images in current page - json_data = util.json_loads(text.unquote( - text.extr(node, "data-object='", "'") or - text.extr(node, 'data-object="', '"'))) - image_id = json_data.get('name') - image_url = json_data.get('url') - image_title = json_data.get('title') + for image in self._pagination(page, ' class="list-item ', '>'): + + data = util.json_loads(text.unquote( + text.extr(image, "data-object='", "'") or + text.extr(image, 'data-object="', '"'))) + image_id = data.get("name") + image_url = data.get("url") + image_title = data.get("title") if image_title is not None: image_title = text.unescape(image_title) + yield (image_url, { - 'id': image_id, - 'url': image_url, - 'title': image_title, - 'name': json_data.get('filename'), - 'filename': image_id, - 'extension': json_data.get('extension'), - 'height': text.parse_int(json_data.get('height')), - 'width': text.parse_int(json_data.get('width')), + "id" : image_id, + "url" : image_url, + "title" : image_title, + "name" : data.get("filename"), + "filename" : image_id, + "extension": data.get("extension"), + "width" : text.parse_int(data.get("width")), + "height" : text.parse_int(data.get("height")), }) class LensdumpAlbumsExtractor(LensdumpBase, Extractor): """Extractor for album list from lensdump.com""" subcategory = "albums" - pattern = BASE_PATTERN + r"/\w+/albums" - example = "https://lensdump.com/USER/albums" + pattern = BASE_PATTERN + r"/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?" + example = "https://lensdump.com/USER" def items(self): - for node in self.nodes(): - album_url = text.urljoin(self.root, text.extr( - node, 'data-url-short="', '"')) - yield Message.Queue, album_url, { - "_extractor": LensdumpAlbumExtractor} + user, query = self.groups + url = "{}/{}/".format(self.root, user) + if query: + params = text.parse_query(query) + else: + params = {"sort": "date_asc", "page": "1"} + page = self.request(url, params=params).text + + data = {"_extractor": LensdumpAlbumExtractor} + for album_path in self._pagination(page, 'data-url-short="', '"'): + album_url = text.urljoin(self.root, album_path) + yield Message.Queue, album_url, data class LensdumpImageExtractor(LensdumpBase, Extractor): @@ -107,16 +103,13 @@ class LensdumpImageExtractor(LensdumpBase, Extractor): pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)" example = "https://lensdump.com/i/ID" - def __init__(self, match): - Extractor.__init__(self, match) - self.key = match.group(1) - def items(self): - url = "{}/i/{}".format(self.root, self.key) + key = self.groups[0] + url = "{}/i/{}".format(self.root, key) extr = text.extract_from(self.request(url).text) data = { - "id" : self.key, + "id" : key, "title" : text.unescape(extr( 'property="og:title" content="', '"')), "url" : extr( |
