Diffstat (limited to 'gallery_dl/extractor')
 gallery_dl/extractor/__init__.py      |   5
 gallery_dl/extractor/common.py        |  19
 gallery_dl/extractor/danbooru.py      | 230
 gallery_dl/extractor/e621.py          | 149
 gallery_dl/extractor/furaffinity.py   |  39
 gallery_dl/extractor/imgbox.py        |   9
 gallery_dl/extractor/inkbunny.py      |  23
 gallery_dl/extractor/kemonoparty.py   |   2
 gallery_dl/extractor/lightroom.py     | 103
 gallery_dl/extractor/reddit.py        |  10
 gallery_dl/extractor/redgifs.py       |   9
 gallery_dl/extractor/twitter.py       |  10
 gallery_dl/extractor/vk.py            |   5
 gallery_dl/extractor/wallpapercave.py |  30
14 files changed, 412 insertions(+), 231 deletions(-)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index e7d71d6..b52561e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -31,7 +31,6 @@ modules = [
     "desktopography",
     "deviantart",
     "dynastyscans",
-    "e621",
     "erome",
     "exhentai",
     "fallenangels",
@@ -70,6 +69,7 @@ modules = [
     "khinsider",
     "kohlchan",
     "komikcast",
+    "lightroom",
     "lineblog",
     "livedoor",
     "luscious",
@@ -132,6 +132,7 @@ modules = [
     "vk",
     "vsco",
     "wallhaven",
+    "wallpapercave",
     "warosu",
     "weasyl",
     "webtoons",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 1d81dfc..5a2d3a3 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -371,8 +371,16 @@ class Extractor():
         for cookie in self._cookiejar:
             if cookie.name in names and (
                     not domain or cookie.domain == domain):
-                if cookie.expires and cookie.expires < now:
-                    self.log.warning("Cookie '%s' has expired", cookie.name)
+                if cookie.expires:
+                    diff = int(cookie.expires - now)
+                    if diff <= 0:
+                        self.log.warning(
+                            "Cookie '%s' has expired", cookie.name)
+                    elif diff <= 86400:
+                        hours = diff // 3600
+                        self.log.warning(
+                            "Cookie '%s' will expire in less than %s hour%s",
+                            cookie.name, hours + 1, "s" if hours else "")
                 else:
                     names.discard(cookie.name)
                     if not names:
@@ -607,6 +615,9 @@ class BaseExtractor(Extractor):
                 if group is not None:
                     if index:
                         self.category, self.root = self.instances[index-1]
+                        if not self.root:
+                            url = text.ensure_http_scheme(match.group(0))
+                            self.root = url[:url.index("/", 8)]
                     else:
                         self.root = group
                         self.category = group.partition("://")[2]
@@ -624,7 +635,9 @@ class BaseExtractor(Extractor):
         pattern_list = []
         instance_list = cls.instances = []
         for category, info in instances.items():
-            root = info["root"].rstrip("/")
+            root = info["root"]
+            if root:
+                root = root.rstrip("/")
             instance_list.append((category, root))
 
             pattern = info.get("pattern")
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index c6c33b4..710950a 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -1,36 +1,29 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2014-2020 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extractors for https://danbooru.donmai.us/"""
+"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
 
-from .common import Extractor, Message
+from .common import BaseExtractor, Message
 from .. import text
 import datetime
 
-BASE_PATTERN = (
-    r"(?:https?://)?"
-    r"(danbooru|hijiribe|sonohara|safebooru)"
-    r"\.donmai\.us"
-)
-
 
-class DanbooruExtractor(Extractor):
+class DanbooruExtractor(BaseExtractor):
     """Base class for danbooru extractors"""
-    basecategory = "booru"
-    category = "danbooru"
-    filename_fmt = "{category}_{id}_{md5}.{extension}"
+    basecategory = "Danbooru"
+    filename_fmt = "{category}_{id}_{filename}.{extension}"
     page_limit = 1000
     page_start = None
     per_page = 200
 
     def __init__(self, match):
-        super().__init__(match)
-        self.root = "https://{}.donmai.us".format(match.group(1))
+        BaseExtractor.__init__(self, match)
+
         self.ugoira = self.config("ugoira", False)
         self.external = self.config("external", False)
         self.extended_metadata = self.config("metadata", False)
@@ -40,6 +33,20 @@ class DanbooruExtractor(Extractor):
             self.log.debug("Using HTTP Basic Auth for user '%s'", username)
             self.session.auth = (username, api_key)
 
+        instance = INSTANCES.get(self.category) or {}
+        iget = instance.get
+
+        self.headers = iget("headers")
+        self.page_limit = iget("page-limit", 1000)
+        self.page_start = iget("page-start")
+        self.per_page = iget("per-page", 200)
+        self.request_interval_min = iget("request-interval-min", 0.0)
+        self._pools = iget("pools")
+
+    def request(self, url, **kwargs):
+        kwargs["headers"] = self.headers
+        return BaseExtractor.request(self, url, **kwargs)
+
     def skip(self, num):
         pages = num // self.per_page
         if pages >= self.page_limit:
@@ -50,16 +57,31 @@ class DanbooruExtractor(Extractor):
     def items(self):
         data = self.metadata()
         for post in self.posts():
-            try:
-                url = post["file_url"]
-            except KeyError:
-                if self.external and post["source"]:
-                    post.update(data)
-                    yield Message.Directory, post
-                    yield Message.Queue, post["source"], post
-                continue
-
-            text.nameext_from_url(url, post)
+
+            file = post.get("file")
+            if file:
+                url = file["url"]
+                if not url:
+                    md5 = file["md5"]
+                    url = file["url"] = (
+                        "https://static1.{}/data/{}/{}/{}.{}".format(
+                            self.root[8:], md5[0:2], md5[2:4], md5, file["ext"]
+                        ))
+                post["filename"] = file["md5"]
+                post["extension"] = file["ext"]
+
+            else:
+                try:
+                    url = post["file_url"]
+                except KeyError:
+                    if self.external and post["source"]:
+                        post.update(data)
+                        yield Message.Directory, post
+                        yield Message.Queue, post["source"], post
+                    continue
+
+                text.nameext_from_url(url, post)
+
             if post["extension"] == "zip":
                 if self.ugoira:
                     post["frames"] = self.request(
@@ -89,11 +111,8 @@ class DanbooruExtractor(Extractor):
     def posts(self):
         return ()
 
-    def _pagination(self, endpoint, params=None, pagenum=False):
+    def _pagination(self, endpoint, params, pagenum=False):
         url = self.root + endpoint
-
-        if params is None:
-            params = {}
         params["limit"] = self.per_page
         params["page"] = self.page_start
 
@@ -117,12 +136,36 @@ class DanbooruExtractor(Extractor):
             return
 
 
+INSTANCES = {
+    "danbooru": {
+        "root": None,
+        "pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us",
+    },
+    "e621": {
+        "root": None,
+        "pattern": r"e(?:621|926)\.net",
+        "headers": {"User-Agent": "gallery-dl/1.14.0 (by mikf)"},
+        "pools": "sort",
+        "page-limit": 750,
+        "per-page": 320,
+        "request-interval-min": 1.0,
+    },
+    "atfbooru": {
+        "root": "https://booru.allthefallen.moe",
+        "pattern": r"booru\.allthefallen\.moe",
+        "page-limit": 5000,
+    },
+}
+
+BASE_PATTERN = DanbooruExtractor.update(INSTANCES)
+
+
 class DanbooruTagExtractor(DanbooruExtractor):
     """Extractor for danbooru posts from tag searches"""
     subcategory = "tag"
     directory_fmt = ("{category}", "{search_tags}")
     archive_fmt = "t_{search_tags}_{id}"
-    pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]+)"
+    pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
     test = (
         ("https://danbooru.donmai.us/posts?tags=bonocho", {
             "content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
@@ -136,21 +179,29 @@ class DanbooruTagExtractor(DanbooruExtractor):
             "options": (("external", True),),
             "pattern": r"http://img16.pixiv.net/img/takaraakihito/1476533.jpg",
         }),
+        ("https://e621.net/posts?tags=anry", {
+            "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
+            "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
+        }),
+        ("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
+            "count": 12,
+        }),
         ("https://hijiribe.donmai.us/posts?tags=bonocho"),
         ("https://sonohara.donmai.us/posts?tags=bonocho"),
         ("https://safebooru.donmai.us/posts?tags=bonocho"),
+        ("https://e926.net/posts?tags=anry"),
     )
 
     def __init__(self, match):
-        super().__init__(match)
-        self.tags = text.unquote(match.group(2).replace("+", " "))
+        DanbooruExtractor.__init__(self, match)
+        tags = match.group(match.lastindex)
+        self.tags = text.unquote(tags.replace("+", " "))
 
     def metadata(self):
         return {"search_tags": self.tags}
 
     def posts(self):
-        params = {"tags": self.tags}
-        return self._pagination("/posts.json", params)
+        return self._pagination("/posts.json", {"tags": self.tags})
 
 
 class DanbooruPoolExtractor(DanbooruExtractor):
@@ -158,33 +209,66 @@ class DanbooruPoolExtractor(DanbooruExtractor):
     subcategory = "pool"
     directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
     archive_fmt = "p_{pool[id]}_{id}"
-    pattern = BASE_PATTERN + r"/pools/(\d+)"
-    test = ("https://danbooru.donmai.us/pools/7659", {
-        "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
-    })
+    pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
+    test = (
+        ("https://danbooru.donmai.us/pools/7659", {
+            "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
+        }),
+        ("https://e621.net/pools/73", {
+            "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
+            "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
+        }),
+        ("https://booru.allthefallen.moe/pools/9", {
+            "url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
+            "count": 6,
+        }),
+        ("https://danbooru.donmai.us/pool/show/7659"),
+        ("https://e621.net/pool/show/73"),
+    )
 
     def __init__(self, match):
-        super().__init__(match)
-        self.pool_id = match.group(2)
+        DanbooruExtractor.__init__(self, match)
+        self.pool_id = match.group(match.lastindex)
         self.post_ids = ()
 
     def metadata(self):
         url = "{}/pools/{}.json".format(self.root, self.pool_id)
         pool = self.request(url).json()
         pool["name"] = pool["name"].replace("_", " ")
-        self.post_ids = pool.pop("post_ids")
+        self.post_ids = pool.pop("post_ids", ())
         return {"pool": pool}
 
     def posts(self):
-        params = {"tags": "pool:" + self.pool_id}
-        return self._pagination("/posts.json", params)
+        if self._pools == "sort":
+            self.log.info("Fetching posts of pool %s", self.pool_id)
+
+            id_to_post = {
+                post["id"]: post
+                for post in self._pagination(
+                    "/posts.json", {"tags": "pool:" + self.pool_id})
+            }
+
+            posts = []
+            append = posts.append
+            for num, pid in enumerate(self.post_ids, 1):
+                if pid in id_to_post:
+                    post = id_to_post[pid]
+                    post["num"] = num
+                    append(post)
+                else:
+                    self.log.warning("Post %s is unavailable", pid)
+            return posts
+
+        else:
+            params = {"tags": "pool:" + self.pool_id}
+            return self._pagination("/posts.json", params)
 
 
 class DanbooruPostExtractor(DanbooruExtractor):
     """Extractor for single danbooru posts"""
     subcategory = "post"
     archive_fmt = "{id}"
-    pattern = BASE_PATTERN + r"/posts/(\d+)"
+    pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
     test = (
         ("https://danbooru.donmai.us/posts/294929", {
             "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
@@ -192,12 +276,21 @@ class DanbooruPostExtractor(DanbooruExtractor):
         ("https://danbooru.donmai.us/posts/3613024", {
             "pattern": r"https?://.+\.zip$",
             "options": (("ugoira", True),)
-        })
+        }),
+        ("https://e621.net/posts/535", {
+            "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
+            "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
+        }),
+        ("https://booru.allthefallen.moe/posts/22", {
+            "content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
+        }),
+        ("https://danbooru.donmai.us/post/show/294929"),
+        ("https://e621.net/post/show/535"),
     )
 
     def __init__(self, match):
-        super().__init__(match)
-        self.post_id = match.group(2)
+        DanbooruExtractor.__init__(self, match)
+        self.post_id = match.group(match.lastindex)
 
     def posts(self):
         url = "{}/posts/{}.json".format(self.root, self.post_id)
@@ -218,15 +311,23 @@ class DanbooruPopularExtractor(DanbooruExtractor):
             "range": "1-120",
             "count": 120,
         }),
+        ("https://e621.net/explore/posts/popular"),
+        (("https://e621.net/explore/posts/popular"
+          "?date=2019-06-01&scale=month"), {
+            "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+            "count": ">= 70",
+        }),
+        ("https://booru.allthefallen.moe/explore/posts/popular"),
     )
 
     def __init__(self, match):
-        super().__init__(match)
-        self.params = text.parse_query(match.group(2))
+        DanbooruExtractor.__init__(self, match)
+        self.params = match.group(match.lastindex)
 
     def metadata(self):
-        scale = self.params.get("scale", "day")
-        date = self.params.get("date") or datetime.date.today().isoformat()
+        self.params = params = text.parse_query(self.params)
+        scale = params.get("scale", "day")
+        date = params.get("date") or datetime.date.today().isoformat()
 
         if scale == "week":
             date = datetime.date.fromisoformat(date)
@@ -241,3 +342,30 @@ class DanbooruPopularExtractor(DanbooruExtractor):
             self.page_start = 1
         return self._pagination(
             "/explore/posts/popular.json", self.params, True)
+
+
+class DanbooruFavoriteExtractor(DanbooruExtractor):
+    """Extractor for e621 favorites"""
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "Favorites", "{user_id}")
+    archive_fmt = "f_{user_id}_{id}"
+    pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+    test = (
+        ("https://e621.net/favorites"),
+        ("https://e621.net/favorites?page=2&user_id=53275", {
+            "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+            "count": "> 260",
+        }),
+    )
+
+    def __init__(self, match):
+        DanbooruExtractor.__init__(self, match)
+        self.query = text.parse_query(match.group(match.lastindex))
+
+    def metadata(self):
+        return {"user_id": self.query.get("user_id", "")}
+
+    def posts(self):
+        if self.page_start is None:
+            self.page_start = 1
+        return self._pagination("/favorites.json", self.query, True)
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
deleted file mode 100644
index 213178c..0000000
--- a/gallery_dl/extractor/e621.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2014-2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://e621.net/"""
-
-from .common import Extractor, Message
-from . import danbooru
-from .. import text
-
-BASE_PATTERN = r"(?:https?://)?e(621|926)\.net"
-
-
-class E621Extractor(danbooru.DanbooruExtractor):
-    """Base class for e621 extractors"""
-    category = "e621"
-    filename_fmt = "{category}_{id}_{file[md5]}.{extension}"
-    page_limit = 750
-    page_start = None
-    per_page = 320
-    request_interval_min = 1.0
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.root = "https://e{}.net".format(match.group(1))
-        self.headers = {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}
-
-    def request(self, url, **kwargs):
-        kwargs["headers"] = self.headers
-        return Extractor.request(self, url, **kwargs)
-
-    def items(self):
-        data = self.metadata()
-        for post in self.posts():
-            file = post["file"]
-
-            if not file["url"]:
-                md5 = file["md5"]
-                file["url"] = "https://static1.{}/data/{}/{}/{}.{}".format(
-                    self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
-
-            post["filename"] = file["md5"]
-            post["extension"] = file["ext"]
-            post.update(data)
-            yield Message.Directory, post
-            yield Message.Url, file["url"], post
-
-
-class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
-    """Extractor for e621 posts from tag searches"""
-    pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
-    test = (
-        ("https://e621.net/posts?tags=anry", {
-            "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
-            "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
-        }),
-        ("https://e926.net/posts?tags=anry"),
-        ("https://e621.net/post/index/1/anry"),
-        ("https://e621.net/post?tags=anry"),
-    )
-
-
-class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
-    """Extractor for e621 pools"""
-    pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
-    test = (
-        ("https://e621.net/pools/73", {
-            "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
-            "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
-        }),
-        ("https://e621.net/pool/show/73"),
-    )
-
-    def posts(self):
-        self.log.info("Fetching posts of pool %s", self.pool_id)
-
-        id_to_post = {
-            post["id"]: post
-            for post in self._pagination(
-                "/posts.json", {"tags": "pool:" + self.pool_id})
-        }
-
-        posts = []
-        append = posts.append
-        for num, pid in enumerate(self.post_ids, 1):
-            if pid in id_to_post:
-                post = id_to_post[pid]
-                post["num"] = num
-                append(post)
-            else:
-                self.log.warning("Post %s is unavailable", pid)
-
-        return posts
-
-
-class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
-    """Extractor for single e621 posts"""
-    pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
-    test = (
-        ("https://e621.net/posts/535", {
-            "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
-            "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
-        }),
-        ("https://e621.net/post/show/535"),
-    )
-
-
-class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
-    """Extractor for popular images from e621"""
-    pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
-    test = (
-        ("https://e621.net/explore/posts/popular"),
-        (("https://e621.net/explore/posts/popular"
-          "?date=2019-06-01&scale=month"), {
-            "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
-            "count": ">= 70",
-        })
-    )
-
-
-class E621FavoriteExtractor(E621Extractor):
-    """Extractor for e621 favorites"""
-    subcategory = "favorite"
-    directory_fmt = ("{category}", "Favorites", "{user_id}")
-    archive_fmt = "f_{user_id}_{id}"
-    pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
-    test = (
-        ("https://e621.net/favorites"),
-        ("https://e621.net/favorites?page=2&user_id=53275", {
-            "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
-            "count": "> 260",
-        })
-    )
-
-    def __init__(self, match):
-        super().__init__(match)
-        self.query = text.parse_query(match.group(2))
-
-    def metadata(self):
-        return {"user_id": self.query.get("user_id", "")}
-
-    def posts(self):
-        if self.page_start is None:
-            self.page_start = 1
-        return self._pagination("/favorites.json", self.query, True)
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 891e0c1..6a8744a 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -32,6 +32,12 @@ class FuraffinityExtractor(Extractor):
         if self.config("descriptions") == "html":
             self._process_description = str.strip
 
+        layout = self.config("layout")
+        if layout and layout != "auto":
+            self._new_layout = False if layout == "old" else True
+        else:
+            self._new_layout = None
+
     def items(self):
 
         if self._warning:
@@ -64,8 +70,11 @@ class FuraffinityExtractor(Extractor):
     def _parse_post(self, post_id):
         url = "{}/view/{}/".format(self.root, post_id)
         extr = text.extract_from(self.request(url).text)
-        path = extr('href="//d', '"')
 
+        if self._new_layout is None:
+            self._new_layout = ("http-equiv=" not in extr("<meta ", ">"))
+
+        path = extr('href="//d', '"')
         if not path:
             self.log.warning(
                 "Unable to download post %s (\"%s\")",
@@ -84,10 +93,9 @@ class FuraffinityExtractor(Extractor):
             "url": "https://d" + path,
         })
 
-        tags = extr('class="tags-row">', '</section>')
-        if tags:
-            # new site layout
-            data["tags"] = text.split_html(tags)
+        if self._new_layout:
+            data["tags"] = text.split_html(extr(
+                'class="tags-row">', '</section>'))
         data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
         data["artist"] = extr("<strong>", "<")
         data["_description"] = extr('class="section-body">', '</div>')
@@ -306,6 +314,25 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
             r"|http://www\.postybirb\.com",
             "count": 2,
         }),
+        # no tags (#2277)
+        ("https://www.furaffinity.net/view/45331225/", {
+            "keyword": {
+                "artist": "Kota_Remminders",
+                "artist_url": "kotaremminders",
+                "date": "dt:2022-01-03 17:49:33",
+                "fa_category": "Adoptables",
+                "filename": "1641232173.kotaremminders_chidopts1",
+                "gender": "Any",
+                "height": 905,
+                "id": 45331225,
+                "rating": "General",
+                "species": "Unspecified / Any",
+                "tags": [],
+                "theme": "All",
+                "title": "REMINDER",
+                "width": 1280,
+            },
+        }),
         ("https://furaffinity.net/view/21835115/"),
         ("https://sfw.furaffinity.net/view/21835115/"),
         ("https://www.furaffinity.net/full/21835115/"),
diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py
index 7ae39c0..251f52e 100644
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -53,8 +53,7 @@ class ImgboxExtractor(Extractor):
     @staticmethod
     def get_image_url(page):
         """Extract download-url"""
-        pos = page.index(">Image</a>")
-        return text.extract(page, '<a href="', '"', pos)[0]
+        return text.extract(page, 'property="og:image" content="', '"')[0]
 
 
 class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
@@ -66,12 +65,12 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
     pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"
     test = (
         ("https://imgbox.com/g/JaX5V5HX7g", {
-            "url": "678f0bca1251d810372326ea4f16582cafa800e4",
+            "url": "da4f15b161461119ee78841d4b8e8d054d95f906",
             "keyword": "4b1e62820ac2c6205b7ad0b6322cc8e00dbe1b0c",
             "content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
         }),
         ("https://imgbox.com/g/cUGEkRbdZZ", {
-            "url": "d839d47cbbbeb121f83c520072512f7e51f52107",
+            "url": "76506a3aab175c456910851f66227e90484ca9f7",
             "keyword": "fb0427b87983197849fb2887905e758f3e50cb6e",
         }),
         ("https://imgbox.com/g/JaX5V5HX7h", {
@@ -109,7 +108,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
     pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
     test = (
         ("https://imgbox.com/qHhw7lpG", {
-            "url": "d931f675a9b848fa7cb9077d6c2b14eb07bdb80f",
+            "url": "ee9cdea6c48ad0161c1b5f81f6b0c9110997038c",
             "keyword": "dfc72310026b45f3feb4f9cada20c79b2575e1af",
             "content": "0c8768055e4e20e7c7259608b67799171b691140",
         }),
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 8ee8ca9..ded8906 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -220,11 +220,26 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
 
     def __init__(self, match):
         InkbunnyExtractor.__init__(self, match)
-        self.params = text.parse_query(match.group(1))
-        self.params.pop("rid", None)
+        self.query = match.group(1)
 
     def posts(self):
-        return self.api.search(self.params)
+        params = text.parse_query(self.query)
+        pop = params.pop
+
+        pop("rid", None)
+        params["string_join_type"] = pop("stringtype", None)
+        params["dayslimit"] = pop("days", None)
+        params["username"] = pop("artist", None)
+
+        favsby = pop("favsby", None)
+        if favsby:
+            # get user_id from user profile
+            url = "{}/{}".format(self.root, favsby)
+            page = self.request(url).text
+            user_id = text.extract(page, "?user_id=", "'")[0]
+            params["favs_user_id"] = user_id.partition("&")[0]
+
+        return self.api.search(params)
 
 
 class InkbunnyFollowingExtractor(InkbunnyExtractor):
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index e8fcd1a..b898e3b 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -100,7 +100,7 @@ class KemonopartyExtractor(Extractor):
             elif url.startswith(self.root):
                 url = self.root + "/data" + url[20:]
 
-            text.nameext_from_url(file["name"], post)
+            text.nameext_from_url(file.get("name", url), post)
             yield Message.Url, url, post
 
     def login(self):
diff --git a/gallery_dl/extractor/lightroom.py b/gallery_dl/extractor/lightroom.py
new file mode 100644
index 0000000..8131db8
--- /dev/null
+++ b/gallery_dl/extractor/lightroom.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://lightroom.adobe.com/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+
+
+class LightroomGalleryExtractor(Extractor):
+    """Extractor for an image gallery on lightroom.adobe.com"""
+    category = "lightroom"
+    subcategory = "gallery"
+    directory_fmt = ("{category}", "{user}", "{title}")
+    filename_fmt = "{num:>04}_{id}.{extension}"
+    archive_fmt = "{id}"
+    pattern = r"(?:https?://)?lightroom\.adobe\.com/shares/([0-9a-f]+)"
+    test = (
+        (("https://lightroom.adobe.com/shares/"
+          "0c9cce2033f24d24975423fe616368bf"), {
+            "keyword": {
+                "title": "Sterne und Nachtphotos",
+                "user": "Christian Schrang",
+            },
+            "count": ">= 55",
+        }),
+        (("https://lightroom.adobe.com/shares/"
+          "7ba68ad5a97e48608d2e6c57e6082813"), {
+            "keyword": {
+                "title": "HEBFC Snr/Res v Brighton",
+                "user": "",
+            },
+            "count": ">= 180",
+        }),
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.href = match.group(1)
+
+    def items(self):
+        # Get config
+        url = "https://lightroom.adobe.com/shares/" + self.href
+        response = self.request(url)
+        album = json.loads(
+            text.extract(response.text, "albumAttributes: ", "\n")[0]
+        )
+
+        images = self.images(album)
+        for img in images:
+            url = img["url"]
+            yield Message.Directory, img
+            yield Message.Url, url, text.nameext_from_url(url, img)
+
+    def metadata(self, album):
+        payload = album["payload"]
+        story = payload.get("story") or {}
+        return {
+            "gallery_id": self.href,
+            "user": story.get("author", ""),
+            "title": story.get("title", payload["name"]),
+        }
+
+    def images(self, album):
+        album_md = self.metadata(album)
+        base_url = album["base"]
+        next_url = album["links"]["/rels/space_album_images_videos"]["href"]
+        num = 1
+
+        while next_url:
+            url = base_url + next_url
+            page = self.request(url).text
+            # skip 1st line as it's a JS loop
+            data = json.loads(page[page.index("\n") + 1:])
+
+            base_url = data["base"]
+            for res in data["resources"]:
+                img_url, img_size = None, 0
+                for key, value in res["asset"]["links"].items():
+                    if not key.startswith("/rels/rendition_type/"):
+                        continue
+                    size = text.parse_int(key.split("/")[-1])
+                    if size > img_size:
+                        img_size = size
+                        img_url = value["href"]
+
+                if img_url:
+                    img = {
+                        "id": res["asset"]["id"],
+                        "num": num,
+                        "url": base_url + img_url,
+                    }
+                    img.update(album_md)
+                    yield img
+                    num += 1
+            try:
+                next_url = data["links"]["next"]["href"]
+            except KeyError:
+                next_url = None
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index f7809de..01538bf 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -172,7 +172,7 @@ class RedditUserExtractor(RedditExtractor):
     """Extractor for URLs from posts by a reddit user"""
     subcategory = "user"
     pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
-               r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?")
+               r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?$")
     test = (
         ("https://www.reddit.com/user/username/", {
             "count": ">= 2",
@@ -197,8 +197,8 @@ class RedditSubmissionExtractor(RedditExtractor):
     """Extractor for URLs from a submission on reddit.com"""
     subcategory = "submission"
     pattern = (r"(?:https?://)?(?:"
-               r"(?:\w+\.)?reddit\.com/(?:r/[^/?#]+/comments|gallery)"
-               r"|redd\.it)/([a-z0-9]+)")
+               r"(?:\w+\.)?reddit\.com/(?:(?:r|u|user)/[^/?#]+"
+               r"/comments|gallery)|redd\.it)/([a-z0-9]+)")
     test = (
         ("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
             "pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg",
@@ -235,6 +235,10 @@ class RedditSubmissionExtractor(RedditExtractor):
         ("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
         ("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
         ("https://redd.it/2a00np/"),
+        ("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", {
+            "pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg",
+            "count": 1,
+        }),
     )
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index df50f70..2c3ed44 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -106,8 +106,10 @@ class RedgifsSearchExtractor(RedgifsExtractor):
 class RedgifsImageExtractor(RedgifsExtractor):
     """Extractor for individual gifs from redgifs.com"""
     subcategory = "image"
-    pattern = (r"(?:https?://)?(?:www\.)?(?:redgifs\.com/(?:watch|ifr)"
-               r"|gifdeliverynetwork.com)/([A-Za-z]+)")
+    pattern = (r"(?:https?://)?(?:"
+               r"(?:www\.)?redgifs\.com/(?:watch|ifr)|"
+               r"(?:www\.)?gifdeliverynetwork\.com|"
+               r"i\.redgifs\.com/i)/([A-Za-z]+)")
     test = (
         ("https://redgifs.com/watch/foolishforkedabyssiniancat", {
             "pattern": r"https://\w+\.redgifs\.com"
@@ -115,6 +117,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
             "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
         }),
         ("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"),
+        ("https://i.redgifs.com/i/FoolishForkedAbyssiniancat"),
         ("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
     )
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index f459fba..46b06c2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1091,8 +1091,14 @@ class TwitterAPI():
                 instructions = instructions[key]
             instructions = instructions["instructions"]
 
-            entries = instructions[0]["entries"]
-        except (KeyError, IndexError):
+            for instr in instructions:
+                if instr.get("type") == "TimelineAddEntries":
+                    entries = instr["entries"]
+                    break
+            else:
+                raise KeyError()
+
+        except LookupError:
             extr.log.debug(data)
 
             if self._user:
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index ed565bc..dd2eb4e 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -34,7 +34,6 @@ class VkExtractor(Extractor):
     def _pagination(self, photos_url, user_id):
         sub = re.compile(r"/imp[fg]/").sub
         needle = 'data-id="{}_'.format(user_id)
-        cnt = 0
 
         headers = {
             "X-Requested-With": "XMLHttpRequest",
@@ -56,7 +55,9 @@ class VkExtractor(Extractor):
             offset = payload[0]
             html = payload[1]
 
-            for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
+            cnt = 0
+            for photo in text.extract_iter(html, needle, ')'):
+                cnt += 1
                 pid = photo[:photo.find('"')]
                 url = photo[photo.rindex("(")+1:]
                 url = sub("/", url.partition("?")[0])
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
new file mode 100644
index 0000000..6c3af76
--- /dev/null
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 David Hoppenbrouwers
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://wallpapercave.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WallpapercaveImageExtractor(Extractor):
+    """Extractor for images on wallpapercave.com"""
+    category = "wallpapercave"
+    subcategory = "image"
+    root = "https://wallpapercave.com"
+    pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com"
+    test = ("https://wallpapercave.com/w/wp10270355", {
+        "content": "58b088aaa1cf1a60e347015019eb0c5a22b263a6",
+    })
+
+    def items(self):
+        page = self.request(text.ensure_http_scheme(self.url)).text
+        for path in text.extract_iter(page, 'class="download" href="', '"'):
+            image = text.nameext_from_url(path)
+            yield Message.Directory, image
+            yield Message.Url, self.root + path, image