Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/__init__.py                   15
-rw-r--r--  gallery_dl/downloader/ytdl.py            10
-rw-r--r--  gallery_dl/extractor/__init__.py          5
-rw-r--r--  gallery_dl/extractor/common.py           19
-rw-r--r--  gallery_dl/extractor/danbooru.py        230
-rw-r--r--  gallery_dl/extractor/e621.py            149
-rw-r--r--  gallery_dl/extractor/furaffinity.py      39
-rw-r--r--  gallery_dl/extractor/imgbox.py            9
-rw-r--r--  gallery_dl/extractor/inkbunny.py         23
-rw-r--r--  gallery_dl/extractor/kemonoparty.py       2
-rw-r--r--  gallery_dl/extractor/lightroom.py       103
-rw-r--r--  gallery_dl/extractor/reddit.py           10
-rw-r--r--  gallery_dl/extractor/redgifs.py           9
-rw-r--r--  gallery_dl/extractor/twitter.py          10
-rw-r--r--  gallery_dl/extractor/vk.py                5
-rw-r--r--  gallery_dl/extractor/wallpapercave.py    30
-rw-r--r--  gallery_dl/text.py                        4
-rw-r--r--  gallery_dl/version.py                     2
18 files changed, 437 insertions, 237 deletions
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index ad8286e..0214659 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2021 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -131,6 +131,19 @@ def main():
for opts in args.options:
config.set(*opts)
+ # signals
+ signals = config.get((), "signals-ignore")
+ if signals:
+ import signal
+ if isinstance(signals, str):
+ signals = signals.split(",")
+ for signal_name in signals:
+ signal_num = getattr(signal, signal_name, None)
+ if signal_num is None:
+ log.warning("signal '%s' is not defined", signal_name)
+ else:
+ signal.signal(signal_num, signal.SIG_IGN)
+
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
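
The new block above lets users suppress specific signals at startup: the top-level signals-ignore option is read, split on commas if given as a string, and every resolvable name is mapped to SIG_IGN, with a warning for names the signal module does not define. A minimal standalone sketch of that behavior, using hypothetical signal names (not part of the commit):

    import signal

    value = "SIGTTOU,SIGTTIN"                    # hypothetical "signals-ignore" value
    names = value.split(",") if isinstance(value, str) else value
    for name in names:
        num = getattr(signal, name, None)        # None if the name is not defined
        if num is not None:
            signal.signal(num, signal.SIG_IGN)   # ignore this signal from now on
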
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 30f628e..462bbf8 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -39,7 +39,13 @@ class YoutubeDLDownloader(DownloaderBase):
if not ytdl_instance:
ytdl_instance = self.ytdl_instance
if not ytdl_instance:
- module = ytdl.import_module(self.config("module"))
+ try:
+ module = ytdl.import_module(self.config("module"))
+ except ImportError as exc:
+ self.log.error("Cannot import module '%s'", exc.name)
+ self.log.debug("", exc_info=True)
+ self.download = lambda u, p: False
+ return False
self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
module, self, self.ytdl_opts)
if self.outtmpl == "default":
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index e7d71d6..b52561e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,7 +31,6 @@ modules = [
"desktopography",
"deviantart",
"dynastyscans",
- "e621",
"erome",
"exhentai",
"fallenangels",
@@ -70,6 +69,7 @@ modules = [
"khinsider",
"kohlchan",
"komikcast",
+ "lightroom",
"lineblog",
"livedoor",
"luscious",
@@ -132,6 +132,7 @@ modules = [
"vk",
"vsco",
"wallhaven",
+ "wallpapercave",
"warosu",
"weasyl",
"webtoons",
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 1d81dfc..5a2d3a3 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -371,8 +371,16 @@ class Extractor():
for cookie in self._cookiejar:
if cookie.name in names and (
not domain or cookie.domain == domain):
- if cookie.expires and cookie.expires < now:
- self.log.warning("Cookie '%s' has expired", cookie.name)
+ if cookie.expires:
+ diff = int(cookie.expires - now)
+ if diff <= 0:
+ self.log.warning(
+ "Cookie '%s' has expired", cookie.name)
+ elif diff <= 86400:
+ hours = diff // 3600
+ self.log.warning(
+ "Cookie '%s' will expire in less than %s hour%s",
+ cookie.name, hours + 1, "s" if hours else "")
else:
names.discard(cookie.name)
if not names:
@@ -607,6 +615,9 @@ class BaseExtractor(Extractor):
if group is not None:
if index:
self.category, self.root = self.instances[index-1]
+ if not self.root:
+ url = text.ensure_http_scheme(match.group(0))
+ self.root = url[:url.index("/", 8)]
else:
self.root = group
self.category = group.partition("://")[2]
@@ -624,7 +635,9 @@ class BaseExtractor(Extractor):
pattern_list = []
instance_list = cls.instances = []
for category, info in instances.items():
- root = info["root"].rstrip("/")
+ root = info["root"]
+ if root:
+ root = root.rstrip("/")
instance_list.append((category, root))
pattern = info.get("pattern")
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index c6c33b4..710950a 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -1,36 +1,29 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2020 Mike Fährmann
+# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://danbooru.donmai.us/"""
+"""Extractors for https://danbooru.donmai.us/ and other Danbooru instances"""
-from .common import Extractor, Message
+from .common import BaseExtractor, Message
from .. import text
import datetime
-BASE_PATTERN = (
- r"(?:https?://)?"
- r"(danbooru|hijiribe|sonohara|safebooru)"
- r"\.donmai\.us"
-)
-
-class DanbooruExtractor(Extractor):
+class DanbooruExtractor(BaseExtractor):
"""Base class for danbooru extractors"""
- basecategory = "booru"
- category = "danbooru"
- filename_fmt = "{category}_{id}_{md5}.{extension}"
+ basecategory = "Danbooru"
+ filename_fmt = "{category}_{id}_{filename}.{extension}"
page_limit = 1000
page_start = None
per_page = 200
def __init__(self, match):
- super().__init__(match)
- self.root = "https://{}.donmai.us".format(match.group(1))
+ BaseExtractor.__init__(self, match)
+
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
self.extended_metadata = self.config("metadata", False)
@@ -40,6 +33,20 @@ class DanbooruExtractor(Extractor):
self.log.debug("Using HTTP Basic Auth for user '%s'", username)
self.session.auth = (username, api_key)
+ instance = INSTANCES.get(self.category) or {}
+ iget = instance.get
+
+ self.headers = iget("headers")
+ self.page_limit = iget("page-limit", 1000)
+ self.page_start = iget("page-start")
+ self.per_page = iget("per-page", 200)
+ self.request_interval_min = iget("request-interval-min", 0.0)
+ self._pools = iget("pools")
+
+ def request(self, url, **kwargs):
+ kwargs["headers"] = self.headers
+ return BaseExtractor.request(self, url, **kwargs)
+
def skip(self, num):
pages = num // self.per_page
if pages >= self.page_limit:
@@ -50,16 +57,31 @@ class DanbooruExtractor(Extractor):
def items(self):
data = self.metadata()
for post in self.posts():
- try:
- url = post["file_url"]
- except KeyError:
- if self.external and post["source"]:
- post.update(data)
- yield Message.Directory, post
- yield Message.Queue, post["source"], post
- continue
-
- text.nameext_from_url(url, post)
+
+ file = post.get("file")
+ if file:
+ url = file["url"]
+ if not url:
+ md5 = file["md5"]
+ url = file["url"] = (
+ "https://static1.{}/data/{}/{}/{}.{}".format(
+ self.root[8:], md5[0:2], md5[2:4], md5, file["ext"]
+ ))
+ post["filename"] = file["md5"]
+ post["extension"] = file["ext"]
+
+ else:
+ try:
+ url = post["file_url"]
+ except KeyError:
+ if self.external and post["source"]:
+ post.update(data)
+ yield Message.Directory, post
+ yield Message.Queue, post["source"], post
+ continue
+
+ text.nameext_from_url(url, post)
+
if post["extension"] == "zip":
if self.ugoira:
post["frames"] = self.request(
@@ -89,11 +111,8 @@ class DanbooruExtractor(Extractor):
def posts(self):
return ()
- def _pagination(self, endpoint, params=None, pagenum=False):
+ def _pagination(self, endpoint, params, pagenum=False):
url = self.root + endpoint
-
- if params is None:
- params = {}
params["limit"] = self.per_page
params["page"] = self.page_start
@@ -117,12 +136,36 @@ class DanbooruExtractor(Extractor):
return
+INSTANCES = {
+ "danbooru": {
+ "root": None,
+ "pattern": r"(?:danbooru|hijiribe|sonohara|safebooru)\.donmai\.us",
+ },
+ "e621": {
+ "root": None,
+ "pattern": r"e(?:621|926)\.net",
+ "headers": {"User-Agent": "gallery-dl/1.14.0 (by mikf)"},
+ "pools": "sort",
+ "page-limit": 750,
+ "per-page": 320,
+ "request-interval-min": 1.0,
+ },
+ "atfbooru": {
+ "root": "https://booru.allthefallen.moe",
+ "pattern": r"booru\.allthefallen\.moe",
+ "page-limit": 5000,
+ },
+}
+
+BASE_PATTERN = DanbooruExtractor.update(INSTANCES)
+
+
class DanbooruTagExtractor(DanbooruExtractor):
"""Extractor for danbooru posts from tag searches"""
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
- pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]+)"
+ pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
test = (
("https://danbooru.donmai.us/posts?tags=bonocho", {
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
@@ -136,21 +179,29 @@ class DanbooruTagExtractor(DanbooruExtractor):
"options": (("external", True),),
"pattern": r"http://img16.pixiv.net/img/takaraakihito/1476533.jpg",
}),
+ ("https://e621.net/posts?tags=anry", {
+ "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
+ "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
+ }),
+ ("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
+ "count": 12,
+ }),
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
+ ("https://e926.net/posts?tags=anry"),
)
def __init__(self, match):
- super().__init__(match)
- self.tags = text.unquote(match.group(2).replace("+", " "))
+ DanbooruExtractor.__init__(self, match)
+ tags = match.group(match.lastindex)
+ self.tags = text.unquote(tags.replace("+", " "))
def metadata(self):
return {"search_tags": self.tags}
def posts(self):
- params = {"tags": self.tags}
- return self._pagination("/posts.json", params)
+ return self._pagination("/posts.json", {"tags": self.tags})
class DanbooruPoolExtractor(DanbooruExtractor):
@@ -158,33 +209,66 @@ class DanbooruPoolExtractor(DanbooruExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
archive_fmt = "p_{pool[id]}_{id}"
- pattern = BASE_PATTERN + r"/pools/(\d+)"
- test = ("https://danbooru.donmai.us/pools/7659", {
- "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
- })
+ pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
+ test = (
+ ("https://danbooru.donmai.us/pools/7659", {
+ "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
+ }),
+ ("https://e621.net/pools/73", {
+ "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
+ "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
+ }),
+ ("https://booru.allthefallen.moe/pools/9", {
+ "url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
+ "count": 6,
+ }),
+ ("https://danbooru.donmai.us/pool/show/7659"),
+ ("https://e621.net/pool/show/73"),
+ )
def __init__(self, match):
- super().__init__(match)
- self.pool_id = match.group(2)
+ DanbooruExtractor.__init__(self, match)
+ self.pool_id = match.group(match.lastindex)
self.post_ids = ()
def metadata(self):
url = "{}/pools/{}.json".format(self.root, self.pool_id)
pool = self.request(url).json()
pool["name"] = pool["name"].replace("_", " ")
- self.post_ids = pool.pop("post_ids")
+ self.post_ids = pool.pop("post_ids", ())
return {"pool": pool}
def posts(self):
- params = {"tags": "pool:" + self.pool_id}
- return self._pagination("/posts.json", params)
+ if self._pools == "sort":
+ self.log.info("Fetching posts of pool %s", self.pool_id)
+
+ id_to_post = {
+ post["id"]: post
+ for post in self._pagination(
+ "/posts.json", {"tags": "pool:" + self.pool_id})
+ }
+
+ posts = []
+ append = posts.append
+ for num, pid in enumerate(self.post_ids, 1):
+ if pid in id_to_post:
+ post = id_to_post[pid]
+ post["num"] = num
+ append(post)
+ else:
+ self.log.warning("Post %s is unavailable", pid)
+ return posts
+
+ else:
+ params = {"tags": "pool:" + self.pool_id}
+ return self._pagination("/posts.json", params)
class DanbooruPostExtractor(DanbooruExtractor):
"""Extractor for single danbooru posts"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/posts/(\d+)"
+ pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
test = (
("https://danbooru.donmai.us/posts/294929", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
@@ -192,12 +276,21 @@ class DanbooruPostExtractor(DanbooruExtractor):
("https://danbooru.donmai.us/posts/3613024", {
"pattern": r"https?://.+\.zip$",
"options": (("ugoira", True),)
- })
+ }),
+ ("https://e621.net/posts/535", {
+ "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
+ "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
+ }),
+ ("https://booru.allthefallen.moe/posts/22", {
+ "content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
+ }),
+ ("https://danbooru.donmai.us/post/show/294929"),
+ ("https://e621.net/post/show/535"),
)
def __init__(self, match):
- super().__init__(match)
- self.post_id = match.group(2)
+ DanbooruExtractor.__init__(self, match)
+ self.post_id = match.group(match.lastindex)
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
@@ -218,15 +311,23 @@ class DanbooruPopularExtractor(DanbooruExtractor):
"range": "1-120",
"count": 120,
}),
+ ("https://e621.net/explore/posts/popular"),
+ (("https://e621.net/explore/posts/popular"
+ "?date=2019-06-01&scale=month"), {
+ "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+ "count": ">= 70",
+ }),
+ ("https://booru.allthefallen.moe/explore/posts/popular"),
)
def __init__(self, match):
- super().__init__(match)
- self.params = text.parse_query(match.group(2))
+ DanbooruExtractor.__init__(self, match)
+ self.params = match.group(match.lastindex)
def metadata(self):
- scale = self.params.get("scale", "day")
- date = self.params.get("date") or datetime.date.today().isoformat()
+ self.params = params = text.parse_query(self.params)
+ scale = params.get("scale", "day")
+ date = params.get("date") or datetime.date.today().isoformat()
if scale == "week":
date = datetime.date.fromisoformat(date)
@@ -241,3 +342,30 @@ class DanbooruPopularExtractor(DanbooruExtractor):
self.page_start = 1
return self._pagination(
"/explore/posts/popular.json", self.params, True)
+
+
+class DanbooruFavoriteExtractor(DanbooruExtractor):
+ """Extractor for e621 favorites"""
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "Favorites", "{user_id}")
+ archive_fmt = "f_{user_id}_{id}"
+ pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ test = (
+ ("https://e621.net/favorites"),
+ ("https://e621.net/favorites?page=2&user_id=53275", {
+ "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
+ "count": "> 260",
+ }),
+ )
+
+ def __init__(self, match):
+ DanbooruExtractor.__init__(self, match)
+ self.query = text.parse_query(match.group(match.lastindex))
+
+ def metadata(self):
+ return {"user_id": self.query.get("user_id", "")}
+
+ def posts(self):
+ if self.page_start is None:
+ self.page_start = 1
+ return self._pagination("/favorites.json", self.query, True)
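
With e621.py removed and its sites folded into this module, supporting another Danbooru-compatible instance comes down to a new INSTANCES entry; the optional per-site keys (headers, page-limit, page-start, per-page, request-interval-min, pools) are picked up in DanbooruExtractor.__init__ above. An illustrative sketch with a hypothetical site (not part of the commit):

    INSTANCES["examplebooru"] = {
        "root": "https://booru.example.org",      # hypothetical instance
        "pattern": r"booru\.example\.org",
        "page-limit": 1000,                       # optional overrides
        "per-page": 200,
    }
    BASE_PATTERN = DanbooruExtractor.update(INSTANCES)  # rebuild the combined URL pattern
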
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
deleted file mode 100644
index 213178c..0000000
--- a/gallery_dl/extractor/e621.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2014-2022 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://e621.net/"""
-
-from .common import Extractor, Message
-from . import danbooru
-from .. import text
-
-BASE_PATTERN = r"(?:https?://)?e(621|926)\.net"
-
-
-class E621Extractor(danbooru.DanbooruExtractor):
- """Base class for e621 extractors"""
- category = "e621"
- filename_fmt = "{category}_{id}_{file[md5]}.{extension}"
- page_limit = 750
- page_start = None
- per_page = 320
- request_interval_min = 1.0
-
- def __init__(self, match):
- super().__init__(match)
- self.root = "https://e{}.net".format(match.group(1))
- self.headers = {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}
-
- def request(self, url, **kwargs):
- kwargs["headers"] = self.headers
- return Extractor.request(self, url, **kwargs)
-
- def items(self):
- data = self.metadata()
- for post in self.posts():
- file = post["file"]
-
- if not file["url"]:
- md5 = file["md5"]
- file["url"] = "https://static1.{}/data/{}/{}/{}.{}".format(
- self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
-
- post["filename"] = file["md5"]
- post["extension"] = file["ext"]
- post.update(data)
- yield Message.Directory, post
- yield Message.Url, file["url"], post
-
-
-class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
- """Extractor for e621 posts from tag searches"""
- pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
- test = (
- ("https://e621.net/posts?tags=anry", {
- "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
- "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
- }),
- ("https://e926.net/posts?tags=anry"),
- ("https://e621.net/post/index/1/anry"),
- ("https://e621.net/post?tags=anry"),
- )
-
-
-class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
- """Extractor for e621 pools"""
- pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
- test = (
- ("https://e621.net/pools/73", {
- "url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
- "content": "91abe5d5334425d9787811d7f06d34c77974cd22",
- }),
- ("https://e621.net/pool/show/73"),
- )
-
- def posts(self):
- self.log.info("Fetching posts of pool %s", self.pool_id)
-
- id_to_post = {
- post["id"]: post
- for post in self._pagination(
- "/posts.json", {"tags": "pool:" + self.pool_id})
- }
-
- posts = []
- append = posts.append
- for num, pid in enumerate(self.post_ids, 1):
- if pid in id_to_post:
- post = id_to_post[pid]
- post["num"] = num
- append(post)
- else:
- self.log.warning("Post %s is unavailable", pid)
-
- return posts
-
-
-class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
- """Extractor for single e621 posts"""
- pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
- test = (
- ("https://e621.net/posts/535", {
- "url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
- "content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
- }),
- ("https://e621.net/post/show/535"),
- )
-
-
-class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
- """Extractor for popular images from e621"""
- pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
- test = (
- ("https://e621.net/explore/posts/popular"),
- (("https://e621.net/explore/posts/popular"
- "?date=2019-06-01&scale=month"), {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": ">= 70",
- })
- )
-
-
-class E621FavoriteExtractor(E621Extractor):
- """Extractor for e621 favorites"""
- subcategory = "favorite"
- directory_fmt = ("{category}", "Favorites", "{user_id}")
- archive_fmt = "f_{user_id}_{id}"
- pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
- test = (
- ("https://e621.net/favorites"),
- ("https://e621.net/favorites?page=2&user_id=53275", {
- "pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
- "count": "> 260",
- })
- )
-
- def __init__(self, match):
- super().__init__(match)
- self.query = text.parse_query(match.group(2))
-
- def metadata(self):
- return {"user_id": self.query.get("user_id", "")}
-
- def posts(self):
- if self.page_start is None:
- self.page_start = 1
- return self._pagination("/favorites.json", self.query, True)
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 891e0c1..6a8744a 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -32,6 +32,12 @@ class FuraffinityExtractor(Extractor):
if self.config("descriptions") == "html":
self._process_description = str.strip
+ layout = self.config("layout")
+ if layout and layout != "auto":
+ self._new_layout = False if layout == "old" else True
+ else:
+ self._new_layout = None
+
def items(self):
if self._warning:
@@ -64,8 +70,11 @@ class FuraffinityExtractor(Extractor):
def _parse_post(self, post_id):
url = "{}/view/{}/".format(self.root, post_id)
extr = text.extract_from(self.request(url).text)
- path = extr('href="//d', '"')
+ if self._new_layout is None:
+ self._new_layout = ("http-equiv=" not in extr("<meta ", ">"))
+
+ path = extr('href="//d', '"')
if not path:
self.log.warning(
"Unable to download post %s (\"%s\")",
@@ -84,10 +93,9 @@ class FuraffinityExtractor(Extractor):
"url": "https://d" + path,
})
- tags = extr('class="tags-row">', '</section>')
- if tags:
- # new site layout
- data["tags"] = text.split_html(tags)
+ if self._new_layout:
+ data["tags"] = text.split_html(extr(
+ 'class="tags-row">', '</section>'))
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
data["artist"] = extr("<strong>", "<")
data["_description"] = extr('class="section-body">', '</div>')
@@ -306,6 +314,25 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
r"|http://www\.postybirb\.com",
"count": 2,
}),
+ # no tags (#2277)
+ ("https://www.furaffinity.net/view/45331225/", {
+ "keyword": {
+ "artist": "Kota_Remminders",
+ "artist_url": "kotaremminders",
+ "date": "dt:2022-01-03 17:49:33",
+ "fa_category": "Adoptables",
+ "filename": "1641232173.kotaremminders_chidopts1",
+ "gender": "Any",
+ "height": 905,
+ "id": 45331225,
+ "rating": "General",
+ "species": "Unspecified / Any",
+ "tags": [],
+ "theme": "All",
+ "title": "REMINDER",
+ "width": 1280,
+ },
+ }),
("https://furaffinity.net/view/21835115/"),
("https://sfw.furaffinity.net/view/21835115/"),
("https://www.furaffinity.net/full/21835115/"),
diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py
index 7ae39c0..251f52e 100644
--- a/gallery_dl/extractor/imgbox.py
+++ b/gallery_dl/extractor/imgbox.py
@@ -53,8 +53,7 @@ class ImgboxExtractor(Extractor):
@staticmethod
def get_image_url(page):
"""Extract download-url"""
- pos = page.index(">Image</a>")
- return text.extract(page, '<a href="', '"', pos)[0]
+ return text.extract(page, 'property="og:image" content="', '"')[0]
class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
@@ -66,12 +65,12 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"
test = (
("https://imgbox.com/g/JaX5V5HX7g", {
- "url": "678f0bca1251d810372326ea4f16582cafa800e4",
+ "url": "da4f15b161461119ee78841d4b8e8d054d95f906",
"keyword": "4b1e62820ac2c6205b7ad0b6322cc8e00dbe1b0c",
"content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
}),
("https://imgbox.com/g/cUGEkRbdZZ", {
- "url": "d839d47cbbbeb121f83c520072512f7e51f52107",
+ "url": "76506a3aab175c456910851f66227e90484ca9f7",
"keyword": "fb0427b87983197849fb2887905e758f3e50cb6e",
}),
("https://imgbox.com/g/JaX5V5HX7h", {
@@ -109,7 +108,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
test = (
("https://imgbox.com/qHhw7lpG", {
- "url": "d931f675a9b848fa7cb9077d6c2b14eb07bdb80f",
+ "url": "ee9cdea6c48ad0161c1b5f81f6b0c9110997038c",
"keyword": "dfc72310026b45f3feb4f9cada20c79b2575e1af",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 8ee8ca9..ded8906 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -220,11 +220,26 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
- self.params.pop("rid", None)
+ self.query = match.group(1)
def posts(self):
- return self.api.search(self.params)
+ params = text.parse_query(self.query)
+ pop = params.pop
+
+ pop("rid", None)
+ params["string_join_type"] = pop("stringtype", None)
+ params["dayslimit"] = pop("days", None)
+ params["username"] = pop("artist", None)
+
+ favsby = pop("favsby", None)
+ if favsby:
+ # get user_id from user profile
+ url = "{}/{}".format(self.root, favsby)
+ page = self.request(url).text
+ user_id = text.extract(page, "?user_id=", "'")[0]
+ params["favs_user_id"] = user_id.partition("&")[0]
+
+ return self.api.search(params)
class InkbunnyFollowingExtractor(InkbunnyExtractor):
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index e8fcd1a..b898e3b 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -100,7 +100,7 @@ class KemonopartyExtractor(Extractor):
elif url.startswith(self.root):
url = self.root + "/data" + url[20:]
- text.nameext_from_url(file["name"], post)
+ text.nameext_from_url(file.get("name", url), post)
yield Message.Url, url, post
def login(self):
diff --git a/gallery_dl/extractor/lightroom.py b/gallery_dl/extractor/lightroom.py
new file mode 100644
index 0000000..8131db8
--- /dev/null
+++ b/gallery_dl/extractor/lightroom.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://lightroom.adobe.com/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+
+
+class LightroomGalleryExtractor(Extractor):
+ """Extractor for an image gallery on lightroom.adobe.com"""
+ category = "lightroom"
+ subcategory = "gallery"
+ directory_fmt = ("{category}", "{user}", "{title}")
+ filename_fmt = "{num:>04}_{id}.{extension}"
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?lightroom\.adobe\.com/shares/([0-9a-f]+)"
+ test = (
+ (("https://lightroom.adobe.com/shares/"
+ "0c9cce2033f24d24975423fe616368bf"), {
+ "keyword": {
+ "title": "Sterne und Nachtphotos",
+ "user": "Christian Schrang",
+ },
+ "count": ">= 55",
+ }),
+ (("https://lightroom.adobe.com/shares/"
+ "7ba68ad5a97e48608d2e6c57e6082813"), {
+ "keyword": {
+ "title": "HEBFC Snr/Res v Brighton",
+ "user": "",
+ },
+ "count": ">= 180",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.href = match.group(1)
+
+ def items(self):
+ # Get config
+ url = "https://lightroom.adobe.com/shares/" + self.href
+ response = self.request(url)
+ album = json.loads(
+ text.extract(response.text, "albumAttributes: ", "\n")[0]
+ )
+
+ images = self.images(album)
+ for img in images:
+ url = img["url"]
+ yield Message.Directory, img
+ yield Message.Url, url, text.nameext_from_url(url, img)
+
+ def metadata(self, album):
+ payload = album["payload"]
+ story = payload.get("story") or {}
+ return {
+ "gallery_id": self.href,
+ "user": story.get("author", ""),
+ "title": story.get("title", payload["name"]),
+ }
+
+ def images(self, album):
+ album_md = self.metadata(album)
+ base_url = album["base"]
+ next_url = album["links"]["/rels/space_album_images_videos"]["href"]
+ num = 1
+
+ while next_url:
+ url = base_url + next_url
+ page = self.request(url).text
+ # skip 1st line as it's a JS loop
+ data = json.loads(page[page.index("\n") + 1:])
+
+ base_url = data["base"]
+ for res in data["resources"]:
+ img_url, img_size = None, 0
+ for key, value in res["asset"]["links"].items():
+ if not key.startswith("/rels/rendition_type/"):
+ continue
+ size = text.parse_int(key.split("/")[-1])
+ if size > img_size:
+ img_size = size
+ img_url = value["href"]
+
+ if img_url:
+ img = {
+ "id": res["asset"]["id"],
+ "num": num,
+ "url": base_url + img_url,
+ }
+ img.update(album_md)
+ yield img
+ num += 1
+ try:
+ next_url = data["links"]["next"]["href"]
+ except KeyError:
+ next_url = None
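
The core of the new extractor's images() method is the rendition selection: for every asset it scans the "/rels/rendition_type/<size>" links and keeps the href with the largest numeric size. A small sketch of that selection over a made-up links dict (not part of the commit):

    links = {
        "/rels/rendition_type/640":  {"href": "renditions/640"},
        "/rels/rendition_type/2048": {"href": "renditions/2048"},
        "/rels/rendition_type/thumbnail2x": {"href": "renditions/thumb"},
    }

    img_url, img_size = None, 0
    for key, value in links.items():
        if not key.startswith("/rels/rendition_type/"):
            continue
        suffix = key.split("/")[-1]
        size = int(suffix) if suffix.isdigit() else 0   # stands in for text.parse_int()
        if size > img_size:
            img_size, img_url = size, value["href"]
    # img_url == "renditions/2048"
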
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index f7809de..01538bf 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -172,7 +172,7 @@ class RedditUserExtractor(RedditExtractor):
"""Extractor for URLs from posts by a reddit user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
- r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?$")
test = (
("https://www.reddit.com/user/username/", {
"count": ">= 2",
@@ -197,8 +197,8 @@ class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for URLs from a submission on reddit.com"""
subcategory = "submission"
pattern = (r"(?:https?://)?(?:"
- r"(?:\w+\.)?reddit\.com/(?:r/[^/?#]+/comments|gallery)"
- r"|redd\.it)/([a-z0-9]+)")
+ r"(?:\w+\.)?reddit\.com/(?:(?:r|u|user)/[^/?#]+"
+ r"/comments|gallery)|redd\.it)/([a-z0-9]+)")
test = (
("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
"pattern": r"https://c2.staticflickr.com/8/7272/\w+_k.jpg",
@@ -235,6 +235,10 @@ class RedditSubmissionExtractor(RedditExtractor):
("https://np.reddit.com/r/lavaporn/comments/2a00np/"),
("https://m.reddit.com/r/lavaporn/comments/2a00np/"),
("https://redd.it/2a00np/"),
+ ("https://www.reddit.com/user/TheSpiritTree/comments/srilyf/", {
+ "pattern": r"https://i.redd.it/8fpgv17yqlh81.jpg",
+ "count": 1,
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index df50f70..2c3ed44 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -106,8 +106,10 @@ class RedgifsSearchExtractor(RedgifsExtractor):
class RedgifsImageExtractor(RedgifsExtractor):
"""Extractor for individual gifs from redgifs.com"""
subcategory = "image"
- pattern = (r"(?:https?://)?(?:www\.)?(?:redgifs\.com/(?:watch|ifr)"
- r"|gifdeliverynetwork.com)/([A-Za-z]+)")
+ pattern = (r"(?:https?://)?(?:"
+ r"(?:www\.)?redgifs\.com/(?:watch|ifr)|"
+ r"(?:www\.)?gifdeliverynetwork\.com|"
+ r"i\.redgifs\.com/i)/([A-Za-z]+)")
test = (
("https://redgifs.com/watch/foolishforkedabyssiniancat", {
"pattern": r"https://\w+\.redgifs\.com"
@@ -115,6 +117,7 @@ class RedgifsImageExtractor(RedgifsExtractor):
"content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
}),
("https://redgifs.com/ifr/FoolishForkedAbyssiniancat"),
+ ("https://i.redgifs.com/i/FoolishForkedAbyssiniancat"),
("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index f459fba..46b06c2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1091,8 +1091,14 @@ class TwitterAPI():
instructions = instructions[key]
instructions = instructions["instructions"]
- entries = instructions[0]["entries"]
- except (KeyError, IndexError):
+ for instr in instructions:
+ if instr.get("type") == "TimelineAddEntries":
+ entries = instr["entries"]
+ break
+ else:
+ raise KeyError()
+
+ except LookupError:
extr.log.debug(data)
if self._user:
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index ed565bc..dd2eb4e 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -34,7 +34,6 @@ class VkExtractor(Extractor):
def _pagination(self, photos_url, user_id):
sub = re.compile(r"/imp[fg]/").sub
needle = 'data-id="{}_'.format(user_id)
- cnt = 0
headers = {
"X-Requested-With": "XMLHttpRequest",
@@ -56,7 +55,9 @@ class VkExtractor(Extractor):
offset = payload[0]
html = payload[1]
- for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
+ cnt = 0
+ for photo in text.extract_iter(html, needle, ')'):
+ cnt += 1
pid = photo[:photo.find('"')]
url = photo[photo.rindex("(")+1:]
url = sub("/", url.partition("?")[0])
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
new file mode 100644
index 0000000..6c3af76
--- /dev/null
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 David Hoppenbrouwers
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://wallpapercave.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class WallpapercaveImageExtractor(Extractor):
+ """Extractor for images on wallpapercave.com"""
+ category = "wallpapercave"
+ subcategory = "image"
+ root = "https://wallpapercave.com"
+ pattern = r"(?:https?://)?(?:www\.)?wallpapercave\.com"
+ test = ("https://wallpapercave.com/w/wp10270355", {
+ "content": "58b088aaa1cf1a60e347015019eb0c5a22b263a6",
+ })
+
+ def items(self):
+ page = self.request(text.ensure_http_scheme(self.url)).text
+ for path in text.extract_iter(page, 'class="download" href="', '"'):
+ image = text.nameext_from_url(path)
+ yield Message.Directory, image
+ yield Message.Url, self.root + path, image
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 74b87fb..ac4bbcb 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -168,7 +168,7 @@ def parse_bytes(value, default=0, suffixes="bkmgtp"):
"""Convert a bytes-amount ("500k", "2.5M", ...) to int"""
try:
last = value[-1].lower()
- except (TypeError, KeyError, IndexError):
+ except (TypeError, LookupError):
return default
if last in suffixes:
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index cedbfa0..4bc9b57 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.20.4"
+__version__ = "1.20.5"