author    Unit 193 <unit193@unit193.net>    2021-11-01 05:03:49 -0400
committer Unit 193 <unit193@unit193.net>    2021-11-01 05:03:49 -0400
commit    4a965d875415907cc1a016b428ae305a964f9228 (patch)
tree      7cece9948a7ba390348e00c669f9cb1f7a9ba39a /gallery_dl
parent    34ba2951b8c523713425c98addb9256ea05c946f (diff)

New upstream version 1.19.1 (tag: upstream/1.19.1)
Diffstat (limited to 'gallery_dl')
 -rw-r--r--  gallery_dl/cache.py                  |  21
 -rw-r--r--  gallery_dl/downloader/ytdl.py        |   3
 -rw-r--r--  gallery_dl/extractor/__init__.py     |   1
 -rw-r--r--  gallery_dl/extractor/cyberdrop.py    |  22
 -rw-r--r--  gallery_dl/extractor/deviantart.py   |  84
 -rw-r--r--  gallery_dl/extractor/furaffinity.py  |   2
 -rw-r--r--  gallery_dl/extractor/gfycat.py       |  31
 -rw-r--r--  gallery_dl/extractor/inkbunny.py     | 100
 -rw-r--r--  gallery_dl/extractor/kemonoparty.py  | 135
 -rw-r--r--  gallery_dl/extractor/mangadex.py     |  27
 -rw-r--r--  gallery_dl/extractor/nhentai.py      |  92
 -rw-r--r--  gallery_dl/extractor/patreon.py      |  39
 -rw-r--r--  gallery_dl/extractor/picarto.py      |  74
 -rw-r--r--  gallery_dl/extractor/pixiv.py        |  62
 -rw-r--r--  gallery_dl/extractor/seisoparty.py   |  65
 -rw-r--r--  gallery_dl/extractor/twitter.py      |  20
 -rw-r--r--  gallery_dl/extractor/vk.py           | 132
 -rw-r--r--  gallery_dl/postprocessor/compare.py  |  58
 -rw-r--r--  gallery_dl/version.py                |   2
 19 files changed, 743 insertions(+), 227 deletions(-)
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 7a49b61..923ed32 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -211,13 +211,18 @@ def _path():
return os.path.join(cachedir, "cache.sqlite3")
-try:
- dbfile = _path()
+def _init():
+ try:
+ dbfile = _path()
+
+ # restrict access permissions for new db files
+ os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
+
+ DatabaseCacheDecorator.db = sqlite3.connect(
+ dbfile, timeout=60, check_same_thread=False)
+ except (OSError, TypeError, sqlite3.OperationalError):
+ global cache
+ cache = memcache
- # restrict access permissions for new db files
- os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
- DatabaseCacheDecorator.db = sqlite3.connect(
- dbfile, timeout=60, check_same_thread=False)
-except (OSError, TypeError, sqlite3.OperationalError):
- cache = memcache # noqa: F811
+_init()
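
The refactor wraps database setup in _init() and pre-creates the file via os.open() with O_CREAT and mode 0o600, so a brand-new cache is only readable by its owner; the mode argument has no effect on an already-existing file. A minimal standalone sketch of the same pattern (the path is a placeholder):

    import os
    import sqlite3

    def open_private_db(dbfile):
        """Connect to an SQLite database, creating it with mode 0o600 if new.

        The mode passed to os.open() only applies when O_CREAT actually
        creates the file; permissions of an existing file stay untouched.
        """
        fd = os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600)
        os.close(fd)  # only the create side effect is needed, not the fd
        return sqlite3.connect(dbfile, timeout=60, check_same_thread=False)

    db = open_private_db("/tmp/example-cache.sqlite3")  # placeholder path
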
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 86e247b..f4d3e05 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -136,8 +136,9 @@ class YoutubeDLDownloader(DownloaderBase):
def _progress_hook(self, info):
if info["status"] == "downloading" and \
info["elapsed"] >= self.progress:
+ total = info.get("total_bytes") or info.get("total_bytes_estimate")
self.out.progress(
- info["total_bytes"],
+ None if total is None else int(total),
info["downloaded_bytes"],
int(info["speed"]),
)
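
Some streams never report an exact total_bytes, only total_bytes_estimate, or neither; the hook now degrades gracefully instead of raising a KeyError. The fallback logic in isolation:

    def total_from_info(info):
        # prefer the exact size; fall back to the estimate; else unknown
        total = info.get("total_bytes") or info.get("total_bytes_estimate")
        return None if total is None else int(total)

    assert total_from_info({"total_bytes": 1000}) == 1000
    assert total_from_info({"total_bytes_estimate": 999.5}) == 999
    assert total_from_info({}) is None
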
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index c512548..93702ab 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -95,6 +95,7 @@ modules = [
"philomena",
"photobucket",
"photovogue",
+ "picarto",
"piczel",
"pillowfort",
"pinterest",
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 2004921..d1b1b25 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -8,7 +8,6 @@
from .common import Extractor, Message
from .. import text
-import binascii
class CyberdropAlbumExtractor(Extractor):
@@ -19,7 +18,7 @@ class CyberdropAlbumExtractor(Extractor):
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = ("https://cyberdrop.me/a/keKRjm4t", {
- "pattern": r"https://f\.cyberdrop\.cc/.*\.[a-z]+$",
+ "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.[a-z]+$",
"keyword": {
"album_id": "keKRjm4t",
"album_name": "Fate (SFW)",
@@ -38,7 +37,14 @@ class CyberdropAlbumExtractor(Extractor):
def items(self):
url = self.root + "/a/" + self.album_id
extr = text.extract_from(self.request(url).text)
- extr("const albumData = {", "")
+
+ files = []
+ append = files.append
+ while True:
+ url = extr('downloadUrl: "', '"')
+ if not url:
+ break
+ append(text.unescape(url))
data = {
"album_id" : self.album_id,
@@ -46,13 +52,11 @@ class CyberdropAlbumExtractor(Extractor):
"date" : text.parse_timestamp(extr("timestamp: ", ",")),
"album_size" : text.parse_int(extr("totalSize: ", ",")),
"description": extr("description: `", "`"),
+ "count" : len(files),
}
- files = extr("fl: '", "'").split(",")
- data["count"] = len(files)
yield Message.Directory, data
- for file_b64 in files:
- file = binascii.a2b_base64(file_b64).decode()
- text.nameext_from_url(file, data)
+ for url in files:
+ text.nameext_from_url(url, data)
data["filename"], _, data["id"] = data["filename"].rpartition("-")
- yield Message.Url, "https://f.cyberdrop.cc/" + file, data
+ yield Message.Url, url, data
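
Rather than base64-decoding the removed fl: blob, the extractor now scans the page once and collects every downloadUrl: "..." value. text.extract_from is gallery_dl's stateful scanner; a rough stdlib-only stand-in (the helper name is hypothetical):

    def extract_from(page):
        """Return a scanner yielding text between two markers,
        continuing from where the previous call left off."""
        pos = 0
        def extr(begin, end):
            nonlocal pos
            start = page.find(begin, pos)
            if start < 0:
                return ""
            start += len(begin)
            stop = page.find(end, start)
            pos = stop + len(end)
            return page[start:stop]
        return extr

    page = 'downloadUrl: "https://a/1.jpg", downloadUrl: "https://a/2.jpg",'
    extr = extract_from(page)
    files = []
    while True:
        url = extr('downloadUrl: "', '"')
        if not url:
            break
        files.append(url)
    assert files == ["https://a/1.jpg", "https://a/2.jpg"]
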
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 7dac770..4604d39 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -14,6 +14,7 @@ from ..cache import cache, memcache
import collections
import itertools
import mimetypes
+import binascii
import time
import re
@@ -39,7 +40,6 @@ class DeviantartExtractor(Extractor):
self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
- self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.comments = self.config("comments", False)
self.user = match.group(1) or match.group(2)
@@ -53,9 +53,6 @@ class DeviantartExtractor(Extractor):
else:
self.unwatch = None
- if self.quality:
- self.quality = ",q_{}".format(self.quality)
-
if self.original != "image":
self._update_content = self._update_content_default
else:
@@ -104,19 +101,8 @@ class DeviantartExtractor(Extractor):
if self.original and deviation["is_downloadable"]:
self._update_content(deviation, content)
-
- if content["src"].startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- intermediary, count = re.subn(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"], 1)
- if count:
- deviation["_fallback"] = (content["src"],)
- content["src"] = intermediary
- if self.quality:
- content["src"] = re.sub(
- r",q_\d+", self.quality, content["src"], 1)
+ else:
+ self._update_token(deviation, content)
yield self.commit(deviation, content)
@@ -302,6 +288,32 @@ class DeviantartExtractor(Extractor):
if mtype and mtype.startswith("image/"):
content.update(data)
+ def _update_token(self, deviation, content):
+ """Replace JWT to be able to remove width/height limits
+
+ All credit goes to @Ironchest337
+ for discovering and implementing this method
+ """
+ url, sep, _ = content["src"].partition("/v1/")
+ if not sep:
+ return
+
+ # header = b'{"typ":"JWT","alg":"none"}'
+ payload = (
+ b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
+ url.partition("/f/")[2].encode() +
+ b'"}]],"aud":["urn:service:file.download"]}'
+ )
+
+ deviation["_fallback"] = (content["src"],)
+ content["src"] = (
+ "{}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{}.".format(
+ url,
+ # base64 of 'header' is precomputed as 'eyJ0eX...'
+ # binascii.a2b_base64(header).rstrip(b"=\n").decode(),
+ binascii.b2a_base64(payload).rstrip(b"=\n").decode())
+ )
+
def _limited_request(self, url, **kwargs):
"""Limits HTTP requests to one every 2 seconds"""
kwargs["fatal"] = None
@@ -746,29 +758,27 @@ class DeviantartPopularExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- self.search_term = self.time_range = self.category_path = None
self.user = ""
trange1, path, trange2, query = match.groups()
- trange = trange1 or trange2
query = text.parse_query(query)
-
- if not trange:
- trange = query.get("order")
-
- if path:
- self.category_path = path.strip("/")
- if trange:
- if trange.startswith("popular-"):
- trange = trange[8:]
- self.time_range = trange.replace("-", "").replace("hours", "hr")
- if query:
- self.search_term = query.get("q")
+ self.search_term = query.get("q")
+
+ trange = trange1 or trange2 or query.get("order", "")
+ if trange.startswith("popular-"):
+ trange = trange[8:]
+ self.time_range = {
+ "most-recent" : "now",
+ "this-week" : "1week",
+ "this-month" : "1month",
+ "this-century": "alltime",
+ "all-time" : "alltime",
+ }.get(trange, "alltime")
self.popular = {
"search": self.search_term or "",
- "range" : trange or "",
- "path" : self.category_path,
+ "range" : trange or "all-time",
+ "path" : path.strip("/") if path else "",
}
def deviations(self):
@@ -851,12 +861,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg")
- }),
- # wixmp URL rewrite v2 (#369)
- (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
- "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f"
+ r"/[^/]+/[^.]+\.jpg\?token="),
}),
# GIF (#242)
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 9516dfa..b5ecbd6 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -250,7 +250,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
FuraffinityExtractor.__init__(self, match)
self.query = text.parse_query(match.group(2))
if self.user and "q" not in self.query:
- self.query["q"] = text.unescape(self.user)
+ self.query["q"] = text.unquote(self.user)
def metadata(self):
return {"search": self.query.get("q")}
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 2757852..9b4d5ee 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -22,7 +22,13 @@ class GfycatExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1).lower()
- self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif")
+
+ formats = self.config("format")
+ if formats is None:
+ formats = ("mp4", "webm", "mobile", "gif")
+ elif isinstance(formats, str):
+ formats = (formats, "mp4", "webm", "mobile", "gif")
+ self.formats = formats
def items(self):
metadata = self.metadata()
@@ -30,23 +36,25 @@ class GfycatExtractor(Extractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
continue
- url = self._select_format(gfycat)
+ url = self._process(gfycat)
gfycat.update(metadata)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
- def _select_format(self, gfyitem):
+ def _process(self, gfycat):
+ gfycat["_fallback"] = formats = self._formats(gfycat)
+ gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
+ return next(formats, "")
+
+ def _formats(self, gfycat):
for fmt in self.formats:
key = fmt + "Url"
- if key in gfyitem:
- url = gfyitem[key]
+ if key in gfycat:
+ url = gfycat[key]
if url.startswith("http:"):
url = "https" + url[4:]
- gfyitem["extension"] = url.rpartition(".")[2]
- return url
- gfyitem["extension"] = ""
- return ""
+ gfycat["extension"] = url.rpartition(".")[2]
+ yield url
def metadata(self):
return {}
@@ -146,8 +154,7 @@ class GfycatImageExtractor(GfycatExtractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
return
- url = self._select_format(gfycat)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
+ url = self._process(gfycat)
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
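
In _process, a single generator serves two consumers: next(formats, "") pulls the preferred URL, and the partially consumed generator is stored as _fallback, so the remaining formats are only evaluated if the first download fails. The pattern reduced to its core:

    def candidate_urls(item, preferences):
        # yield the URL for each available format, best first
        for fmt in preferences:
            key = fmt + "Url"
            if key in item:
                yield item[key]

    item = {"webmUrl": "https://e/x.webm", "gifUrl": "https://e/x.gif"}
    urls = candidate_urls(item, ("mp4", "webm", "gif"))
    primary = next(urls, "")   # "https://e/x.webm" (no mp4 available)
    fallbacks = urls           # lazily yields the gif URL only on demand
    assert primary == "https://e/x.webm"
    assert list(fallbacks) == ["https://e/x.gif"]
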
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index cbe0f43..3d09d79 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -135,33 +135,123 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
return self.api.search(params)
+class InkbunnyPoolExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny pools"""
+ subcategory = "pool"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"poolview_process\.php\?pool_id=(\d+)|"
+ r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
+ test = (
+ ("https://inkbunny.net/poolview_process.php?pool_id=28985", {
+ "count": 9,
+ }),
+ ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
+ "&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ pid = match.group(1)
+ if pid:
+ self.pool_id = pid
+ self.orderby = "pool_order"
+ else:
+ params = text.parse_query(match.group(2))
+ self.pool_id = params.get("pool_id")
+ self.orderby = params.get("orderby", "pool_order")
+
+ def posts(self):
+ params = {
+ "pool_id": self.pool_id,
+ "orderby": self.orderby,
+ }
+ return self.api.search(params)
+
+
class InkbunnyFavoriteExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/userfavorites_process\.php\?favs_user_id=(\d+)"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"userfavorites_process\.php\?favs_user_id=(\d+)|"
+ r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
test = (
("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
r"/\d+/\d+_\w+_.+",
"range": "20-50",
}),
+ ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
+ "&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
)
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ uid = match.group(1)
+ if uid:
+ self.user_id = uid
+ self.orderby = self.config("orderby", "fav_datetime")
+ else:
+ params = text.parse_query(match.group(2))
+ self.user_id = params.get("user_id")
+ self.orderby = params.get("orderby", "fav_datetime")
def posts(self):
- orderby = self.config("orderby", "fav_datetime")
params = {
"favs_user_id": self.user_id,
- "orderby" : orderby,
+ "orderby" : self.orderby,
}
- if orderby and orderby.startswith("unread_"):
+ if self.orderby and self.orderby.startswith("unread_"):
params["unread_submissions"] = "yes"
return self.api.search(params)
+class InkbunnyFollowingExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny user watches"""
+ subcategory = "following"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
+ r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
+ test = (
+ (("https://inkbunny.net/watchlist_process.php"
+ "?mode=watching&user_id=20969"), {
+ "pattern": InkbunnyUserExtractor.pattern,
+ "count": ">= 90",
+ }),
+ ("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
+ "&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ self.user_id = match.group(1) or \
+ text.parse_query(match.group(2)).get("user_id")
+
+ def items(self):
+ url = self.root + "/watchlist_process.php"
+ params = {"mode": "watching", "user_id": self.user_id}
+
+ with self.request(url, params=params) as response:
+ url, _, params = response.url.partition("?")
+ page = response.text
+
+ params = text.parse_query(params)
+ params["page"] = text.parse_int(params.get("page"), 1)
+ data = {"_extractor": InkbunnyUserExtractor}
+
+ while True:
+ cnt = 0
+ for user in text.extract_iter(
+ page, '<a class="widget_userNameSmall" href="', '"',
+ page.index('id="changethumboriginal_form"')):
+ cnt += 1
+ yield Message.Queue, self.root + user, data
+
+ if cnt < 20:
+ return
+ params["page"] += 1
+ page = self.request(url, params=params).text
+
+
class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index c5f5ae7..d5aad67 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,8 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
+BASE_PATTERN = r"(?:https?://)?kemono\.party"
+USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor):
@@ -29,7 +30,9 @@ class KemonopartyExtractor(Extractor):
def items(self):
self._prepare_ddosguard_cookies()
- find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+ find_inline = re.compile(
+ r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
+ r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
skip_service = \
"patreon" if self.config("patreon-skip-file", True) else None
@@ -101,7 +104,7 @@ class KemonopartyExtractor(Extractor):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
@@ -138,11 +141,11 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/post/([^/?#]+)"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/data/files/fanbox"
- r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+ "pattern": r"https://kemono.party/data/21/0f"
+ r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
"content": str,
@@ -197,10 +200,128 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
return (posts[0],) if len(posts) > 1 else posts
+class KemonopartyDiscordExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party discord servers"""
+ subcategory = "discord"
+ directory_fmt = ("{category}", "discord", "{server}",
+ "{channel_name|channel}")
+ filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "discord_{server}_{id}_{num}"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
+ test = (
+ (("https://kemono.party/discord"
+ "/server/488668827274444803#finish-work"), {
+ "count": 4,
+ "keyword": {"channel_name": "finish-work"},
+ }),
+ (("https://kemono.party/discord"
+ "/server/256559665620451329/channel/462437519519383555#"), {
+ "pattern": r"https://kemono\.party/data/attachments/discord"
+ r"/256559665620451329/\d+/\d+/.+",
+ "count": ">= 2",
+ }),
+ # 'inline' files
+ (("https://kemono.party/discord"
+ "/server/315262215055736843/channel/315262215055736843#general"), {
+ "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
+ "range": "1-5",
+ "options": (("image-filter", "type == 'inline'"),),
+ }),
+ )
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server, self.channel, self.channel_name = match.groups()
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+
+ find_inline = re.compile(
+ r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
+ r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
+
+ posts = self.posts()
+ max_posts = self.config("max-posts")
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
+ files = []
+ append = files.append
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ append(attachment)
+ for path in find_inline(post["content"] or ""):
+ append({"path": "https://cdn.discordapp.com" + path,
+ "name": path, "type": "inline"})
+
+ post["channel_name"] = self.channel_name
+ post["date"] = text.parse_datetime(
+ post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ yield Message.Directory, post
+
+ for post["num"], file in enumerate(files, 1):
+ post["type"] = file["type"]
+ url = file["path"]
+ if url[0] == "/":
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
+
+ text.nameext_from_url(file["name"], post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ if self.channel is None:
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ for channel in self.request(url).json():
+ if channel["name"] == self.channel_name:
+ self.channel = channel["id"]
+ break
+ else:
+ raise exception.NotFoundError("channel")
+
+ url = "{}/api/discord/channel/{}".format(self.root, self.channel)
+ params = {"skip": 0}
+
+ while True:
+ posts = self.request(url, params=params).json()
+ yield from posts
+
+ if len(posts) < 25:
+ break
+ params["skip"] += 25
+
+
+class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
+ subcategory = "discord-server"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
+ test = ("https://kemono.party/discord/server/488668827274444803", {
+ "pattern": KemonopartyDiscordExtractor.pattern,
+ "count": 13,
+ })
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server = match.group(1)
+
+ def items(self):
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ channels = self.request(url).json()
+
+ for channel in channels:
+ url = "{}/discord/server/{}/channel/{}#{}".format(
+ self.root, self.server, channel["id"], channel["name"])
+ channel["_extractor"] = KemonopartyDiscordExtractor
+ yield Message.Queue, url, channel
+
+
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?kemono\.party/favorites"
+ pattern = BASE_PATTERN + r"/favorites"
test = ("https://kemono.party/favorites", {
"pattern": KemonopartyUserExtractor.pattern,
"url": "f4b5b796979bcba824af84206578c79101c7f0e1",
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 634a92d..ff1d7c3 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -209,22 +209,15 @@ class MangadexAPI():
return self._call("/manga/" + uuid)["data"]
def manga_feed(self, uuid):
- config = self.extractor.config
- order = "desc" if config("chapter-reverse") else "asc"
+ order = "desc" if self.extractor.config("chapter-reverse") else "asc"
params = {
- "order[volume]" : order,
- "order[chapter]" : order,
- "translatedLanguage[]": config("lang"),
- "contentRating[]" : [
- "safe", "suggestive", "erotica", "pornographic"],
+ "order[volume]" : order,
+ "order[chapter]": order,
}
return self._pagination("/manga/" + uuid + "/feed", params)
def user_follows_manga_feed(self):
- params = {
- "order[publishAt]" : "desc",
- "translatedLanguage[]": self.extractor.config("lang"),
- }
+ params = {"order[publishAt]": "desc"}
return self._pagination("/user/follows/manga/feed", params)
def authenticate(self):
@@ -275,8 +268,20 @@ class MangadexAPI():
def _pagination(self, endpoint, params=None):
if params is None:
params = {}
+
+ config = self.extractor.config
+ ratings = config("ratings")
+ if ratings is None:
+ ratings = ("safe", "suggestive", "erotica", "pornographic")
+
+ params["contentRating[]"] = ratings
+ params["translatedLanguage[]"] = config("lang")
params["offset"] = 0
+ api_params = config("api-parameters")
+ if api_params:
+ params.update(api_params)
+
while True:
data = self._call(endpoint, params)
yield from data["data"]
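
_pagination now centralizes the parameters shared by all listing endpoints: contentRating[] from the new ratings option, translatedLanguage[] from lang, and a final api-parameters merge that lets users override anything. The merge order, reduced (config access simplified to a dict):

    def build_params(config, params=None):
        """Merge per-call params with config defaults; 'api-parameters'
        wins because it is applied last."""
        if params is None:
            params = {}
        ratings = config.get("ratings")
        if ratings is None:
            ratings = ("safe", "suggestive", "erotica", "pornographic")
        params["contentRating[]"] = ratings
        params["translatedLanguage[]"] = config.get("lang")
        params["offset"] = 0
        api_params = config.get("api-parameters")
        if api_params:
            params.update(api_params)
        return params

    p = build_params({"api-parameters": {"offset": 40}})
    assert p["offset"] == 40  # the user override survives
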
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 20b716b..9df43e5 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -14,15 +14,10 @@ import collections
import json
-class NhentaiBase():
- """Base class for nhentai extractors"""
+class NhentaiGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from nhentai.net"""
category = "nhentai"
root = "https://nhentai.net"
- media_url = "https://i.nhentai.net"
-
-
-class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
- """Extractor for image galleries from nhentai.net"""
pattern = r"(?:https?://)?nhentai\.net/g/(\d+)"
test = ("https://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
@@ -87,8 +82,8 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
}
def images(self, _):
- ufmt = "{}/galleries/{}/{{}}.{{}}".format(
- self.media_url, self.data["media_id"])
+ ufmt = ("https://i.nhentai.net/galleries/" +
+ self.data["media_id"] + "/{}.{}")
extdict = {"j": "jpg", "p": "png", "g": "gif"}
return [
@@ -99,28 +94,24 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
]
-class NhentaiSearchExtractor(NhentaiBase, Extractor):
- """Extractor for nhentai search results"""
- subcategory = "search"
- pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
- test = ("https://nhentai.net/search/?q=touhou", {
- "pattern": NhentaiGalleryExtractor.pattern,
- "count": 30,
- "range": "1-30",
- })
+class NhentaiExtractor(Extractor):
+ """Base class for nhentai extractors"""
+ category = "nhentai"
+ root = "https://nhentai.net"
def __init__(self, match):
Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
+ self.path, self.query = match.groups()
def items(self):
data = {"_extractor": NhentaiGalleryExtractor}
- for gallery_id in self._pagination(self.params):
+ for gallery_id in self._pagination():
url = "{}/g/{}/".format(self.root, gallery_id)
yield Message.Queue, url, data
- def _pagination(self, params):
- url = "{}/search/".format(self.root)
+ def _pagination(self):
+ url = self.root + self.path
+ params = text.parse_query(self.query)
params["page"] = text.parse_int(params.get("page"), 1)
while True:
@@ -131,29 +122,40 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
params["page"] += 1
-class NhentaiFavoriteExtractor(NhentaiBase, Extractor):
+class NhentaiTagExtractor(NhentaiExtractor):
+ """Extractor for nhentai tag searches"""
+ subcategory = "tag"
+ pattern = (r"(?:https?://)?nhentai\.net("
+ r"/(?:artist|category|character|group|language|parody|tag)"
+ r"/[^/?#]+(?:/popular[^/?#]*)?/?)(?:\?([^#]+))?")
+ test = (
+ ("https://nhentai.net/tag/sole-female/", {
+ "pattern": NhentaiGalleryExtractor.pattern,
+ "count": 30,
+ "range": "1-30",
+ }),
+ ("https://nhentai.net/artist/itou-life/"),
+ ("https://nhentai.net/group/itou-life/"),
+ ("https://nhentai.net/parody/touhou-project/"),
+ ("https://nhentai.net/character/patchouli-knowledge/popular"),
+ ("https://nhentai.net/category/doujinshi/popular-today"),
+ ("https://nhentai.net/language/english/popular-week"),
+ )
+
+
+class NhentaiSearchExtractor(NhentaiExtractor):
+ """Extractor for nhentai search results"""
+ subcategory = "search"
+ pattern = r"(?:https?://)?nhentai\.net(/search/?)\?([^#]+)"
+ test = ("https://nhentai.net/search/?q=touhou", {
+ "pattern": NhentaiGalleryExtractor.pattern,
+ "count": 30,
+ "range": "1-30",
+ })
+
+
+class NhentaiFavoriteExtractor(NhentaiExtractor):
"""Extractor for nhentai favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?"
+ pattern = r"(?:https?://)?nhentai\.net(/favorites/?)(?:\?([^#]+))?"
test = ("https://nhentai.net/favorites/",)
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
-
- def items(self):
- data = {"_extractor": NhentaiGalleryExtractor}
- for gallery_id in self._pagination(self.params):
- url = "{}/g/{}/".format(self.root, gallery_id)
- yield Message.Queue, url, data
-
- def _pagination(self, params):
- url = "{}/favorites/".format(self.root)
- params["page"] = text.parse_int(params.get("page"), 1)
-
- while True:
- page = self.request(url, params=params).text
- yield from text.extract_iter(page, 'href="/g/', '/')
- if 'class="next"' not in page:
- return
- params["page"] += 1
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 547465b..c7df089 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -32,22 +32,19 @@ class PatreonExtractor(Extractor):
if "session_id" not in self.session.cookies:
self.log.warning("no 'session_id' cookie set")
PatreonExtractor._warning = False
+ generators = self._build_file_generators(self.config("files"))
for post in self.posts():
if not post.get("current_user_can_view", True):
self.log.warning("Not allowed to view post %s", post["id"])
continue
+ yield Message.Directory, post
+
post["num"] = 0
hashes = set()
-
- yield Message.Directory, post
- for kind, url, name in itertools.chain(
- self._images(post),
- self._attachments(post),
- self._postfile(post),
- self._content(post),
- ):
+ for kind, url, name in itertools.chain.from_iterable(
+ g(post) for g in generators):
fhash = self._filehash(url)
if fhash not in hashes or not fhash:
hashes.add(fhash)
@@ -82,15 +79,14 @@ class PatreonExtractor(Extractor):
if url:
yield "attachment", url, attachment["name"]
- @staticmethod
- def _content(post):
+ def _content(self, post):
content = post.get("content")
if content:
for img in text.extract_iter(
content, '<img data-media-id="', '>'):
url = text.extract(img, 'src="', '"')[0]
if url:
- yield "content", url, url
+ yield "content", url, self._filename(url) or url
def posts(self):
"""Return all relevant post objects"""
@@ -155,7 +151,7 @@ class PatreonExtractor(Extractor):
included[file["type"]][file["id"]]
for file in files["data"]
]
- return []
+ return ()
@memcache(keyarg=1)
def _user(self, url):
@@ -212,6 +208,20 @@ class PatreonExtractor(Extractor):
"&json-api-version=1.0"
)
+ def _build_file_generators(self, filetypes):
+ if filetypes is None:
+ return (self._images, self._attachments,
+ self._postfile, self._content)
+ genmap = {
+ "images" : self._images,
+ "attachments": self._attachments,
+ "postfile" : self._postfile,
+ "content" : self._content,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ return [genmap[ft] for ft in filetypes]
+
class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
@@ -305,8 +315,9 @@ class PatreonPostExtractor(PatreonExtractor):
"count": 4,
}),
# postfile + content
- ("https://www.patreon.com/posts/19987002", {
- "count": 4,
+ ("https://www.patreon.com/posts/56127163", {
+ "count": 3,
+ "keyword": {"filename": r"re:^(?!1).+$"},
}),
# tags (#1539)
("https://www.patreon.com/posts/free-post-12497641", {
diff --git a/gallery_dl/extractor/picarto.py b/gallery_dl/extractor/picarto.py
new file mode 100644
index 0000000..77a07b4
--- /dev/null
+++ b/gallery_dl/extractor/picarto.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://picarto.tv/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class PicartoGalleryExtractor(Extractor):
+ """Extractor for picarto galleries"""
+ category = "picarto"
+ subcategory = "gallery"
+ root = "https://picarto.tv"
+ directory_fmt = ("{category}", "{channel[name]}")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?picarto\.tv/([^/?#]+)/gallery"
+ test = ("https://picarto.tv/fnook/gallery/default/", {
+ "pattern": r"https://images\.picarto\.tv/gallery/\d/\d\d/\d+/artwork"
+ r"/[0-9a-f-]+/large-[0-9a-f]+\.(jpg|png|gif)",
+ "count": ">= 7",
+ "keyword": {"date": "type:datetime"},
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def items(self):
+ for post in self.posts():
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%d %H:%M:%S")
+ variations = post.pop("variations", ())
+ yield Message.Directory, post
+
+ image = post["default_image"]
+ if not image:
+ continue
+ url = "https://images.picarto.tv/gallery/" + image["name"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ for variation in variations:
+ post.update(variation)
+ image = post["default_image"]
+ url = "https://images.picarto.tv/gallery/" + image["name"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = "https://ptvintern.picarto.tv/api/channel-gallery"
+ params = {
+ "first": "30",
+ "page": 1,
+ "filter_params[album_id]": "",
+ "filter_params[channel_name]": self.username,
+ "filter_params[q]": "",
+ "filter_params[visibility]": "",
+ "order_by[field]": "published_at",
+ "order_by[order]": "DESC",
+ }
+
+ while True:
+ posts = self.request(url, params=params).json()
+ if not posts:
+ return
+ yield from posts
+ params["page"] += 1
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index e21a82c..8e47e2e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -558,6 +558,68 @@ class PixivPixivisionExtractor(PixivExtractor):
}
+class PixivSketchExtractor(Extractor):
+ """Extractor for user pages on sketch.pixiv.net"""
+ category = "pixiv"
+ subcategory = "sketch"
+ directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
+ filename_fmt = "{post_id} {id}.{extension}"
+ archive_fmt = "S{user[id]}_{id}"
+ root = "https://sketch.pixiv.net"
+ cookiedomain = ".pixiv.net"
+ pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
+ test = ("https://sketch.pixiv.net/@nicoby", {
+ "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium"
+ r"/file/\d+/\d+\.(jpg|png)",
+ "count": ">= 35",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def items(self):
+ headers = {"Referer": "{}/@{}".format(self.root, self.username)}
+
+ for post in self.posts():
+ media = post["media"]
+ post["post_id"] = post["id"]
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ util.delete_items(post, ("id", "media", "_links"))
+
+ yield Message.Directory, post
+ post["_http_headers"] = headers
+
+ for photo in media:
+ original = photo["photo"]["original"]
+ post["id"] = photo["id"]
+ post["width"] = original["width"]
+ post["height"] = original["height"]
+
+ url = original["url"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = "{}/api/walls/@{}/posts/public.json".format(
+ self.root, self.username)
+ headers = {
+ "Accept": "application/vnd.sketch-v4+json",
+ "X-Requested-With": "{}/@{}".format(self.root, self.username),
+ "Referer": self.root + "/",
+ }
+
+ while True:
+ data = self.request(url, headers=headers).json()
+ yield from data["data"]["items"]
+
+ next_url = data["_links"].get("next")
+ if not next_url:
+ return
+ url = self.root + next_url["href"]
+
+
class PixivAppAPI():
"""Minimal interface for the Pixiv App API for mobile devices
diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py
index 28e049b..a2a24e0 100644
--- a/gallery_dl/extractor/seisoparty.py
+++ b/gallery_dl/extractor/seisoparty.py
@@ -9,7 +9,8 @@
"""Extractors for https://seiso.party/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import re
@@ -52,6 +53,25 @@ class SeisopartyExtractor(Extractor):
"files" : self._find_files(page),
}
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/account/login"
+ data = {"username": username, "password": password}
+
+ response = self.request(url, method="POST", data=data)
+ if response.url.endswith("/account/login") and \
+ "Username or password is incorrect" in response.text:
+ raise exception.AuthenticationError()
+
+ return {c.name: c.value for c in response.history[0].cookies}
+
class SeisopartyUserExtractor(SeisopartyExtractor):
"""Extractor for all posts from a seiso.party user listing"""
@@ -136,3 +156,46 @@ class SeisopartyPostExtractor(SeisopartyExtractor):
url = "{}/post/{}/{}/{}".format(
self.root, self.service, self.user_id, self.post_id)
return (self._parse_post(self.request(url).text, self.post_id),)
+
+
+class SeisopartyFavoriteExtractor(SeisopartyExtractor):
+ """Extractor for seiso.party favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?"
+ test = (
+ ("https://seiso.party/favorites/artists", {
+ "pattern": SeisopartyUserExtractor.pattern,
+ "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683",
+ "count": 3,
+ }),
+ ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", {
+ "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3",
+ }),
+ )
+
+ def __init__(self, match):
+ SeisopartyExtractor.__init__(self, match)
+ self.query = match.group(1)
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+ self.login()
+
+ url = self.root + "/favorites/artists"
+ data = {"_extractor": SeisopartyUserExtractor}
+ params = text.parse_query(self.query)
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for card in text.extract_iter(
+ page, '<div class="artist-card', '</a>'):
+ path = text.extract(card, '<a href="', '"')[0]
+ yield Message.Queue, self.root + path, data
+ cnt += 1
+
+ if cnt < 25:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4a3f6cd..568ee2e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -41,6 +41,16 @@ class TwitterExtractor(Extractor):
self.cards = self.config("cards", False)
self._user_cache = {}
+ size = self.config("size")
+ if size is None:
+ self._size_image = "orig"
+ self._size_fallback = ("large", "medium", "small")
+ else:
+ if isinstance(size, str):
+ size = size.split(",")
+ self._size_image = size[0]
+ self._size_fallback = size[1:]
+
def items(self):
self.login()
metadata = self.metadata()
@@ -115,7 +125,7 @@ class TwitterExtractor(Extractor):
base, _, fmt = url.rpartition(".")
base += "?format=" + fmt + "&name="
files.append(text.nameext_from_url(url, {
- "url" : base + "orig",
+ "url" : base + self._size_image,
"width" : width,
"height" : height,
"_fallback": self._image_fallback(base),
@@ -123,11 +133,9 @@ class TwitterExtractor(Extractor):
else:
files.append({"url": media["media_url"]})
- @staticmethod
- def _image_fallback(base):
- yield base + "large"
- yield base + "medium"
- yield base + "small"
+ def _image_fallback(self, base):
+ for fmt in self._size_fallback:
+ yield base + fmt
def _extract_card(self, tweet, files):
card = tweet["card"]
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9dd2d47..9724c4b 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -12,18 +12,67 @@ from .common import Extractor, Message
from .. import text
import re
+BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
-class VkPhotosExtractor(Extractor):
- """Extractor for photos from a vk user"""
+
+class VkExtractor(Extractor):
+ """Base class for vk extractors"""
category = "vk"
- subcategory = "photos"
directory_fmt = ("{category}", "{user[name]|user[id]}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://vk.com"
request_interval = 1.0
- pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:"
- r"(?:albums|photos|id)(-?\d+)|([^/?#]+))")
+
+ def items(self):
+ data = self.metadata()
+ yield Message.Directory, data
+ for photo in self.photos():
+ photo.update(data)
+ yield Message.Url, photo["url"], photo
+
+ def _pagination(self, photos_url, user_id):
+ sub = re.compile(r"/imp[fg]/").sub
+ needle = 'data-id="{}_'.format(user_id)
+ cnt = 0
+
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer" : photos_url,
+ }
+ params = {
+ "al" : "1",
+ "al_ad" : "0",
+ "offset": 0,
+ "part" : "1",
+ }
+
+ while True:
+ payload = self.request(
+ photos_url, method="POST", headers=headers, data=params
+ ).json()["payload"][1]
+
+ offset = payload[0]
+ html = payload[1]
+
+ for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
+ pid = photo[:photo.find('"')]
+ url = photo[photo.rindex("(")+1:]
+ url = sub("/", url.partition("?")[0])
+ yield text.nameext_from_url(url, {"url": url, "id": pid})
+
+ if cnt <= 20 or offset == params["offset"]:
+ return
+ params["offset"] = offset
+
+
+class VkPhotosExtractor(VkExtractor):
+ """Extractor for photos from a vk user"""
+ subcategory = "photos"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"(?:albums|photos|id)(-?\d+)"
+ r"|(?!album-?\d+_)([^/?#]+))")
test = (
("https://vk.com/id398982326", {
"pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
@@ -58,10 +107,14 @@ class VkPhotosExtractor(Extractor):
)
def __init__(self, match):
- Extractor.__init__(self, match)
+ VkExtractor.__init__(self, match)
self.user_id, self.user_name = match.groups()
- def items(self):
+ def photos(self):
+ url = "{}/photos{}".format(self.root, self.user_id)
+ return self._pagination(url, self.user_id)
+
+ def metadata(self):
if self.user_id:
user_id = self.user_id
prefix = "public" if user_id[0] == "-" else "id"
@@ -70,40 +123,8 @@ class VkPhotosExtractor(Extractor):
else:
url = "{}/{}".format(self.root, self.user_name)
data = self._extract_profile(url)
- user_id = data["user"]["id"]
-
- photos_url = "{}/photos{}".format(self.root, user_id)
- headers = {
- "X-Requested-With": "XMLHttpRequest",
- "Origin" : self.root,
- "Referer" : photos_url,
- }
- params = {
- "al" : "1",
- "al_ad" : "0",
- "offset": 0,
- "part" : "1",
- }
-
- yield Message.Directory, data
- sub = re.compile(r"/imp[fg]/").sub
- needle = 'data-id="{}_'.format(user_id)
- cnt = 0
-
- while True:
- offset, html = self.request(
- photos_url, method="POST", headers=headers, data=params
- ).json()["payload"][1]
-
- for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
- data["id"] = photo[:photo.find('"')]
- url = photo[photo.rindex("(")+1:]
- url = sub("/", url.partition("?")[0])
- yield Message.Url, url, text.nameext_from_url(url, data)
-
- if cnt <= 40 or offset == params["offset"]:
- return
- params["offset"] = offset
+ self.user_id = data["user"]["id"]
+ return data
def _extract_profile(self, url):
extr = text.extract_from(self.request(url).text)
@@ -116,3 +137,32 @@ class VkPhotosExtractor(Extractor):
'<span class="current_text">', '</span'))),
"id" : extr('<a href="/albums', '"'),
}}
+
+
+class VkAlbumExtractor(VkExtractor):
+ """Extractor for a vk album"""
+ subcategory = "album"
+ directory_fmt = ("{category}", "{user[id]}", "{album[id]}")
+ pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
+ test = (
+ ("https://vk.com/album221469416_0", {
+ "count": 3,
+ }),
+ ("https://vk.com/album-165740836_281339889", {
+ "count": 12,
+ }),
+ )
+
+ def __init__(self, match):
+ VkExtractor.__init__(self, match)
+ self.user_id, self.album_id = match.groups()
+
+ def photos(self):
+ url = "{}/album{}_{}".format(self.root, self.user_id, self.album_id)
+ return self._pagination(url, self.user_id)
+
+ def metadata(self):
+ return {
+ "user": {"id": self.user_id},
+ "album": {"id": self.album_id},
+ }
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index a08cdc4..b3b94f7 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -20,36 +20,32 @@ class ComparePP(PostProcessor):
PostProcessor.__init__(self, job)
if options.get("shallow"):
self._compare = self._compare_size
+ self._equal_exc = self._equal_cnt = 0
- action = options.get("action")
- if action == "enumerate":
- job.register_hooks({"file": self.enumerate}, options)
- else:
- job.register_hooks({"file": self.compare}, options)
- action, _, smax = action.partition(":")
- self._skipmax = text.parse_int(smax)
- self._skipexc = self._skipcnt = 0
- if action == "abort":
- self._skipexc = exception.StopExtraction
- elif action == "terminate":
- self._skipexc = exception.TerminateExtraction
- elif action == "exit":
- self._skipexc = sys.exit
-
- def compare(self, pathfmt):
+ equal = options.get("equal")
+ if equal:
+ equal, _, emax = equal.partition(":")
+ self._equal_max = text.parse_int(emax)
+ if equal == "abort":
+ self._equal_exc = exception.StopExtraction
+ elif equal == "terminate":
+ self._equal_exc = exception.TerminateExtraction
+ elif equal == "exit":
+ self._equal_exc = sys.exit
+
+ job.register_hooks({"file": (
+ self.enumerate
+ if options.get("action") == "enumerate" else
+ self.replace
+ )}, options)
+
+ def replace(self, pathfmt):
try:
if self._compare(pathfmt.realpath, pathfmt.temppath):
- if self._skipexc:
- self._skipcnt += 1
- if self._skipcnt >= self._skipmax:
- util.remove_file(pathfmt.temppath)
- print()
- raise self._skipexc()
- pathfmt.delete = True
- else:
- self._skipcnt = 0
+ return self._equal(pathfmt)
except OSError:
pass
+ self._equal_cnt = 0
def enumerate(self, pathfmt):
num = 1
@@ -58,9 +54,10 @@ class ComparePP(PostProcessor):
pathfmt.prefix = str(num) + "."
pathfmt.set_extension(pathfmt.extension, False)
num += 1
- pathfmt.delete = True
+ return self._equal(pathfmt)
except OSError:
pass
+ self._equal_cnt = 0
def _compare(self, f1, f2):
return self._compare_size(f1, f2) and self._compare_content(f1, f2)
@@ -81,5 +78,14 @@ class ComparePP(PostProcessor):
if not buf1:
return True
+ def _equal(self, pathfmt):
+ if self._equal_exc:
+ self._equal_cnt += 1
+ if self._equal_cnt >= self._equal_max:
+ util.remove_file(pathfmt.temppath)
+ print()
+ raise self._equal_exc()
+ pathfmt.delete = True
+
__postprocessor__ = ComparePP
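
The rewrite splits the old combined action option in two: action still selects replace versus enumerate, while the new equal option ("abort:N", "terminate:N", "exit:N") decides what happens after N consecutive files compare equal. Mapping that option string to its exception, in brief (the exception classes are stand-ins for gallery_dl's exception module):

    import sys

    class StopExtraction(Exception):
        pass

    class TerminateExtraction(Exception):
        pass

    def parse_equal(option):
        """Return (callable, threshold) for an option like 'abort:3'."""
        action, _, emax = option.partition(":")
        exc = {"abort": StopExtraction,
               "terminate": TerminateExtraction,
               "exit": sys.exit}.get(action, 0)
        return exc, int(emax or 0)

    assert parse_equal("terminate:5") == (TerminateExtraction, 5)
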
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index acc3b8d..ee01549 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.19.0"
+__version__ = "1.19.1"