Diffstat (limited to 'gallery_dl')
 gallery_dl/__init__.py               |   7
 gallery_dl/cookies.py                |   9
 gallery_dl/downloader/ytdl.py        |   5
 gallery_dl/extractor/batoto.py       |  27
 gallery_dl/extractor/bunkr.py        |  50
 gallery_dl/extractor/cyberdrop.py    |  14
 gallery_dl/extractor/deviantart.py   |  11
 gallery_dl/extractor/e621.py         |  24
 gallery_dl/extractor/exhentai.py     |   2
 gallery_dl/extractor/flickr.py       |  38
 gallery_dl/extractor/furaffinity.py  |   5
 gallery_dl/extractor/generic.py      |   8
 gallery_dl/extractor/gofile.py       |   3
 gallery_dl/extractor/hitomi.py       |   1
 gallery_dl/extractor/instagram.py    |  29
 gallery_dl/extractor/koharu.py       |  25
 gallery_dl/extractor/lolisafe.py     |   2
 gallery_dl/extractor/newgrounds.py   |  10
 gallery_dl/extractor/pixiv.py        |  90
 gallery_dl/extractor/sankaku.py      |   5
 gallery_dl/extractor/sexcom.py       |  19
 gallery_dl/extractor/szurubooru.py   |   8
 gallery_dl/extractor/toyhouse.py     |   3
 gallery_dl/extractor/tumblr.py       |   3
 gallery_dl/extractor/twitter.py      |  29
 gallery_dl/extractor/wikimedia.py    | 124
 gallery_dl/extractor/ytdl.py         |  17
 gallery_dl/formatter.py              |  18
 gallery_dl/job.py                    |   9
 gallery_dl/option.py                 |  38
 gallery_dl/path.py                   |  28
 gallery_dl/postprocessor/__init__.py |   2
 gallery_dl/postprocessor/hash.py     |  71
 gallery_dl/postprocessor/metadata.py |  34
 gallery_dl/postprocessor/rename.py   |  91
 gallery_dl/postprocessor/ugoira.py   | 169
 gallery_dl/util.py                   |  57
 gallery_dl/version.py                |   2
 gallery_dl/ytdl.py                   |   2
 39 files changed, 812 insertions(+), 277 deletions(-)
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 4b39c15..663fe99 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -238,6 +238,13 @@ def main():
return config.open_extern()
else:
+ input_files = config.get((), "input-files")
+ if input_files:
+ for input_file in input_files:
+ if isinstance(input_file, str):
+ input_file = (input_file, None)
+ args.input_files.append(input_file)
+
if not args.urls and not args.input_files:
parser.error(
"The following arguments are required: URL\n"
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index f017929..deb7c7b 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -179,11 +179,14 @@ def _firefox_cookies_database(profile=None, container=None):
"{}".format(search_root))
_log_debug("Extracting cookies from %s", path)
- if container == "none":
+ if not container or container == "none":
container_id = False
_log_debug("Only loading cookies not belonging to any container")
- elif container:
+ elif container == "all":
+ container_id = None
+
+ else:
containers_path = os.path.join(
os.path.dirname(path), "containers.json")
@@ -207,8 +210,6 @@ def _firefox_cookies_database(profile=None, container=None):
container))
_log_debug("Only loading cookies from container '%s' (ID %s)",
container, container_id)
- else:
- container_id = None
return path, container_id
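
The reordered branches make "no container" the default and add an explicit "all" value (exposed on the command line as `--cookies-from-browser firefox::all`, per the option.py help-text change further down). A minimal sketch of the resulting mapping, with the containers.json lookup elided; lookup_container_id is a hypothetical stand-in:

    def resolve_container_id(container):
        if not container or container == "none":
            return False   # only cookies belonging to no container (default)
        if container == "all":
            return None    # no filtering: cookies from every container
        return lookup_container_id(container)   # resolve via containers.json
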
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 87e7756..b3bec21 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -42,8 +42,9 @@ class YoutubeDLDownloader(DownloaderBase):
if not ytdl_instance:
try:
module = ytdl.import_module(self.config("module"))
- except ImportError as exc:
- self.log.error("Cannot import module '%s'", exc.name)
+ except (ImportError, SyntaxError) as exc:
+ self.log.error("Cannot import module '%s'",
+ getattr(exc, "name", ""))
self.log.debug("", exc_info=True)
self.download = lambda u, p: False
return False
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 2adb142..786acd9 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -51,28 +51,29 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
if not manga:
manga = extr('link-hover">', "<")
info = text.remove_html(extr('link-hover">', "</"))
+ info = text.unescape(info)
match = re.match(
- r"(?:Volume\s+(\d+) )?"
- r"\w+\s+(\d+)(.*)", info)
+ r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
+ r"(?:Chapter|Episode)\s*(\d+)([\w.]*)", info)
if match:
volume, chapter, minor = match.groups()
- title = text.remove_html(extr(
- "selected>", "</option")).partition(" : ")[2]
else:
volume = chapter = 0
minor = ""
- title = info
return {
- "manga" : text.unescape(manga),
- "manga_id" : text.parse_int(manga_id),
- "title" : text.unescape(title),
- "volume" : text.parse_int(volume),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": minor,
- "chapter_id" : text.parse_int(self.chapter_id),
- "date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
+ "manga" : text.unescape(manga),
+ "manga_id" : text.parse_int(manga_id),
+ "chapter_url" : extr(self.chapter_id + "-ch_", '"'),
+ "title" : text.unescape(text.remove_html(extr(
+ "selected>", "</option")).partition(" : ")[2]),
+ "volume" : text.parse_int(volume),
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor" : minor,
+ "chapter_string": info,
+ "chapter_id" : text.parse_int(self.chapter_id),
+ "date" : text.parse_timestamp(extr(' time="', '"')[:-3]),
}
def images(self, page):
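
The widened, case-insensitive pattern also accepts season/episode-style chapter strings, and the raw string is now kept as chapter_string. A quick check of what the new regex captures (sample strings assumed):

    import re

    pattern = re.compile(
        r"(?i)(?:(?:Volume|S(?:eason)?)\s*(\d+)\s+)?"
        r"(?:Chapter|Episode)\s*(\d+)([\w.]*)")

    print(pattern.match("Volume 2 Chapter 13.5").groups())  # ('2', '13', '.5')
    print(pattern.match("S2 Episode 7").groups())           # ('2', '7', '')
    print(pattern.match("chapter 4").groups())              # (None, '4', '')
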
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 240bbd3..780bdf1 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -6,15 +6,24 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://bunkr.sk/"""
+"""Extractors for https://bunkr.si/"""
from .lolisafe import LolisafeAlbumExtractor
-from .. import text
-
-BASE_PATTERN = (
- r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|[cf]i|ru|la|is|to|ac|black|cat|media|red|site|ws|org))"
-)
+from .. import text, config
+
+
+if config.get(("extractor", "bunkr"), "tlds"):
+ BASE_PATTERN = (
+ r"(?:bunkr:(?:https?://)?([^/?#]+)|"
+ r"(?:https?://)?(?:app\.)?(bunkr+\.\w+))"
+ )
+else:
+ BASE_PATTERN = (
+ r"(?:bunkr:(?:https?://)?([^/?#]+)|"
+ r"(?:https?://)?(?:app\.)?(bunkr+"
+ r"\.(?:s[kiu]|[cf]i|ru|la|is|to|a[cx]"
+ r"|black|cat|media|red|site|ws|org)))"
+ )
LEGACY_DOMAINS = {
"bunkr.ru",
@@ -28,15 +37,15 @@ LEGACY_DOMAINS = {
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
- """Extractor for bunkr.sk albums"""
+ """Extractor for bunkr.si albums"""
category = "bunkr"
- root = "https://bunkr.sk"
+ root = "https://bunkr.si"
pattern = BASE_PATTERN + r"/a/([^/?#]+)"
- example = "https://bunkr.sk/a/ID"
+ example = "https://bunkr.si/a/ID"
def __init__(self, match):
LolisafeAlbumExtractor.__init__(self, match)
- domain = match.group(match.lastindex-1)
+ domain = self.groups[0] or self.groups[1]
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
@@ -69,11 +78,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
def _extract_file(self, url):
page = self.request(url).text
- return (
- text.extr(page, '<source src="', '"') or
- text.extr(page, '<img src="', '"') or
- text.rextract(page, ' href="', '"', page.rindex("Download"))[0]
- )
+ url = (text.extr(page, '<source src="', '"') or
+ text.extr(page, '<img src="', '"'))
+
+ if not url:
+ url_download = text.rextract(
+ page, ' href="', '"', page.rindex("Download"))[0]
+ page = self.request(text.unescape(url_download)).text
+ url = text.unescape(text.rextract(page, ' href="', '"')[0])
+
+ return url
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
@@ -83,11 +97,11 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
class BunkrMediaExtractor(BunkrAlbumExtractor):
- """Extractor for bunkr.sk media links"""
+ """Extractor for bunkr.si media links"""
subcategory = "media"
directory_fmt = ("{category}",)
pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)"
- example = "https://bunkr.sk/v/FILENAME"
+ example = "https://bunkr.si/v/FILENAME"
def fetch_album(self, album_id):
try:
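
Both pattern variants also gain a "bunkr:" prefix that forces the extractor onto arbitrary hosts, while the "tlds" option switches to the fully relaxed TLD match. A small demonstration of the relaxed pattern (hosts are examples):

    import re

    BASE_PATTERN = (
        r"(?:bunkr:(?:https?://)?([^/?#]+)|"
        r"(?:https?://)?(?:app\.)?(bunkr+\.\w+))"
    )

    m = re.match(BASE_PATTERN + r"/a/([^/?#]+)",
                 "bunkr:https://mirror.example/a/GaCl1")
    print(m.groups())  # ('mirror.example', None, 'GaCl1')

    m = re.match(BASE_PATTERN + r"/a/([^/?#]+)", "https://bunkr.si/a/GaCl1")
    print(m.groups())  # (None, 'bunkr.si', 'GaCl1')
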
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index d864960..a514696 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -14,6 +14,7 @@ from .. import text
class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
category = "cyberdrop"
root = "https://cyberdrop.me"
+ root_api = "https://api.cyberdrop.me"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)"
example = "https://cyberdrop.me/a/ID"
@@ -55,5 +56,14 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
def _extract_files(self, file_ids):
for file_id in file_ids:
- url = "{}/api/f/{}".format(self.root, file_id)
- yield self.request(url).json()
+ try:
+ url = "{}/api/file/info/{}".format(self.root_api, file_id)
+ file = self.request(url).json()
+ auth = self.request(file["auth_url"]).json()
+ file["url"] = auth["url"]
+ except Exception as exc:
+ self.log.warning("%s (%s: %s)",
+ file_id, exc.__class__.__name__, exc)
+ continue
+
+ yield file
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index f3ea4e7..ea70b58 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -69,11 +69,12 @@ class DeviantartExtractor(Extractor):
self.quality = ",q_{}".format(self.quality)
self.quality_sub = re.compile(r",q_\d+").sub
- if self.original != "image":
- self._update_content = self._update_content_default
- else:
- self._update_content = self._update_content_image
+ if isinstance(self.original, str) and \
+ self.original.lower().startswith("image"):
self.original = True
+ self._update_content = self._update_content_image
+ else:
+ self._update_content = self._update_content_default
journals = self.config("journals", "html")
if journals == "html":
@@ -1462,6 +1463,8 @@ class DeviantartOAuthAPI():
return
if "next_cursor" in data:
+ if not data["next_cursor"]:
+ return
params["offset"] = None
params["cursor"] = data["next_cursor"]
elif data["next_offset"] is not None:
diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py
index af963bc..553ec22 100644
--- a/gallery_dl/extractor/e621.py
+++ b/gallery_dl/extractor/e621.py
@@ -10,6 +10,7 @@
from .common import Message
from . import danbooru
+from ..cache import memcache
from .. import text, util
@@ -44,16 +45,11 @@ class E621Extractor(danbooru.DanbooruExtractor):
self.root[8:], md5[0:2], md5[2:4], md5, file["ext"])
if notes and post.get("has_notes"):
- url = "{}/notes.json?search[post_id]={}".format(
- self.root, post["id"])
- post["notes"] = self.request(url).json()
+ post["notes"] = self._get_notes(post["id"])
if pools and post["pools"]:
- url = "{}/pools.json?search[id]={}".format(
- self.root, ",".join(map(str, post["pools"])))
- post["pools"] = _pools = self.request(url).json()
- for pool in _pools:
- pool["name"] = pool["name"].replace("_", " ")
+ post["pools"] = self._get_pools(
+ ",".join(map(str, post["pools"])))
post["filename"] = file["md5"]
post["extension"] = file["ext"]
@@ -64,6 +60,18 @@ class E621Extractor(danbooru.DanbooruExtractor):
yield Message.Directory, post
yield Message.Url, file["url"], post
+ def _get_notes(self, id):
+ return self.request(
+ "{}/notes.json?search[post_id]={}".format(self.root, id)).json()
+
+ @memcache(keyarg=1)
+ def _get_pools(self, ids):
+ pools = self.request(
+ "{}/pools.json?search[id]={}".format(self.root, ids)).json()
+ for pool in pools:
+ pool["name"] = pool["name"].replace("_", " ")
+ return pools
+
BASE_PATTERN = E621Extractor.update({
"e621": {
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 1b4f995..01af7a4 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -430,7 +430,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
}
page = self.request(url, cookies=cookies).text
- current = text.extr(page, "<strong>", "</strong>")
+ current = text.extr(page, "<strong>", "</strong>").replace(",", "")
self.log.debug("Image Limits: %s/%s", current, self.limits)
self._remaining = self.limits - text.parse_int(current)
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index c94a110..1b4971c 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -75,11 +75,8 @@ class FlickrImageExtractor(FlickrExtractor):
def items(self):
photo = self.api.photos_getInfo(self.item_id)
- if self.api.exif:
- photo.update(self.api.photos_getExif(self.item_id))
- if self.api.contexts:
- photo.update(self.api.photos_getAllContexts(self.item_id))
+ self.api._extract_metadata(photo)
if photo["media"] == "video" and self.api.videos:
self.api._extract_video(photo)
else:
@@ -135,8 +132,13 @@ class FlickrAlbumExtractor(FlickrExtractor):
def metadata(self):
data = FlickrExtractor.metadata(self)
- data["album"] = self.api.photosets_getInfo(
- self.album_id, self.user["nsid"])
+ try:
+ data["album"] = self.api.photosets_getInfo(
+ self.album_id, self.user["nsid"])
+ except Exception:
+ data["album"] = {}
+ self.log.warning("%s: Unable to retrieve album metadata",
+ self.album_id)
return data
def photos(self):
@@ -407,6 +409,8 @@ class FlickrAPI(oauth.OAuth1API):
self.log.debug("Server response: %s", data)
if data["code"] == 1:
raise exception.NotFoundError(self.extractor.subcategory)
+ elif data["code"] == 2:
+ raise exception.AuthorizationError(msg)
elif data["code"] == 98:
raise exception.AuthenticationError(msg)
elif data["code"] == 99:
@@ -453,10 +457,7 @@ class FlickrAPI(oauth.OAuth1API):
photo["date"] = text.parse_timestamp(photo["dateupload"])
photo["tags"] = photo["tags"].split()
- if self.exif:
- photo.update(self.photos_getExif(photo["id"]))
- if self.contexts:
- photo.update(self.photos_getAllContexts(photo["id"]))
+ self._extract_metadata(photo)
photo["id"] = text.parse_int(photo["id"])
if "owner" in photo:
@@ -512,6 +513,23 @@ class FlickrAPI(oauth.OAuth1API):
photo["width"] = photo["height"] = 0
return photo
+ def _extract_metadata(self, photo):
+ if self.exif:
+ try:
+ photo.update(self.photos_getExif(photo["id"]))
+ except Exception as exc:
+ self.log.warning(
+ "Unable to retrieve 'exif' data for %s (%s: %s)",
+ photo["id"], exc.__class__.__name__, exc)
+
+ if self.contexts:
+ try:
+ photo.update(self.photos_getAllContexts(photo["id"]))
+ except Exception as exc:
+ self.log.warning(
+ "Unable to retrieve 'contexts' data for %s (%s: %s)",
+ photo["id"], exc.__class__.__name__, exc)
+
@staticmethod
def _clean_info(info):
info["title"] = info["title"]["_content"]
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 3055426..d253582 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -179,6 +179,11 @@ class FuraffinityExtractor(Extractor):
break
self._favorite_id = text.parse_int(extr('data-fav-id="', '"'))
yield post_id
+
+ pos = page.find('type="submit">Next</button>')
+ if pos >= 0:
+ path = text.rextract(page, '<form action="', '"', pos)[0]
+ continue
path = text.extr(page, 'right" href="', '"')
def _pagination_search(self, query):
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
index 16d4340..a6c1d5a 100644
--- a/gallery_dl/extractor/generic.py
+++ b/gallery_dl/extractor/generic.py
@@ -15,7 +15,7 @@ import re
class GenericExtractor(Extractor):
"""Extractor for images in a generic web page."""
category = "generic"
- directory_fmt = ("{category}", "{pageurl}")
+ directory_fmt = ("{category}", "{subcategory}", "{path}")
archive_fmt = "{imageurl}"
# By default, the generic extractor is disabled
@@ -52,7 +52,10 @@ class GenericExtractor(Extractor):
self.scheme = match.group('scheme')
else:
self.scheme = 'https://'
- self.url = self.scheme + self.url
+ self.url = text.ensure_http_scheme(self.url, self.scheme)
+
+ self.subcategory = match.group('domain')
+ self.path = match.group('path')
# Used to resolve relative image urls
self.root = self.scheme + match.group('domain')
@@ -87,6 +90,7 @@ class GenericExtractor(Extractor):
def metadata(self, page):
"""Extract generic webpage metadata, return them in a dict."""
data = {}
+ data['path'] = self.path.replace("/", "")
data['pageurl'] = self.url
data['title'] = text.extr(page, '<title>', "</title>")
data['description'] = text.extr(
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index f0eb4e9..52b4ae6 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -47,8 +47,7 @@ class GofileFolderExtractor(Extractor):
raise exception.AuthorizationError("Password required")
num = 0
- for content_id in folder["childrenIds"]:
- content = contents[content_id]
+ for content in contents.values():
content["folder"] = folder
if content["type"] == "file":
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 9b74700..18df9df 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -89,6 +89,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
path = ext = "webp"
ihash = image["hash"]
idata = text.nameext_from_url(image["name"])
+ idata["extension_original"] = idata["extension"]
if ext:
idata["extension"] = ext
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index c05fe72..422c865 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -12,6 +12,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
+import itertools
import binascii
import json
import re
@@ -57,12 +58,17 @@ class InstagramExtractor(Extractor):
data = self.metadata()
videos = self.config("videos", True)
previews = self.config("previews", False)
+ max_posts = self.config("max-posts")
video_headers = {"User-Agent": "Mozilla/5.0"}
order = self.config("order-files")
reverse = order[0] in ("r", "d") if order else False
- for post in self.posts():
+ posts = self.posts()
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
if "__typename" in post:
post = self._parse_post_graphql(post)
@@ -159,15 +165,19 @@ class InstagramExtractor(Extractor):
if "title" in post:
data["highlight_title"] = post["title"]
if "created_at" in post:
- data["date"] = text.parse_timestamp(post.get("created_at"))
+ data["post_date"] = data["date"] = text.parse_timestamp(
+ post.get("created_at"))
else: # regular image/video post
+ date = text.parse_timestamp(post.get("taken_at"))
data = {
"post_id" : post["pk"],
"post_shortcode": post["code"],
+ "post_url": "{}/p/{}/".format(self.root, post["code"]),
+ "post_date": date,
+ "date": date,
"likes": post.get("like_count", 0),
"pinned": post.get("timeline_pinned_user_ids", ()),
- "date": text.parse_timestamp(post.get("taken_at")),
"liked": post.get("has_liked", False),
}
@@ -206,7 +216,6 @@ class InstagramExtractor(Extractor):
data["owner_id"] = owner["pk"]
data["username"] = owner.get("username")
data["fullname"] = owner.get("full_name")
- data["post_url"] = "{}/p/{}/".format(self.root, data["post_shortcode"])
data["_files"] = files = []
for num, item in enumerate(items, 1):
@@ -269,7 +278,6 @@ class InstagramExtractor(Extractor):
owner = post["owner"]
data = {
"typename" : typename,
- "date" : text.parse_timestamp(post["taken_at_timestamp"]),
"likes" : post["edge_media_preview_like"]["count"],
"liked" : post.get("viewer_has_liked", False),
"pinned" : pinned,
@@ -279,11 +287,13 @@ class InstagramExtractor(Extractor):
"post_id" : post["id"],
"post_shortcode": post["shortcode"],
"post_url" : "{}/p/{}/".format(self.root, post["shortcode"]),
+ "post_date" : text.parse_timestamp(post["taken_at_timestamp"]),
"description": text.parse_unicode_escapes("\n".join(
edge["node"]["text"]
for edge in post["edge_media_to_caption"]["edges"]
)),
}
+ data["date"] = data["post_date"]
tags = self._find_tags(data["description"])
if tags:
@@ -313,6 +323,7 @@ class InstagramExtractor(Extractor):
media = {
"num": num,
"media_id" : node["id"],
+ "date" : data["date"],
"shortcode" : (node.get("shortcode") or
shortcode_from_id(node["id"])),
"display_url": node["display_url"],
@@ -328,6 +339,7 @@ class InstagramExtractor(Extractor):
dimensions = post["dimensions"]
media = {
"media_id" : post["id"],
+ "date" : data["date"],
"shortcode" : post["shortcode"],
"display_url": post["display_url"],
"video_url" : post.get("video_url"),
@@ -378,7 +390,11 @@ class InstagramExtractor(Extractor):
"full_name": user["full_name"]})
def _init_cursor(self):
- return self.config("cursor") or None
+ cursor = self.config("cursor", True)
+ if not cursor:
+ self._update_cursor = util.identity
+ elif isinstance(cursor, str):
+ return cursor
def _update_cursor(self, cursor):
self.log.debug("Cursor: %s", cursor)
@@ -418,6 +434,7 @@ class InstagramUserExtractor(InstagramExtractor):
base = "{}/{}/".format(self.root, self.item)
stories = "{}/stories/{}/".format(self.root, self.item)
return self._dispatch_extractors((
+ (InstagramInfoExtractor , base + "info/"),
(InstagramAvatarExtractor , base + "avatar/"),
(InstagramStoriesExtractor , stories),
(InstagramHighlightsExtractor, base + "highlights/"),
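
Besides the new info/ dispatch target and the tri-state cursor option (false disables cursor logging, a string resumes from a saved cursor; twitter.py below gets the same treatment), max-posts caps how many posts are consumed from the generator. The cap is a plain itertools.islice, as a quick sketch shows:

    import itertools

    def posts():                    # stand-in for self.posts()
        yield from range(10_000)

    max_posts = 3                   # the new "max-posts" option
    feed = itertools.islice(posts(), max_posts) if max_posts else posts()
    print(list(feed))               # [0, 1, 2]
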
diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py
index 979b1a2..cacf504 100644
--- a/gallery_dl/extractor/koharu.py
+++ b/gallery_dl/extractor/koharu.py
@@ -161,16 +161,29 @@ class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
return results
def _select_format(self, formats):
- if not self.fmt or self.fmt == "original":
- fmtid = "0"
+ fmt = self.fmt
+
+ if not fmt or fmt == "best":
+ fmtids = ("0", "1600", "1280", "980", "780")
+ elif isinstance(fmt, str):
+ fmtids = fmt.split(",")
+ elif isinstance(fmt, list):
+ fmtids = fmt
else:
- fmtid = str(self.fmt)
+ fmtids = (str(self.fmt),)
- try:
- fmt = formats[fmtid]
- except KeyError:
+ for fmtid in fmtids:
+ try:
+ fmt = formats[fmtid]
+ if fmt["id"]:
+ break
+ except KeyError:
+ self.log.debug("%s: Format %s is not available",
+ self.groups[0], fmtid)
+ else:
raise exception.NotFoundError("format")
+ self.log.debug("%s: Selected format %s", self.groups[0], fmtid)
fmt["w"] = fmtid
return fmt
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 3d7d685..117b88b 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -34,7 +34,7 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
def __init__(self, match):
LolisafeExtractor.__init__(self, match)
- self.album_id = match.group(match.lastindex)
+ self.album_id = self.groups[-1]
def _init(self):
domain = self.config("domain")
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index ecd6619..5fc0ce5 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -171,15 +171,17 @@ class NewgroundsExtractor(Extractor):
if self.flash:
url += "/format/flash"
- with self.request(url, fatal=False) as response:
- if response.status_code >= 400:
- return {}
- page = response.text
+ response = self.request(url, fatal=False)
+ page = response.text
pos = page.find('id="adults_only"')
if pos >= 0:
msg = text.extract(page, 'class="highlight">', '<', pos)[0]
self.log.warning('"%s"', msg)
+ return {}
+
+ if response.status_code >= 400:
+ return {}
extr = text.extract_from(page)
data = extract_data(extr, post_url)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index d732894..3479b88 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -94,12 +94,39 @@ class PixivExtractor(Extractor):
work.get("id"), exc.message)
continue
- url = ugoira["zip_urls"]["medium"].replace(
- "_ugoira600x600", "_ugoira1920x1080")
- work["frames"] = ugoira["frames"]
+ url = ugoira["zip_urls"]["medium"]
+ work["frames"] = frames = ugoira["frames"]
work["date_url"] = self._date_from_url(url)
work["_http_adjust_extension"] = False
- yield Message.Url, url, text.nameext_from_url(url, work)
+
+ if self.load_ugoira == "original":
+ base, sep, _ = url.rpartition("_ugoira")
+ base = base.replace(
+ "/img-zip-ugoira/", "/img-original/", 1) + sep
+
+ for ext in ("jpg", "png", "gif"):
+ try:
+ url = ("{}0.{}".format(base, ext))
+ self.request(url, method="HEAD")
+ break
+ except exception.HttpError:
+ pass
+ else:
+ self.log.warning(
+ "Unable to find Ugoira frame URLs (%s)",
+ work.get("id"))
+ continue
+
+ for num, frame in enumerate(frames):
+ url = ("{}{}.{}".format(base, num, ext))
+ work["num"] = work["_ugoira_frame_index"] = num
+ work["suffix"] = "_p{:02}".format(num)
+ text.nameext_from_url(url, work)
+ yield Message.Url, url, work
+
+ else:
+ url = url.replace("_ugoira600x600", "_ugoira1920x1080")
+ yield Message.Url, url, text.nameext_from_url(url, work)
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
@@ -551,9 +578,6 @@ class PixivSeriesExtractor(PixivExtractor):
directory_fmt = ("{category}", "{user[id]} {user[account]}",
"{series[id]} {series[title]}")
filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}"
- cookies_domain = ".pixiv.net"
- browser = "firefox"
- tls12 = False
pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)"
example = "https://www.pixiv.net/user/12345/series/12345"
@@ -562,34 +586,18 @@ class PixivSeriesExtractor(PixivExtractor):
self.user_id, self.series_id = match.groups()
def works(self):
- url = self.root + "/ajax/series/" + self.series_id
- params = {"p": 1}
- headers = {
- "Accept": "application/json",
- "Referer": "{}/user/{}/series/{}".format(
- self.root, self.user_id, self.series_id),
- "Alt-Used": "www.pixiv.net",
- }
+ series = None
- while True:
- data = self.request(url, params=params, headers=headers).json()
- body = data["body"]
- page = body["page"]
-
- series = body["extraData"]["meta"]
- series["id"] = self.series_id
- series["total"] = page["total"]
- series["title"] = text.extr(series["title"], '"', '"')
-
- for info in page["series"]:
- work = self.api.illust_detail(info["workId"])
- work["num_series"] = info["order"]
- work["series"] = series
- yield work
-
- if len(page["series"]) < 10:
- return
- params["p"] += 1
+ for work in self.api.illust_series(self.series_id):
+ if series is None:
+ series = self.api.data
+ series["total"] = num_series = series.pop("series_work_count")
+ else:
+ num_series -= 1
+
+ work["num_series"] = num_series
+ work["series"] = series
+ yield work
class PixivNovelExtractor(PixivExtractor):
@@ -916,6 +924,11 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._pagination("/v2/illust/related", params)
+ def illust_series(self, series_id, offset=0):
+ params = {"illust_series_id": series_id, "offset": offset}
+ return self._pagination("/v1/illust/series", params,
+ key_data="illust_series_detail")
+
def novel_bookmark_detail(self, novel_id):
params = {"novel_id": novel_id}
return self._call(
@@ -1013,10 +1026,15 @@ class PixivAppAPI():
raise exception.StopExtraction("API request failed: %s", error)
- def _pagination(self, endpoint, params, key="illusts"):
+ def _pagination(self, endpoint, params,
+ key_items="illusts", key_data=None):
while True:
data = self._call(endpoint, params)
- yield from data[key]
+
+ if key_data:
+ self.data = data.get(key_data)
+ key_data = None
+ yield from data[key_items]
if not data["next_url"]:
return
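
With the (presumed) ugoira option set to "original", frame URLs are now derived from the ZIP URL and the extension probed via HEAD requests. A hedged illustration of the rewrite, using a made-up work ID:

    zip_url = ("https://i.pximg.net/img-zip-ugoira/img/"
               "2024/01/01/00/00/00/12345678_ugoira600x600.zip")

    base, sep, _ = zip_url.rpartition("_ugoira")
    base = base.replace("/img-zip-ugoira/", "/img-original/", 1) + sep

    print(base + "0.jpg")
    # https://i.pximg.net/img-original/img/2024/01/01/00/00/00/12345678_ugoira0.jpg
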
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index ad3efa7..7db8172 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -66,7 +66,8 @@ class SankakuExtractor(BooruExtractor):
def _prepare(self, post):
post["created_at"] = post["created_at"]["s"]
post["date"] = text.parse_timestamp(post["created_at"])
- post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
+ post["tags"] = [tag["name"].lower().replace(" ", "_")
+ for tag in post["tags"] if tag["name"]]
post["tag_string"] = " ".join(post["tags"])
post["_http_validate"] = self._check_expired
@@ -79,7 +80,7 @@ class SankakuExtractor(BooruExtractor):
for tag in post["tags"]:
name = tag["name"]
if name:
- tags[types[tag["type"]]].append(name)
+ tags[types[tag["type"]]].append(name.lower().replace(" ", "_"))
for key, value in tags.items():
post["tags_" + key] = value
post["tag_string_" + key] = " ".join(value)
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 80f2aea..7708b5c 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -152,6 +152,25 @@ class SexcomPinsExtractor(SexcomExtractor):
return self._pagination(url)
+class SexcomLikesExtractor(SexcomExtractor):
+ """Extractor for a user's liked pins on www.sex.com"""
+ subcategory = "likes"
+ directory_fmt = ("{category}", "{user}", "Likes")
+ pattern = r"(?:https?://)?(?:www\.)?sex\.com/user/([^/?#]+)/likes/"
+ example = "https://www.sex.com/user/USER/likes/"
+
+ def __init__(self, match):
+ SexcomExtractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def metadata(self):
+ return {"user": text.unquote(self.user)}
+
+ def pins(self):
+ url = "{}/user/{}/likes/".format(self.root, self.user)
+ return self._pagination(url)
+
+
class SexcomBoardExtractor(SexcomExtractor):
"""Extractor for pins from a board on www.sex.com"""
subcategory = "board"
diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py
index bba1ece..b6917cc 100644
--- a/gallery_dl/extractor/szurubooru.py
+++ b/gallery_dl/extractor/szurubooru.py
@@ -86,6 +86,7 @@ BASE_PATTERN = SzurubooruExtractor.update({
"bcbnsfw": {
"root": "https://booru.bcbnsfw.space",
"pattern": r"booru\.bcbnsfw\.space",
+ "query-all": "*",
},
"snootbooru": {
"root": "https://snootbooru.com",
@@ -110,7 +111,12 @@ class SzurubooruTagExtractor(SzurubooruExtractor):
return {"search_tags": self.query}
def posts(self):
- return self._pagination("/posts/", {"query": self.query})
+ if self.query.strip():
+ query = self.query
+ else:
+ query = self.config_instance("query-all")
+
+ return self._pagination("/posts/", {"query": query})
class SzurubooruPostExtractor(SzurubooruExtractor):
diff --git a/gallery_dl/extractor/toyhouse.py b/gallery_dl/extractor/toyhouse.py
index 64fa951..44d87ee 100644
--- a/gallery_dl/extractor/toyhouse.py
+++ b/gallery_dl/extractor/toyhouse.py
@@ -123,4 +123,5 @@ class ToyhouseImageExtractor(ToyhouseExtractor):
def posts(self):
url = "{}/~images/{}".format(self.root, self.user)
- return (self._parse_post(self.request(url).text, '<img src="'),)
+ return (self._parse_post(
+ self.request(url).text, '<img class="mw-100" src="'),)
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index ff29c04..73455d2 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -400,6 +400,9 @@ class TumblrAPI(oauth.OAuth1API):
"""Retrieve liked posts"""
endpoint = "/v2/blog/{}/likes".format(blog)
params = {"limit": "50", "before": self.before}
+ if self.api_key:
+ params["api_key"] = self.api_key
+
while True:
posts = self._call(endpoint, params)["liked_posts"]
if not posts:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ea57d76..d4ec343 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -155,6 +155,7 @@ class TwitterExtractor(Extractor):
if not self.unavailable:
continue
+ mtype = media.get("type")
descr = media.get("ext_alt_text")
width = media["original_info"].get("width", 0)
height = media["original_info"].get("height", 0)
@@ -164,6 +165,7 @@ class TwitterExtractor(Extractor):
files.append({
"url": "ytdl:{}/i/web/status/{}".format(
self.root, tweet["id_str"]),
+ "type" : mtype,
"width" : width,
"height" : height,
"extension" : None,
@@ -177,6 +179,7 @@ class TwitterExtractor(Extractor):
)
files.append({
"url" : variant["url"],
+ "type" : mtype,
"width" : width,
"height" : height,
"bitrate" : variant.get("bitrate", 0),
@@ -193,6 +196,7 @@ class TwitterExtractor(Extractor):
base = url.rpartition("=")[0] + "="
files.append(text.nameext_from_url(url, {
"url" : base + self._size_image,
+ "type" : mtype,
"width" : width,
"height" : height,
"_fallback" : self._image_fallback(base),
@@ -504,7 +508,11 @@ class TwitterExtractor(Extractor):
}
def _init_cursor(self):
- return self.config("cursor") or None
+ cursor = self.config("cursor", True)
+ if not cursor:
+ self._update_cursor = util.identity
+ elif isinstance(cursor, str):
+ return cursor
def _update_cursor(self, cursor):
self.log.debug("Cursor: %s", cursor)
@@ -560,6 +568,7 @@ class TwitterUserExtractor(TwitterExtractor):
def items(self):
base = "{}/{}/".format(self.root, self.user)
return self._dispatch_extractors((
+ (TwitterInfoExtractor , base + "info"),
(TwitterAvatarExtractor , base + "photo"),
(TwitterBackgroundExtractor, base + "header_photo"),
(TwitterTimelineExtractor , base + "timeline"),
@@ -590,9 +599,16 @@ class TwitterTimelineExtractor(TwitterExtractor):
return cursor
def tweets(self):
- self._cursor = cursor = self.config("cursor") or None
reset = False
+ cursor = self.config("cursor", True)
+ if not cursor:
+ self._update_cursor = util.identity
+ elif isinstance(cursor, str):
+ self._cursor = cursor
+ else:
+ cursor = None
+
if cursor:
state = cursor.partition("/")[0]
state, _, tweet_id = state.partition("_")
@@ -1612,6 +1628,9 @@ class TwitterAPI():
entries = instr["entries"]
elif instr_type == "TimelineAddToModule":
entries = instr["moduleItems"]
+ elif instr_type == "TimelinePinEntry":
+ if pinned_tweet:
+ pinned_tweet = instr["entry"]
elif instr_type == "TimelineReplaceEntry":
entry = instr["entry"]
if entry["entryId"].startswith("cursor-bottom-"):
@@ -1650,9 +1669,11 @@ class TwitterAPI():
tweet = None
if pinned_tweet:
- pinned_tweet = False
- if instructions[-1]["type"] == "TimelinePinEntry":
+ if isinstance(pinned_tweet, dict):
+ tweets.append(pinned_tweet)
+ elif instructions[-1]["type"] == "TimelinePinEntry":
tweets.append(instructions[-1]["entry"])
+ pinned_tweet = False
for entry in entries:
esw = entry["entryId"].startswith
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 9370cfb..7a62e01 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -17,13 +17,11 @@ class WikimediaExtractor(BaseExtractor):
"""Base class for wikimedia extractors"""
basecategory = "wikimedia"
filename_fmt = "{filename} ({sha1[:8]}).{extension}"
- directory_fmt = ("{category}", "{page}")
archive_fmt = "{sha1}"
request_interval = (1.0, 2.0)
def __init__(self, match):
BaseExtractor.__init__(self, match)
- path = match.group(match.lastindex)
if self.category == "wikimedia":
self.category = self.root.split(".")[-2]
@@ -31,31 +29,7 @@ class WikimediaExtractor(BaseExtractor):
self.category = "{}-{}".format(
self.category, self.root.partition(".")[0].rpartition("/")[2])
- if path.startswith("wiki/"):
- path = path[5:]
-
- pre, sep, _ = path.partition(":")
- prefix = pre.lower() if sep else None
-
- self.title = path = text.unquote(path)
- if prefix:
- self.subcategory = prefix
-
- if prefix == "category":
- self.params = {
- "generator": "categorymembers",
- "gcmtitle" : path,
- "gcmtype" : "file",
- }
- elif prefix == "file":
- self.params = {
- "titles" : path,
- }
- else:
- self.params = {
- "generator": "images",
- "titles" : path,
- }
+ self.per_page = self.config("limit", 50)
def _init(self):
api_path = self.config_instance("api-path")
@@ -67,6 +41,22 @@ class WikimediaExtractor(BaseExtractor):
else:
self.api_url = self.root + "/api.php"
+ @staticmethod
+ def prepare(image):
+ """Adjust the content of a image object"""
+ image["metadata"] = {
+ m["name"]: m["value"]
+ for m in image["metadata"] or ()}
+ image["commonmetadata"] = {
+ m["name"]: m["value"]
+ for m in image["commonmetadata"] or ()}
+
+ filename = image["canonicaltitle"]
+ image["filename"], _, image["extension"] = \
+ filename.partition(":")[2].rpartition(".")
+ image["date"] = text.parse_datetime(
+ image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
+
def items(self):
for info in self._pagination(self.params):
try:
@@ -75,20 +65,7 @@ class WikimediaExtractor(BaseExtractor):
self.log.debug("Missing 'imageinfo' for %s", info)
continue
- image["metadata"] = {
- m["name"]: m["value"]
- for m in image["metadata"] or ()}
- image["commonmetadata"] = {
- m["name"]: m["value"]
- for m in image["commonmetadata"] or ()}
-
- filename = image["canonicaltitle"]
- image["filename"], _, image["extension"] = \
- filename.partition(":")[2].rpartition(".")
- image["date"] = text.parse_datetime(
- image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
- image["page"] = self.title
-
+ self.prepare(image)
yield Message.Directory, image
yield Message.Url, image["url"], image
@@ -110,6 +87,17 @@ class WikimediaExtractor(BaseExtractor):
while True:
data = self.request(url, params=params).json()
+ # ref: https://www.mediawiki.org/wiki/API:Errors_and_warnings
+ error = data.get("error")
+ if error:
+ self.log.error("%s: %s", error["code"], error["info"])
+ return
+ # MediaWiki will emit warnings for non-fatal mistakes such as
+ # invalid parameter instead of raising an error
+ warnings = data.get("warnings")
+ if warnings:
+ self.log.debug("MediaWiki returned warnings: %s", warnings)
+
try:
pages = data["query"]["pages"]
except KeyError:
@@ -181,5 +169,59 @@ BASE_PATTERN = WikimediaExtractor.update({
class WikimediaArticleExtractor(WikimediaExtractor):
"""Extractor for wikimedia articles"""
subcategory = "article"
+ directory_fmt = ("{category}", "{page}")
pattern = BASE_PATTERN + r"/(?!static/)([^?#]+)"
example = "https://en.wikipedia.org/wiki/TITLE"
+
+ def __init__(self, match):
+ WikimediaExtractor.__init__(self, match)
+
+ path = match.group(match.lastindex)
+ if path.startswith("wiki/"):
+ path = path[5:]
+
+ pre, sep, _ = path.partition(":")
+ prefix = pre.lower() if sep else None
+
+ self.title = path = text.unquote(path)
+ if prefix:
+ self.subcategory = prefix
+
+ if prefix == "category":
+ self.params = {
+ "generator": "categorymembers",
+ "gcmtitle" : path,
+ "gcmtype" : "file",
+ "gcmlimit" : self.per_page,
+ }
+ elif prefix == "file":
+ self.params = {
+ "titles" : path,
+ }
+ else:
+ self.params = {
+ "generator": "images",
+ "gimlimit" : self.per_page,
+ "titles" : path,
+ }
+
+ def prepare(self, image):
+ WikimediaExtractor.prepare(image)
+ image["page"] = self.title
+
+
+class WikimediaWikiExtractor(WikimediaExtractor):
+ """Extractor for all files on a MediaWiki instance"""
+ subcategory = "wiki"
+ pattern = BASE_PATTERN + r"/?$"
+ example = "https://en.wikipedia.org/"
+
+ def __init__(self, match):
+ WikimediaExtractor.__init__(self, match)
+
+ # ref: https://www.mediawiki.org/wiki/API:Allpages
+ self.params = {
+ "generator" : "allpages",
+ "gapnamespace": 6, # "File" namespace
+ "gaplimit" : self.per_page,
+ }
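
The gap* parameters come straight from MediaWiki's allpages generator. A hedged sketch of the kind of query the new wiki extractor ends up sending; the action/prop/iiprop values are assumptions based on standard MediaWiki API usage, and the host is only an example:

    import requests

    params = {
        "action"      : "query",
        "format"      : "json",
        "generator"   : "allpages",
        "gapnamespace": 6,    # the "File" namespace
        "gaplimit"    : 50,   # the new "limit" option
        "prop"        : "imageinfo",
        "iiprop"      : "timestamp|url|sha1|metadata"
                        "|commonmetadata|canonicaltitle",
    }
    data = requests.get("https://en.wikipedia.org/w/api.php",
                        params=params).json()
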
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index cb3c74c..168845e 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -116,21 +116,20 @@ class YoutubeDLExtractor(Extractor):
for entry in entries:
if not entry:
continue
- elif entry.get("_type") in ("url", "url_transparent"):
+
+ if entry.get("_type") in ("url", "url_transparent"):
try:
- info_dict = ytdl_instance.extract_info(
+ entry = ytdl_instance.extract_info(
entry["url"], False,
ie_key=entry.get("ie_key"))
except ytdl_module.utils.YoutubeDLError:
continue
-
- if not info_dict:
+ if not entry:
continue
- elif "entries" in info_dict:
- yield from self._process_entries(
- ytdl_module, ytdl_instance, info_dict["entries"])
- else:
- yield info_dict
+
+ if "entries" in entry:
+ yield from self._process_entries(
+ ytdl_module, ytdl_instance, entry["entries"])
else:
yield entry
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index ec1c926..f197e5d 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -325,6 +325,23 @@ def _parse_slice(format_spec, default):
return apply_slice
+def _parse_arithmetic(format_spec, default):
+ op, _, format_spec = format_spec.partition(_SEPARATOR)
+ fmt = _build_format_func(format_spec, default)
+
+ value = int(op[2:])
+ op = op[1]
+
+ if op == "+":
+ return lambda obj: fmt(obj + value)
+ if op == "-":
+ return lambda obj: fmt(obj - value)
+ if op == "*":
+ return lambda obj: fmt(obj * value)
+
+ return fmt
+
+
def _parse_conversion(format_spec, default):
conversions, _, format_spec = format_spec.partition(_SEPARATOR)
convs = [_CONVERSIONS[c] for c in conversions[1:]]
@@ -480,6 +497,7 @@ _CONVERSIONS = {
_FORMAT_SPECIFIERS = {
"?": _parse_optional,
"[": _parse_slice,
+ "A": _parse_arithmetic,
"C": _parse_conversion,
"D": _parse_datetime,
"J": _parse_join,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 0e0916d..c995767 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -322,6 +322,12 @@ class DownloadJob(Job):
for callback in hooks["prepare-after"]:
callback(pathfmt)
+ if kwdict.pop("_file_recheck", False) and pathfmt.exists():
+ if archive and self._archive_write_skip:
+ archive.add(kwdict)
+ self.handle_skip()
+ return
+
if self.sleep:
self.extractor.sleep(self.sleep(), "download")
@@ -474,10 +480,11 @@ class DownloadJob(Job):
def handle_skip(self):
pathfmt = self.pathfmt
- self.out.skip(pathfmt.path)
if "skip" in self.hooks:
for callback in self.hooks["skip"]:
callback(pathfmt)
+ self.out.skip(pathfmt.path)
+
if self._skipexc:
if not self._skipftr or self._skipftr(pathfmt.kwdict):
self._skipcnt += 1
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 155cbd9..0189c0e 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -74,6 +74,21 @@ class MtimeAction(argparse.Action):
})
+class RenameAction(argparse.Action):
+ """Configure rename post processors"""
+ def __call__(self, parser, namespace, value, option_string=None):
+ if self.const:
+ namespace.postprocessors.append({
+ "name": "rename",
+ "to" : value,
+ })
+ else:
+ namespace.postprocessors.append({
+ "name": "rename",
+ "from": value,
+ })
+
+
class UgoiraAction(argparse.Action):
"""Configure ugoira post processors"""
def __call__(self, parser, namespace, value, option_string=None):
@@ -128,7 +143,7 @@ class UgoiraAction(argparse.Action):
pp["name"] = "ugoira"
pp["whitelist"] = ("pixiv", "danbooru")
- namespace.options.append(((), "ugoira", True))
+ namespace.options.append((("extractor",), "ugoira", True))
namespace.postprocessors.append(pp)
@@ -207,7 +222,7 @@ def build_parser():
)
update = parser.add_argument_group("Update Options")
- if util.EXECUTABLE or 1:
+ if util.EXECUTABLE:
update.add_argument(
"-U", "--update",
dest="update", action="store_const", const="latest",
@@ -526,7 +541,8 @@ def build_parser():
"domain prefixed with '/', "
"keyring name prefixed with '+', "
"profile prefixed with ':', and "
- "container prefixed with '::' ('none' for no container)"),
+ "container prefixed with '::' "
+ "('none' for no container (default), 'all' for all containers)"),
)
selection = parser.add_argument_group("Selection Options")
@@ -661,9 +677,21 @@ def build_parser():
help=argparse.SUPPRESS,
)
postprocessor.add_argument(
+ "--rename",
+ dest="postprocessors", metavar="FORMAT", action=RenameAction, const=0,
+ help=("Rename previously downloaded files from FORMAT "
+ "to the current filename format"),
+ )
+ postprocessor.add_argument(
+ "--rename-to",
+ dest="postprocessors", metavar="FORMAT", action=RenameAction, const=1,
+ help=("Rename previously downloaded files from the current filename "
+ "format to FORMAT"),
+ )
+ postprocessor.add_argument(
"--ugoira",
- dest="postprocessors", metavar="FORMAT", action=UgoiraAction,
- help=("Convert Pixiv Ugoira to FORMAT using FFmpeg. "
+ dest="postprocessors", metavar="FMT", action=UgoiraAction,
+ help=("Convert Pixiv Ugoira to FMT using FFmpeg. "
"Supported formats are 'webm', 'mp4', 'gif', "
"'vp8', 'vp9', 'vp9-lossless', 'copy'."),
)
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 7892776..d408a41 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -184,29 +184,31 @@ class PathFormat():
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
self.kwdict = kwdict
- sep = os.sep
segments = self.build_directory(kwdict)
if segments:
self.directory = directory = self.basedirectory + self.clean_path(
- sep.join(segments) + sep)
+ os.sep.join(segments) + os.sep)
else:
self.directory = directory = self.basedirectory
if WINDOWS and self.extended:
- # Enable longer-than-260-character paths
- directory = os.path.abspath(directory)
- if directory.startswith("\\\\"):
- directory = "\\\\?\\UNC\\" + directory[2:]
- else:
- directory = "\\\\?\\" + directory
-
- # abspath() in Python 3.7+ removes trailing path separators (#402)
- if directory[-1] != sep:
- directory += sep
-
+ directory = self._extended_path(directory)
self.realdirectory = directory
+ def _extended_path(self, path):
+ # Enable longer-than-260-character paths
+ path = os.path.abspath(path)
+ if not path.startswith("\\\\"):
+ path = "\\\\?\\" + path
+ elif not path.startswith("\\\\?\\"):
+ path = "\\\\?\\UNC\\" + path[2:]
+
+ # abspath() in Python 3.7+ removes trailing path separators (#402)
+ if path[-1] != os.sep:
+ return path + os.sep
+ return path
+
def set_filename(self, kwdict):
"""Set general filename data"""
self.kwdict = kwdict
diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py
index 4690554..7837b06 100644
--- a/gallery_dl/postprocessor/__init__.py
+++ b/gallery_dl/postprocessor/__init__.py
@@ -12,9 +12,11 @@ modules = [
"classify",
"compare",
"exec",
+ "hash",
"metadata",
"mtime",
"python",
+ "rename",
"ugoira",
"zip",
]
diff --git a/gallery_dl/postprocessor/hash.py b/gallery_dl/postprocessor/hash.py
new file mode 100644
index 0000000..92a7477
--- /dev/null
+++ b/gallery_dl/postprocessor/hash.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Compute file hash digests"""
+
+from .common import PostProcessor
+import hashlib
+
+
+class HashPP(PostProcessor):
+
+ def __init__(self, job, options):
+ PostProcessor.__init__(self, job)
+
+ self.chunk_size = options.get("chunk-size", 32768)
+ self.filename = options.get("filename")
+
+ hashes = options.get("hashes")
+ if isinstance(hashes, dict):
+ self.hashes = list(hashes.items())
+ elif isinstance(hashes, str):
+ self.hashes = []
+ for h in hashes.split(","):
+ name, sep, key = h.partition(":")
+ self.hashes.append((key if sep else name, name))
+ elif hashes:
+ self.hashes = hashes
+ else:
+ self.hashes = (("md5", "md5"), ("sha1", "sha1"))
+
+ events = options.get("event")
+ if events is None:
+ events = ("file",)
+ elif isinstance(events, str):
+ events = events.split(",")
+ job.register_hooks({event: self.run for event in events}, options)
+
+ def run(self, pathfmt):
+ hashes = [
+ (key, hashlib.new(name))
+ for key, name in self.hashes
+ ]
+
+ size = self.chunk_size
+ with self._open(pathfmt) as fp:
+ while True:
+ data = fp.read(size)
+ if not data:
+ break
+ for _, h in hashes:
+ h.update(data)
+
+ for key, h in hashes:
+ pathfmt.kwdict[key] = h.hexdigest()
+
+ if self.filename:
+ pathfmt.build_path()
+
+ def _open(self, pathfmt):
+ try:
+ return open(pathfmt.temppath, "rb")
+ except OSError:
+ return open(pathfmt.realpath, "rb")
+
+
+__postprocessor__ = HashPP
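
Hedged configuration for the new post processor (JSON in practice, shown as a Python literal); option names are taken from the module above. The "name:key" form stores a digest under a custom metadata key, and filename=True rebuilds the path so digests can appear in filename formats:

    hash_pp = {
        "name"    : "hash",
        "event"   : "file",                    # the default
        "hashes"  : "md5,sha256:hash_sha256",  # sha256 stored as "hash_sha256"
        "filename": True,
    }
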
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index e89b170..3ef9fbc 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -103,10 +103,10 @@ class MetadataPP(PostProcessor):
job.register_hooks({event: self.run for event in events}, options)
self._init_archive(job, options, "_MD_")
+ self.filter = self._make_filter(options)
self.mtime = options.get("mtime")
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
- self.private = options.get("private", False)
self.skip = options.get("skip", False)
def run(self, pathfmt):
@@ -114,7 +114,10 @@ class MetadataPP(PostProcessor):
if archive and archive.check(pathfmt.kwdict):
return
- directory = self._directory(pathfmt)
+ if util.WINDOWS and pathfmt.extended:
+ directory = pathfmt._extended_path(self._directory(pathfmt))
+ else:
+ directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
if self.skip and os.path.exists(path):
@@ -231,10 +234,33 @@ class MetadataPP(PostProcessor):
fp.write("\n".join(tags) + "\n")
def _write_json(self, fp, kwdict):
- if not self.private:
- kwdict = util.filter_dict(kwdict)
+ if self.filter:
+ kwdict = self.filter(kwdict)
fp.write(self._json_encode(kwdict) + "\n")
+ def _make_filter(self, options):
+ include = options.get("include")
+ if include:
+ if isinstance(include, str):
+ include = include.split(",")
+ return lambda d: {k: d[k] for k in include if k in d}
+
+ exclude = options.get("exclude")
+ private = options.get("private")
+ if exclude:
+ if isinstance(exclude, str):
+ exclude = exclude.split(",")
+ exclude = set(exclude)
+
+ if private:
+ return lambda d: {k: v for k, v in d.items()
+ if k not in exclude}
+ return lambda d: {k: v for k, v in util.filter_dict(d).items()
+ if k not in exclude}
+
+ if not private:
+ return util.filter_dict
+
@staticmethod
def _make_encoder(options, indent=None):
return json.JSONEncoder(
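
The private flag is now folded into a prebuilt filter function alongside the new include/exclude options; include takes precedence and skips private-key filtering entirely. Hedged config fragments as Python literals:

    keep_only = {"name": "metadata", "include": "id,title,tags"}
    drop_some = {"name": "metadata", "exclude": ["description"], "private": True}
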
diff --git a/gallery_dl/postprocessor/rename.py b/gallery_dl/postprocessor/rename.py
new file mode 100644
index 0000000..f71738d
--- /dev/null
+++ b/gallery_dl/postprocessor/rename.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Rename files"""
+
+from .common import PostProcessor
+from .. import formatter
+import os
+
+
+class RenamePP(PostProcessor):
+
+ def __init__(self, job, options):
+ PostProcessor.__init__(self, job)
+
+ self.skip = options.get("skip", True)
+ old = options.get("from")
+ new = options.get("to")
+
+ if old:
+ self._old = self._apply_format(old)
+ self._new = (self._apply_format(new) if new else
+ self._apply_pathfmt)
+ job.register_hooks({
+ "prepare": self.rename_from,
+ }, options)
+
+ elif new:
+ self._old = self._apply_pathfmt
+ self._new = self._apply_format(new)
+ job.register_hooks({
+ "skip" : self.rename_to_skip,
+ "prepare-after": self.rename_to_pafter,
+ }, options)
+
+ else:
+ raise ValueError("Option 'from' or 'to' is required")
+
+ def rename_from(self, pathfmt):
+ name_old = self._old(pathfmt)
+ path_old = pathfmt.realdirectory + name_old
+
+ if os.path.exists(path_old):
+ name_new = self._new(pathfmt)
+ path_new = pathfmt.realdirectory + name_new
+ self._rename(path_old, name_old, path_new, name_new)
+
+ def rename_to_skip(self, pathfmt):
+ name_old = self._old(pathfmt)
+ path_old = pathfmt.realdirectory + name_old
+
+ if os.path.exists(path_old):
+ pathfmt.filename = name_new = self._new(pathfmt)
+ pathfmt.path = pathfmt.directory + name_new
+ pathfmt.realpath = path_new = pathfmt.realdirectory + name_new
+ self._rename(path_old, name_old, path_new, name_new)
+
+ def rename_to_pafter(self, pathfmt):
+ pathfmt.filename = name_new = self._new(pathfmt)
+ pathfmt.path = pathfmt.directory + name_new
+ pathfmt.realpath = pathfmt.realdirectory + name_new
+ pathfmt.kwdict["_file_recheck"] = True
+
+ def _rename(self, path_old, name_old, path_new, name_new):
+ if self.skip and os.path.exists(path_new):
+ return self.log.warning(
+ "Not renaming '%s' to '%s' since another file with the "
+ "same name exists", name_old, name_new)
+
+ self.log.info("'%s' -> '%s'", name_old, name_new)
+ os.replace(path_old, path_new)
+
+ def _apply_pathfmt(self, pathfmt):
+ return pathfmt.build_filename(pathfmt.kwdict)
+
+ def _apply_format(self, format_string):
+ fmt = formatter.parse(format_string).format_map
+
+ def apply(pathfmt):
+ return pathfmt.clean_path(pathfmt.clean_segment(fmt(
+ pathfmt.kwdict)))
+
+ return apply
+
+
+__postprocessor__ = RenamePP
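
These hooks back the new --rename/--rename-to command-line options added in option.py above. Hedged config equivalents, as Python literals; the format strings are examples:

    # rename files matching an old naming scheme to the current filename format
    rename_from = {"name": "rename", "from": "{id}.{extension}"}

    # rename files from the current format to a new target format
    rename_to = {"name": "rename", "to": "{category}_{id}.{extension}"}
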
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 9e60ce2..f053afa 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -36,7 +36,8 @@ class UgoiraPP(PostProcessor):
self.delete = not options.get("keep-files", False)
self.repeat = options.get("repeat-last-frame", True)
self.mtime = options.get("mtime", True)
- self.uniform = False
+ self.skip = options.get("skip", True)
+ self.uniform = self._convert_zip = self._convert_files = False
ffmpeg = options.get("ffmpeg-location")
self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg"
@@ -90,33 +91,44 @@ class UgoiraPP(PostProcessor):
if self.prevent_odd:
args += ("-vf", "crop=iw-mod(iw\\,2):ih-mod(ih\\,2)")
- job.register_hooks(
- {"prepare": self.prepare, "file": self.convert}, options)
+ job.register_hooks({
+ "prepare": self.prepare,
+ "file" : self.convert_zip,
+ "after" : self.convert_files,
+ }, options)
def prepare(self, pathfmt):
- self._frames = None
-
- if pathfmt.extension != "zip":
+ if "frames" not in pathfmt.kwdict:
+ self._frames = None
return
- kwdict = pathfmt.kwdict
- if "frames" in kwdict:
- self._frames = kwdict["frames"]
- elif "pixiv_ugoira_frame_data" in kwdict:
- self._frames = kwdict["pixiv_ugoira_frame_data"]["data"]
+ self._frames = pathfmt.kwdict["frames"]
+ if pathfmt.extension == "zip":
+ self._convert_zip = True
+ if self.delete:
+ pathfmt.set_extension(self.extension)
+ pathfmt.build_path()
else:
- return
-
- if self.delete:
- pathfmt.set_extension(self.extension)
pathfmt.build_path()
+ index = pathfmt.kwdict["_ugoira_frame_index"]
+ frame = self._frames[index].copy()
+ frame["index"] = index
+ frame["path"] = pathfmt.realpath
+ frame["ext"] = pathfmt.kwdict["extension"]
+
+ if not index:
+ self._files = [frame]
+ else:
+ self._files.append(frame)
+ if len(self._files) >= len(self._frames):
+ self._convert_files = True
- def convert(self, pathfmt):
- if not self._frames:
+ def convert_zip(self, pathfmt):
+ if not self._convert_zip:
return
+ self._convert_zip = False
with tempfile.TemporaryDirectory() as tempdir:
- # extract frames
try:
with zipfile.ZipFile(pathfmt.temppath) as zfile:
zfile.extractall(tempdir)
@@ -124,53 +136,89 @@ class UgoiraPP(PostProcessor):
pathfmt.realpath = pathfmt.temppath
return
- # process frames and collect command-line arguments
- pathfmt.set_extension(self.extension)
- pathfmt.build_path()
-
- args = self._process(pathfmt, tempdir)
- if self.args_pp:
- args += self.args_pp
- if self.args:
- args += self.args
-
- # ensure target directory exists
- os.makedirs(pathfmt.realdirectory, exist_ok=True)
-
- # invoke ffmpeg
- try:
- if self.twopass:
- if "-f" not in self.args:
- args += ("-f", self.extension)
- args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
- self._exec(args + ["1", "-y", os.devnull])
- self._exec(args + ["2", pathfmt.realpath])
- else:
- args.append(pathfmt.realpath)
- self._exec(args)
- if self._finalize:
- self._finalize(pathfmt, tempdir)
- except OSError as exc:
- print()
- self.log.error("Unable to invoke FFmpeg (%s: %s)",
- exc.__class__.__name__, exc)
- pathfmt.realpath = pathfmt.temppath
- except Exception as exc:
- print()
- self.log.error("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("", exc_info=True)
- pathfmt.realpath = pathfmt.temppath
- else:
- if self.mtime:
- mtime = pathfmt.kwdict.get("_mtime")
- if mtime:
- util.set_mtime(pathfmt.realpath, mtime)
+ if self.convert(pathfmt, tempdir):
if self.delete:
pathfmt.delete = True
else:
+ self.log.info(pathfmt.filename)
pathfmt.set_extension("zip")
pathfmt.build_path()
+ def convert_files(self, pathfmt):
+ if not self._convert_files:
+ return
+ self._convert_files = False
+
+ with tempfile.TemporaryDirectory() as tempdir:
+ for frame in self._files:
+
+ # update frame filename extension
+ frame["file"] = name = "{}.{}".format(
+ frame["file"].partition(".")[0], frame["ext"])
+
+ # move frame into tempdir
+ try:
+ self._copy_file(frame["path"], tempdir + "/" + name)
+ except OSError as exc:
+ self.log.debug("Unable to copy frame %s (%s: %s)",
+ name, exc.__class__.__name__, exc)
+ return
+
+ pathfmt.kwdict["num"] = 0
+ self._frames = self._files
+ if self.convert(pathfmt, tempdir):
+ self.log.info(pathfmt.filename)
+ if self.delete:
+ self.log.debug("Deleting frames")
+ for frame in self._files:
+ util.remove_file(frame["path"])
+
+ def convert(self, pathfmt, tempdir):
+ pathfmt.set_extension(self.extension)
+ pathfmt.build_path()
+ if self.skip and pathfmt.exists():
+ return True
+
+ # process frames and collect command-line arguments
+ args = self._process(pathfmt, tempdir)
+ if self.args_pp:
+ args += self.args_pp
+ if self.args:
+ args += self.args
+
+ # ensure target directory exists
+ os.makedirs(pathfmt.realdirectory, exist_ok=True)
+
+ # invoke ffmpeg
+ try:
+ if self.twopass:
+ if "-f" not in self.args:
+ args += ("-f", self.extension)
+ args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
+ self._exec(args + ["1", "-y", os.devnull])
+ self._exec(args + ["2", pathfmt.realpath])
+ else:
+ args.append(pathfmt.realpath)
+ self._exec(args)
+ if self._finalize:
+ self._finalize(pathfmt, tempdir)
+ except OSError as exc:
+ print()
+ self.log.error("Unable to invoke FFmpeg (%s: %s)",
+ exc.__class__.__name__, exc)
+ pathfmt.realpath = pathfmt.temppath
+ except Exception as exc:
+ print()
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
+ self.log.debug("", exc_info=True)
+ pathfmt.realpath = pathfmt.temppath
+ else:
+ if self.mtime:
+ mtime = pathfmt.kwdict.get("_mtime")
+ if mtime:
+ util.set_mtime(pathfmt.realpath, mtime)
+ return True
+
def _exec(self, args):
self.log.debug(args)
out = None if self.output else subprocess.DEVNULL
@@ -182,6 +230,9 @@ class UgoiraPP(PostProcessor):
raise ValueError()
return retcode
+ def _copy_file(self, src, dst):
+ shutil.copyfile(src, dst)
+
def _process_concat(self, pathfmt, tempdir):
rate_in, rate_out = self.calculate_framerate(self._frames)
args = [self.ffmpeg, "-f", "concat"]
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 5744ef3..ecb496d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -101,7 +101,7 @@ def raises(cls):
return wrap
-def identity(x):
+def identity(x, _=None):
"""Returns its argument"""
return x
@@ -520,14 +520,9 @@ class CustomNone():
"""None-style type that supports more operations than regular None"""
__slots__ = ()
- def __getattribute__(self, _):
- return self
-
- def __getitem__(self, _):
- return self
-
- def __iter__(self):
- return self
+ __getattribute__ = identity
+ __getitem__ = identity
+ __iter__ = identity
def __call__(self, *args, **kwargs):
return self
@@ -536,10 +531,6 @@ class CustomNone():
def __next__():
raise StopIteration
- @staticmethod
- def __bool__():
- return False
-
def __eq__(self, other):
return self is other
@@ -550,14 +541,48 @@ class CustomNone():
__le__ = true
__gt__ = false
__ge__ = false
+ __bool__ = false
+
+ __add__ = identity
+ __sub__ = identity
+ __mul__ = identity
+ __matmul__ = identity
+ __truediv__ = identity
+ __floordiv__ = identity
+ __mod__ = identity
+
+ __radd__ = identity
+ __rsub__ = identity
+ __rmul__ = identity
+ __rmatmul__ = identity
+ __rtruediv__ = identity
+ __rfloordiv__ = identity
+ __rmod__ = identity
+
+ __lshift__ = identity
+ __rshift__ = identity
+ __and__ = identity
+ __xor__ = identity
+ __or__ = identity
+
+ __rlshift__ = identity
+ __rrshift__ = identity
+ __rand__ = identity
+ __rxor__ = identity
+ __ror__ = identity
+
+ __neg__ = identity
+ __pos__ = identity
+ __abs__ = identity
+ __invert__ = identity
@staticmethod
def __len__():
return 0
- @staticmethod
- def __hash__():
- return 0
+ __int__ = __len__
+ __hash__ = __len__
+ __index__ = __len__
@staticmethod
def __format__(_):
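
With the operator table filled in, a missing value now propagates through arithmetic instead of raising, which the new :A formatter specifier relies on when a field is absent. Assuming NONE is the module-level CustomNone() instance:

    from gallery_dl.util import NONE

    print(NONE + 1 is NONE)   # True: arithmetic returns the sentinel itself
    print(-NONE is NONE)      # True
    print(int(NONE), len(NONE), bool(NONE))   # 0 0 False
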
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index f2462ee..0f9f91b 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.27.3"
+__version__ = "1.27.4"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index d4fdedc..fe88c2c 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -18,7 +18,7 @@ def import_module(module_name):
if module_name is None:
try:
return __import__("yt_dlp")
- except ImportError:
+ except (ImportError, SyntaxError):
return __import__("youtube_dl")
return __import__(module_name.replace("-", "_"))