summaryrefslogtreecommitdiffstats
path: root/gallery_dl
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2021-10-05 23:30:05 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2021-10-05 23:30:05 -0400
commit34ba2951b8c523713425c98addb9256ea05c946f (patch)
tree6ec7e96d0c6e6f6e94b6b97ecd8c0a414ceef93d /gallery_dl
parent3f5483df9075ae526f4c54f4cbe80edeabf6d4cc (diff)
New upstream version 1.19.0.upstream/1.19.0
Diffstat (limited to 'gallery_dl')
-rw-r--r--gallery_dl/downloader/http.py46
-rw-r--r--gallery_dl/downloader/ytdl.py18
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/artstation.py3
-rw-r--r--gallery_dl/extractor/aryion.py57
-rw-r--r--gallery_dl/extractor/common.py44
-rw-r--r--gallery_dl/extractor/cyberdrop.py2
-rw-r--r--gallery_dl/extractor/desktopography.py95
-rw-r--r--gallery_dl/extractor/deviantart.py63
-rw-r--r--gallery_dl/extractor/erome.py7
-rw-r--r--gallery_dl/extractor/fantia.py2
-rw-r--r--gallery_dl/extractor/foolslide.py4
-rw-r--r--gallery_dl/extractor/gelbooru_v02.py52
-rw-r--r--gallery_dl/extractor/gfycat.py17
-rw-r--r--gallery_dl/extractor/hiperdex.py10
-rw-r--r--gallery_dl/extractor/imagehosts.py20
-rw-r--r--gallery_dl/extractor/instagram.py3
-rw-r--r--gallery_dl/extractor/kemonoparty.py54
-rw-r--r--gallery_dl/extractor/mangadex.py38
-rw-r--r--gallery_dl/extractor/mastodon.py52
-rw-r--r--gallery_dl/extractor/nozomi.py4
-rw-r--r--gallery_dl/extractor/oauth.py2
-rw-r--r--gallery_dl/extractor/reddit.py16
-rw-r--r--gallery_dl/extractor/redgifs.py2
-rw-r--r--gallery_dl/extractor/twitter.py37
-rw-r--r--gallery_dl/formatter.py306
-rw-r--r--gallery_dl/job.py35
-rw-r--r--gallery_dl/option.py10
-rw-r--r--gallery_dl/output.py89
-rw-r--r--gallery_dl/path.py332
-rw-r--r--gallery_dl/postprocessor/compare.py29
-rw-r--r--gallery_dl/postprocessor/exec.py4
-rw-r--r--gallery_dl/postprocessor/metadata.py8
-rw-r--r--gallery_dl/util.py592
-rw-r--r--gallery_dl/version.py2
35 files changed, 1313 insertions, 743 deletions
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 78d8d34..d2efd3f 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -31,6 +31,7 @@ class HttpDownloader(DownloaderBase):
self.downloading = False
self.adjust_extension = self.config("adjust-extensions", True)
+ self.progress = self.config("progress", 3.0)
self.headers = self.config("headers")
self.minsize = self.config("filesize-min")
self.maxsize = self.config("filesize-max")
@@ -63,6 +64,8 @@ class HttpDownloader(DownloaderBase):
self.receive = self._receive_rate
else:
self.log.warning("Invalid rate limit (%r)", self.rate)
+ if self.progress is not None:
+ self.receive = self._receive_rate
def download(self, url, pathfmt):
try:
@@ -202,6 +205,7 @@ class HttpDownloader(DownloaderBase):
with pathfmt.open(mode) as fp:
if file_header:
fp.write(file_header)
+ offset += len(file_header)
elif offset:
if adjust_extension and \
pathfmt.extension in FILE_SIGNATURES:
@@ -210,7 +214,7 @@ class HttpDownloader(DownloaderBase):
self.out.start(pathfmt.path)
try:
- self.receive(fp, content)
+ self.receive(fp, content, size, offset)
except (RequestException, SSLError, OpenSSLError) as exc:
msg = str(exc)
print()
@@ -234,28 +238,42 @@ class HttpDownloader(DownloaderBase):
return True
@staticmethod
- def receive(fp, content):
+ def receive(fp, content, bytes_total, bytes_downloaded):
write = fp.write
for data in content:
write(data)
- def _receive_rate(self, fp, content):
- rt = self.rate
- t1 = time.time()
+ def _receive_rate(self, fp, content, bytes_total, bytes_downloaded):
+ rate = self.rate
+ progress = self.progress
+ bytes_start = bytes_downloaded
+ write = fp.write
+ t1 = tstart = time.time()
for data in content:
- fp.write(data)
+ write(data)
t2 = time.time() # current time
- actual = t2 - t1 # actual elapsed time
- expected = len(data) / rt # expected elapsed time
+ elapsed = t2 - t1 # elapsed time
+ num_bytes = len(data)
+
+ if progress is not None:
+ bytes_downloaded += num_bytes
+ tdiff = t2 - tstart
+ if tdiff >= progress:
+ self.out.progress(
+ bytes_total, bytes_downloaded,
+ int((bytes_downloaded - bytes_start) / tdiff),
+ )
- if actual < expected:
- # sleep if less time elapsed than expected
- time.sleep(expected - actual)
- t1 = time.time()
- else:
- t1 = t2
+ if rate:
+ expected = num_bytes / rate # expected elapsed time
+ if elapsed < expected:
+ # sleep if less time elapsed than expected
+ time.sleep(expected - elapsed)
+ t2 = time.time()
+
+ t1 = t2
def _find_extension(self, response):
"""Get filename extension from MIME type"""
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index b1e1d58..86e247b 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -42,6 +42,10 @@ class YoutubeDLDownloader(DownloaderBase):
if raw_options:
options.update(raw_options)
+ self.progress = self.config("progress", 3.0)
+ if self.progress is not None:
+ options["progress_hooks"] = (self._progress_hook,)
+
if self.config("logging", True):
options["logger"] = self.log
self.forward_cookies = self.config("forward-cookies", False)
@@ -56,7 +60,10 @@ class YoutubeDLDownloader(DownloaderBase):
kwdict = pathfmt.kwdict
ytdl = kwdict.pop("_ytdl_instance", None)
- if not ytdl:
+ if ytdl:
+ if self.progress is not None and not ytdl._progress_hooks:
+ ytdl.add_progress_hook(self._progress_hook)
+ else:
ytdl = self.ytdl
if self.forward_cookies:
set_cookie = ytdl.cookiejar.set_cookie
@@ -126,6 +133,15 @@ class YoutubeDLDownloader(DownloaderBase):
ytdl.process_info(entry)
return True
+ def _progress_hook(self, info):
+ if info["status"] == "downloading" and \
+ info["elapsed"] >= self.progress:
+ self.out.progress(
+ info["total_bytes"],
+ info["downloaded_bytes"],
+ int(info["speed"]),
+ )
+
@staticmethod
def _set_outtmpl(ytdl, outtmpl):
try:
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index f68ea9f..c512548 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -28,6 +28,7 @@ modules = [
"comicvine",
"cyberdrop",
"danbooru",
+ "desktopography",
"deviantart",
"dynastyscans",
"e621",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index f2ad0ab..f687ff8 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -158,7 +158,8 @@ class ArtstationUserExtractor(ArtstationExtractor):
def projects(self):
url = "{}/users/{}/projects.json".format(self.root, self.user)
- return self._pagination(url)
+ params = {"album_id": "all"}
+ return self._pagination(url, params)
class ArtstationAlbumExtractor(ArtstationExtractor):
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 0d0ad70..06ec571 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -29,7 +29,6 @@ class AryionExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.recursive = True
- self._needle = "class='gallery-item' id='"
def login(self):
if self._check_cookies(self.cookienames):
@@ -56,25 +55,50 @@ class AryionExtractor(Extractor):
def items(self):
self.login()
+ data = self.metadata()
for post_id in self.posts():
post = self._parse_post(post_id)
if post:
+ if data:
+ post.update(data)
yield Message.Directory, post
yield Message.Url, post["url"], post
elif post is False and self.recursive:
base = self.root + "/g4/view/"
data = {"_extractor": AryionPostExtractor}
- for post_id in self._pagination(base + post_id):
+ for post_id in self._pagination_params(base + post_id):
yield Message.Queue, base + post_id, data
def posts(self):
"""Yield relevant post IDs"""
- def _pagination(self, url):
+ def metadata(self):
+ """Return general metadata"""
+
+ def _pagination_params(self, url, params=None):
+ if params is None:
+ params = {"p": 1}
+ else:
+ params["p"] = text.parse_int(params.get("p"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for post_id in text.extract_iter(
+ page, "class='gallery-item' id='", "'"):
+ cnt += 1
+ yield post_id
+
+ if cnt < 40:
+ return
+ params["p"] += 1
+
+ def _pagination_next(self, url):
while True:
page = self.request(url).text
- yield from text.extract_iter(page, self._needle, "'")
+ yield from text.extract_iter(page, "thumb' href='/g4/view/", "'")
pos = page.find("Next &gt;&gt;")
if pos < 0:
@@ -180,11 +204,30 @@ class AryionGalleryExtractor(AryionExtractor):
def posts(self):
if self.recursive:
url = "{}/g4/gallery/{}".format(self.root, self.user)
- return self._pagination(url)
+ return self._pagination_params(url)
else:
- self._needle = "thumb' href='/g4/view/"
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
- return util.advance(self._pagination(url), self.offset)
+ return util.advance(self._pagination_next(url), self.offset)
+
+
+class AryionTagExtractor(AryionExtractor):
+ """Extractor for tag searches on eka's portal"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "tags", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
+ test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
+ "count": ">= 5",
+ })
+
+ def metadata(self):
+ self.params = text.parse_query(self.user)
+ self.user = None
+ return {"search_tags": self.params.get("tag")}
+
+ def posts(self):
+ url = self.root + "/g4/tags.php"
+ return self._pagination_params(url, self.params)
class AryionPostExtractor(AryionExtractor):
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d9f69ab..4f42477 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -42,6 +42,7 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
+ self.finalize = None
if self.basecategory:
self.config = self._config_shared
@@ -53,13 +54,13 @@ class Extractor():
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
- self.request_interval = self.config(
- "sleep-request", self.request_interval)
+ self._interval = util.build_duration_func(
+ self.config("sleep-request", self.request_interval),
+ self.request_interval_min,
+ )
if self._retries < 0:
self._retries = float("inf")
- if self.request_interval < self.request_interval_min:
- self.request_interval = self.request_interval_min
self._init_session()
self._init_cookies()
@@ -102,15 +103,19 @@ class Extractor():
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
- tries = 1
- retries = self._retries if retries is None else retries
- session = self.session if session is None else session
- kwargs.setdefault("timeout", self._timeout)
- kwargs.setdefault("verify", self._verify)
+ if retries is None:
+ retries = self._retries
+ if session is None:
+ session = self.session
+ if "timeout" not in kwargs:
+ kwargs["timeout"] = self._timeout
+ if "verify" not in kwargs:
+ kwargs["verify"] = self._verify
response = None
+ tries = 1
- if self.request_interval:
- seconds = (self.request_interval -
+ if self._interval:
+ seconds = (self._interval() -
(time.time() - Extractor.request_timestamp))
if seconds > 0.0:
self.log.debug("Sleeping for %.5s seconds", seconds)
@@ -442,16 +447,23 @@ class GalleryExtractor(Extractor):
imgs = self.images(page)
if "count" in data:
- images = zip(
- range(1, data["count"]+1),
- imgs,
- )
+ if self.config("page-reverse"):
+ images = util.enumerate_reversed(imgs, 1, data["count"])
+ else:
+ images = zip(
+ range(1, data["count"]+1),
+ imgs,
+ )
else:
+ enum = enumerate
try:
data["count"] = len(imgs)
except TypeError:
pass
- images = enumerate(imgs, 1)
+ else:
+ if self.config("page-reverse"):
+ enum = util.enumerate_reversed
+ images = enum(imgs, 1)
yield Message.Directory, data
for data[self.enum], (url, imgdata) in images:
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index e354cb7..2004921 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -15,7 +15,7 @@ class CyberdropAlbumExtractor(Extractor):
category = "cyberdrop"
subcategory = "album"
root = "https://cyberdrop.me"
- directory_fmt = ("{category}", "{album_id} {album_name}")
+ directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = ("https://cyberdrop.me/a/keKRjm4t", {
diff --git a/gallery_dl/extractor/desktopography.py b/gallery_dl/extractor/desktopography.py
new file mode 100644
index 0000000..363341a
--- /dev/null
+++ b/gallery_dl/extractor/desktopography.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://desktopography.net/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?desktopography\.net"
+
+
+class DesktopographyExtractor(Extractor):
+ """Base class for desktopography extractors"""
+ category = "desktopography"
+ archive_fmt = "{filename}"
+ root = "https://desktopography.net"
+
+
+class DesktopographySiteExtractor(DesktopographyExtractor):
+ """Extractor for all desktopography exhibitions """
+ subcategory = "site"
+ pattern = BASE_PATTERN + r"/$"
+ test = ("https://desktopography.net/",)
+
+ def items(self):
+ page = self.request(self.root).text
+ data = {"_extractor": DesktopographyExhibitionExtractor}
+
+ for exhibition_year in text.extract_iter(
+ page,
+ '<a href="https://desktopography.net/exhibition-',
+ '/">'):
+
+ url = self.root + "/exhibition-" + exhibition_year + "/"
+ yield Message.Queue, url, data
+
+
+class DesktopographyExhibitionExtractor(DesktopographyExtractor):
+ """Extractor for a yearly desktopography exhibition"""
+ subcategory = "exhibition"
+ pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
+ test = ("https://desktopography.net/exhibition-2020/",)
+
+ def __init__(self, match):
+ DesktopographyExtractor.__init__(self, match)
+ self.year = match.group(1)
+
+ def items(self):
+ url = "{}/exhibition-{}/".format(self.root, self.year)
+ base_entry_url = "https://desktopography.net/portfolios/"
+ page = self.request(url).text
+
+ data = {
+ "_extractor": DesktopographyEntryExtractor,
+ "year": self.year,
+ }
+
+ for entry_url in text.extract_iter(
+ page,
+ '<a class="overlay-background" href="' + base_entry_url,
+ '">'):
+
+ url = base_entry_url + entry_url
+ yield Message.Queue, url, data
+
+
+class DesktopographyEntryExtractor(DesktopographyExtractor):
+ """Extractor for all resolutions of a desktopography wallpaper"""
+ subcategory = "entry"
+ pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
+ test = ("https://desktopography.net/portfolios/new-era/",)
+
+ def __init__(self, match):
+ DesktopographyExtractor.__init__(self, match)
+ self.entry = match.group(1)
+
+ def items(self):
+ url = "{}/portfolios/{}".format(self.root, self.entry)
+ page = self.request(url).text
+
+ entry_data = {"entry": self.entry}
+ yield Message.Directory, entry_data
+
+ for image_data in text.extract_iter(
+ page,
+ '<a target="_blank" href="https://desktopography.net',
+ '">'):
+
+ path, _, filename = image_data.partition(
+ '" class="wallpaper-button" download="')
+ text.nameext_from_url(filename, entry_data)
+ yield Message.Url, self.root + path, entry_data
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index b4ac742..7dac770 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -46,6 +46,13 @@ class DeviantartExtractor(Extractor):
self.group = False
self.api = None
+ unwatch = self.config("auto-unwatch")
+ if unwatch:
+ self.unwatch = []
+ self.finalize = self._unwatch_premium
+ else:
+ self.unwatch = None
+
if self.quality:
self.quality = ",q_{}".format(self.quality)
@@ -318,44 +325,48 @@ class DeviantartExtractor(Extractor):
except KeyError:
pass
- # check accessibility
- if self.api.refresh_token_key:
- dev = self.api.deviation(deviation["deviationid"], False)
- has_access = dev["premium_folder_data"]["has_access"]
- username = dev["author"]["username"]
- folder = dev["premium_folder_data"]
-
- if not has_access and folder["type"] == "watchers" and \
- self.config("auto-watch"):
- if self.api.user_friends_watch(username):
- has_access = True
- self.log.info(
- "Watching %s for premium folder access", username)
- else:
- self.log.warning(
- "Error when trying to watch %s. "
- "Try again with a new refresh-token", username)
- else:
+ if not self.api.refresh_token_key:
self.log.warning(
"Unable to access premium content (no refresh-token)")
self._fetch_premium = lambda _: None
return None
+ dev = self.api.deviation(deviation["deviationid"], False)
+ folder = dev["premium_folder_data"]
+ username = dev["author"]["username"]
+ has_access = folder["has_access"]
+
+ if not has_access and folder["type"] == "watchers" and \
+ self.config("auto-watch"):
+ if self.unwatch is not None:
+ self.unwatch.append(username)
+ if self.api.user_friends_watch(username):
+ has_access = True
+ self.log.info(
+ "Watching %s for premium folder access", username)
+ else:
+ self.log.warning(
+ "Error when trying to watch %s. "
+ "Try again with a new refresh-token", username)
+
if has_access:
self.log.info("Fetching premium folder data")
else:
self.log.warning("Unable to access premium content (type: %s)",
folder["type"])
- self._fetch_premium = lambda _: None
- return None
- # fill cache
cache = self._premium_cache
for dev in self.api.gallery(
username, folder["gallery_id"], public=False):
- cache[dev["deviationid"]] = dev
+ cache[dev["deviationid"]] = dev if has_access else None
+
return cache[deviation["deviationid"]]
+ def _unwatch_premium(self):
+ for username in self.unwatch:
+ self.log.info("Unwatching %s", username)
+ self.api.user_friends_unwatch(username)
+
class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
@@ -823,7 +834,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
- archive_fmt = "{index}.{extension}"
+ archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
@@ -1153,13 +1164,15 @@ class DeviantartOAuthAPI():
"mature_content" : self.mature,
}
return self._call(
- endpoint, method="POST", data=data, public=False, fatal=False)
+ endpoint, method="POST", data=data, public=False, fatal=False,
+ ).get("success")
def user_friends_unwatch(self, username):
"""Unwatch a user"""
endpoint = "user/friends/unwatch/" + username
return self._call(
- endpoint, method="POST", public=False, fatal=False)
+ endpoint, method="POST", public=False, fatal=False,
+ ).get("success")
def authenticate(self, refresh_token_key):
"""Authenticate the application by requesting an access token"""
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index d4fd826..992db97 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -46,9 +46,10 @@ class EromeExtractor(Extractor):
user, pos = text.extract(
page, 'href="https://www.erome.com/', '"', pos)
data = {
- "album_id": album_id,
- "title" : text.unescape(title),
- "user" : text.unquote(user),
+ "album_id" : album_id,
+ "title" : text.unescape(title),
+ "user" : text.unquote(user),
+ "_http_headers": {"Referer": url},
}
yield Message.Directory, data
diff --git a/gallery_dl/extractor/fantia.py b/gallery_dl/extractor/fantia.py
index 9df2bef..62f7429 100644
--- a/gallery_dl/extractor/fantia.py
+++ b/gallery_dl/extractor/fantia.py
@@ -66,6 +66,8 @@ class FantiaExtractor(Extractor):
"comment": resp["comment"],
"rating": resp["rating"],
"posted_at": resp["posted_at"],
+ "date": text.parse_datetime(
+ resp["posted_at"], "%a, %d %b %Y %H:%M:%S %z"),
"fanclub_id": resp["fanclub"]["id"],
"fanclub_user_id": resp["fanclub"]["user"]["id"],
"fanclub_user_name": resp["fanclub"]["user"]["name"],
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index a1470dc..c09eb96 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -88,7 +88,9 @@ class FoolslideChapterExtractor(FoolslideExtractor):
data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])
yield Message.Directory, data
- for data["page"], image in enumerate(imgs, 1):
+ enum = util.enumerate_reversed if self.config(
+ "page-reverse") else enumerate
+ for data["page"], image in enum(imgs, 1):
try:
url = image["url"]
del image["url"]
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 1b877b3..e09e190 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -176,6 +176,58 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
yield post.attrib
+class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "favorites", "{favorite_id}")
+ archive_fmt = "f_{favorite_id}_{id}"
+ per_page = 50
+ pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ test = (
+ ("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
+ "count": 3,
+ }),
+ ("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
+ "count": 2,
+ }),
+ ("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
+ "count": 4,
+ }),
+ ("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
+ "count": 3,
+ }),
+ )
+
+ def __init__(self, match):
+ GelbooruV02Extractor.__init__(self, match)
+ self.favorite_id = match.group(match.lastindex)
+
+ def metadata(self):
+ return {"favorite_id": text.parse_int(self.favorite_id)}
+
+ def posts(self):
+ url = self.root + "/index.php"
+ params = {
+ "page": "favorites",
+ "s" : "view",
+ "id" : self.favorite_id,
+ "pid" : self.page_start * self.per_page,
+ }
+
+ data = {}
+ while True:
+ num_ids = 0
+ page = self.request(url, params=params).text
+
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ for post in self._api_request(data):
+ yield post.attrib
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+
+
class GelbooruV02PostExtractor(GelbooruV02Extractor):
subcategory = "post"
archive_fmt = "{id}"
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 6d31f7d..2757852 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -10,7 +10,6 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
class GfycatExtractor(Extractor):
@@ -155,7 +154,6 @@ class GfycatImageExtractor(GfycatExtractor):
class GfycatAPI():
API_ROOT = "https://api.gfycat.com"
- ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"
def __init__(self, extractor):
self.extractor = extractor
@@ -175,23 +173,8 @@ class GfycatAPI():
params = {"search_text": query, "count": 150}
return self._pagination(endpoint, params)
- @cache(keyarg=1, maxage=3600)
- def _authenticate_impl(self, category):
- if category == "redgifs":
- url = "https://api.redgifs.com/v1/oauth/webtoken"
- else:
- url = "https://weblogin." + category + ".com/oauth/webtoken"
- data = {"access_key": self.ACCESS_KEY}
- headers = {"Referer": self.extractor.root + "/",
- "Origin" : self.extractor.root}
- response = self.extractor.request(
- url, method="POST", headers=headers, json=data)
- return "Bearer " + response.json()["access_token"]
-
def _call(self, endpoint, params=None):
url = self.API_ROOT + endpoint
- self.headers["Authorization"] = self._authenticate_impl(
- self.extractor.category)
return self.extractor.request(
url, params=params, headers=self.headers).json()
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index a40d631..201ffdd 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -141,13 +141,17 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
shortlink = text.extract(page, "rel='shortlink' href='", "'")[0]
data = {
- "action": "manga_get_chapters",
- "manga" : shortlink.rpartition("=")[2],
+ "action" : "manga_get_reading_nav",
+ "manga" : shortlink.rpartition("=")[2],
+ "chapter" : "",
+ "volume_id": "",
+ "style" : "list",
+ "type" : "manga",
}
url = self.root + "/wp-admin/admin-ajax.php"
page = self.request(url, method="POST", data=data).text
- for url in text.extract_iter(page, 'href="', '"', 320):
+ for url in text.extract_iter(page, 'data-redirect="', '"'):
chapter = url.rpartition("/")[2]
results.append((url, self.chapter_data(chapter)))
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 13996d0..d699f07 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -279,3 +279,23 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
url , pos = text.extract(page, '<br><img src="', '"')
filename, pos = text.extract(page, 'alt="', '"', pos)
return url, filename
+
+
+class FappicImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from fappic.com"""
+ category = "fappic"
+ pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
+ test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
+ "pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
+ "keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ })
+
+ def get_info(self, page):
+ url , pos = text.extract(page, '<a href="/?click"><img src="', '"')
+ filename, pos = text.extract(page, 'alt="', '"', pos)
+
+ if filename.startswith("Porn-Picture-"):
+ filename = filename[13:]
+
+ return url, filename
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 3590e17..983ae37 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,7 +29,7 @@ class InstagramExtractor(Extractor):
root = "https://www.instagram.com"
cookiedomain = ".instagram.com"
cookienames = ("sessionid",)
- request_interval = 8.0
+ request_interval = (6.0, 12.0)
def __init__(self, match):
Extractor.__init__(self, match)
@@ -679,7 +679,6 @@ class InstagramStoriesExtractor(InstagramExtractor):
("https://www.instagram.com/stories/instagram/"),
("https://www.instagram.com/stories/highlights/18042509488170095/"),
)
- request_interval = 1.0
def __init__(self, match):
self.highlight_id, self.user = match.groups()
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index a911d35..c5f5ae7 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -9,7 +9,8 @@
"""Extractors for https://kemono.party/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import itertools
import re
@@ -70,11 +71,32 @@ class KemonopartyExtractor(Extractor):
post["type"] = file["type"]
url = file["path"]
if url[0] == "/":
- url = self.root + url
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
text.nameext_from_url(file["name"], post)
yield Message.Url, url, post
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/account/login"
+ data = {"username": username, "password": password}
+
+ response = self.request(url, method="POST", data=data)
+ if response.url.endswith("/account/login") and \
+ "Username or password is incorrect" in response.text:
+ raise exception.AuthenticationError()
+
+ return {c.name: c.value for c in response.history[0].cookies}
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
@@ -119,7 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
pattern = BASE_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/files/fanbox"
+ "pattern": r"https://kemono\.party/data/files/fanbox"
r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -142,12 +164,12 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/inline/fanbox"
+ "pattern": r"https://kemono\.party/data/inline/fanbox"
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://kemono\.party/(file|attachment)s"
+ "pattern": r"https://kemono\.party/data/(file|attachment)s"
r"/gumroad/trylsc/IURjT/",
}),
# username (#1548, #1652)
@@ -173,3 +195,25 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
def posts(self):
posts = self.request(self.api_url).json()
return (posts[0],) if len(posts) > 1 else posts
+
+
+class KemonopartyFavoriteExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?kemono\.party/favorites"
+ test = ("https://kemono.party/favorites", {
+ "pattern": KemonopartyUserExtractor.pattern,
+ "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+ "count": 3,
+ })
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+ self.login()
+
+ users = self.request(self.root + "/api/favorites").json()
+ for user in users:
+ user["_extractor"] = KemonopartyUserExtractor
+ url = "{}/{}/user/{}".format(
+ self.root, user["service"], user["id"])
+ yield Message.Queue, url, user
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 53ae76a..634a92d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -37,7 +37,7 @@ class MangadexExtractor(Extractor):
def items(self):
for chapter in self.chapters():
- uuid = chapter["data"]["id"]
+ uuid = chapter["id"]
data = self._transform(chapter)
data["_extractor"] = MangadexChapterExtractor
self._cache[uuid] = (chapter, data)
@@ -51,8 +51,8 @@ class MangadexExtractor(Extractor):
for item in manga["relationships"]:
relationships[item["type"]].append(item["id"])
- cattributes = chapter["data"]["attributes"]
- mattributes = manga["data"]["attributes"]
+ cattributes = chapter["attributes"]
+ mattributes = manga["attributes"]
lang = cattributes["translatedLanguage"].partition("-")[0]
if cattributes["chapter"]:
@@ -63,12 +63,12 @@ class MangadexExtractor(Extractor):
data = {
"manga" : (mattributes["title"].get("en") or
next(iter(mattributes["title"].values()))),
- "manga_id": manga["data"]["id"],
+ "manga_id": manga["id"],
"title" : cattributes["title"],
"volume" : text.parse_int(cattributes["volume"]),
"chapter" : text.parse_int(chnum),
"chapter_minor": sep + minor,
- "chapter_id": chapter["data"]["id"],
+ "chapter_id": chapter["id"],
"date" : text.parse_datetime(cattributes["publishAt"]),
"lang" : lang,
"language": util.code_to_language(lang),
@@ -77,13 +77,13 @@ class MangadexExtractor(Extractor):
if self.config("metadata"):
data["artist"] = [
- self.api.author(uuid)["data"]["attributes"]["name"]
+ self.api.author(uuid)["attributes"]["name"]
for uuid in relationships["artist"]]
data["author"] = [
- self.api.author(uuid)["data"]["attributes"]["name"]
+ self.api.author(uuid)["attributes"]["name"]
for uuid in relationships["author"]]
data["group"] = [
- self.api.group(uuid)["data"]["attributes"]["name"]
+ self.api.group(uuid)["attributes"]["name"]
for uuid in relationships["scanlation_group"]]
return data
@@ -118,11 +118,14 @@ class MangadexChapterExtractor(MangadexExtractor):
data = self._transform(chapter)
yield Message.Directory, data
- cattributes = chapter["data"]["attributes"]
+ cattributes = chapter["attributes"]
data["_http_headers"] = self._headers
base = "{}/data/{}/".format(
self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
- for data["page"], page in enumerate(cattributes["data"], 1):
+
+ enum = util.enumerate_reversed if self.config(
+ "page-reverse") else enumerate
+ for data["page"], page in enum(cattributes["data"], 1):
text.nameext_from_url(page, data)
yield Message.Url, base + page, data
@@ -153,6 +156,9 @@ class MangadexMangaExtractor(MangadexExtractor):
("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
"count": 1,
}),
+ ("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
+ "count": ">= 20",
+ })
)
def chapters(self):
@@ -189,18 +195,18 @@ class MangadexAPI():
@memcache(keyarg=1)
def author(self, uuid):
- return self._call("/author/" + uuid)
+ return self._call("/author/" + uuid)["data"]
def chapter(self, uuid):
- return self._call("/chapter/" + uuid)
+ return self._call("/chapter/" + uuid)["data"]
@memcache(keyarg=1)
def group(self, uuid):
- return self._call("/group/" + uuid)
+ return self._call("/group/" + uuid)["data"]
@memcache(keyarg=1)
def manga(self, uuid):
- return self._call("/manga/" + uuid)
+ return self._call("/manga/" + uuid)["data"]
def manga_feed(self, uuid):
config = self.extractor.config
@@ -209,6 +215,8 @@ class MangadexAPI():
"order[volume]" : order,
"order[chapter]" : order,
"translatedLanguage[]": config("lang"),
+ "contentRating[]" : [
+ "safe", "suggestive", "erotica", "pornographic"],
}
return self._pagination("/manga/" + uuid + "/feed", params)
@@ -271,7 +279,7 @@ class MangadexAPI():
while True:
data = self._call(endpoint, params)
- yield from data["results"]
+ yield from data["data"]
params["offset"] = data["offset"] + data["limit"]
if params["offset"] >= data["total"]:
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index ff0bfc3..cd7cabb 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -87,7 +87,7 @@ BASE_PATTERN = MastodonExtractor.update(INSTANCES)
class MastodonUserExtractor(MastodonExtractor):
"""Extractor for all images of an account/user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/@([^/?#]+)(?:/media)?/?$"
+ pattern = BASE_PATTERN + r"/(?:@|users/)([^/?#]+)(?:/media)?/?$"
test = (
("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
@@ -100,26 +100,44 @@ class MastodonUserExtractor(MastodonExtractor):
"count": 60,
}),
("https://baraag.net/@pumpkinnsfw"),
+ ("https://mastodon.social/@id:10843"),
+ ("https://mastodon.social/users/id:10843"),
+ ("https://mastodon.social/users/jk"),
)
def statuses(self):
api = MastodonAPI(self)
- username = self.item
- handle = "@{}@{}".format(username, self.instance)
- for account in api.account_search(handle, 1):
- if account["username"] == username:
- break
- else:
- raise exception.NotFoundError("account")
-
return api.account_statuses(
- account["id"],
+ api.account_id_by_username(self.item),
only_media=not self.config("text-posts", False),
exclude_replies=not self.replies,
)
+class MastodonFollowingExtractor(MastodonExtractor):
+ """Extractor for followed mastodon users"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/users/([^/?#]+)/following"
+ test = (
+ ("https://mastodon.social/users/0x4f/following", {
+ "extractor": False,
+ "count": ">= 20",
+ }),
+ ("https://mastodon.social/users/id:10843/following"),
+ ("https://pawoo.net/users/yoru_nine/following"),
+ ("https://baraag.net/users/pumpkinnsfw/following"),
+ )
+
+ def items(self):
+ api = MastodonAPI(self)
+ account_id = api.account_id_by_username(self.item)
+
+ for account in api.account_following(account_id):
+ account["_extractor"] = MastodonUserExtractor
+ yield Message.Queue, account["url"], account
+
+
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
@@ -165,6 +183,20 @@ class MastodonAPI():
self.headers = {"Authorization": "Bearer " + access_token}
+ def account_id_by_username(self, username):
+ if username.startswith("id:"):
+ return username[3:]
+
+ handle = "@{}@{}".format(username, self.extractor.instance)
+ for account in self.account_search(handle, 1):
+ if account["username"] == username:
+ return account["id"]
+ raise exception.NotFoundError("account")
+
+ def account_following(self, account_id):
+ endpoint = "/v1/accounts/{}/following".format(account_id)
+ return self._pagination(endpoint, None)
+
def account_search(self, query, limit=40):
"""Search for accounts"""
endpoint = "/v1/accounts/search"
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 44411c8..4dc880f 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -168,7 +168,7 @@ class NozomiTagExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
tags, self.pnum = match.groups()
- self.tags = text.unquote(tags).lower()
+ self.tags = text.unquote(tags)
self.nozomi = "/nozomi/{}.nozomi".format(self.tags)
def metadata(self):
@@ -187,7 +187,7 @@ class NozomiSearchExtractor(NozomiExtractor):
def __init__(self, match):
NozomiExtractor.__init__(self, match)
- self.tags = text.unquote(match.group(1)).lower().split()
+ self.tags = text.unquote(match.group(1)).split()
def metadata(self):
return {"search_tags": self.tags}
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 4dc1e43..6812f35 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -415,7 +415,7 @@ class OAuthPixiv(OAuthBase):
print("""
1) Open your browser's Developer Tools (F12) and switch to the Network tab
2) Login
-4) Select the last network monitor entry ('callback?state=...')
+3) Select the last network monitor entry ('callback?state=...')
4) Copy its 'code' query parameter, paste it below, and press Enter
""")
code = input("code: ")
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 8953edd..43c7e50 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -139,7 +139,7 @@ class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""
subcategory = "subreddit"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/"
- r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?(?:$|#)")
test = (
("https://www.reddit.com/r/lavaporn/", {
"range": "1-20",
@@ -152,9 +152,11 @@ class RedditSubredditExtractor(RedditExtractor):
)
def __init__(self, match):
+ self.subreddit, sub, params = match.groups()
+ self.params = text.parse_query(params)
+ if sub:
+ self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
- self.subreddit = match.group(1)
- self.params = text.parse_query(match.group(2))
def submissions(self):
return self.api.submissions_subreddit(self.subreddit, self.params)
@@ -164,7 +166,7 @@ class RedditUserExtractor(RedditExtractor):
"""Extractor for URLs from posts by a reddit user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
- r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?")
+ r"([^/?#]+(?:/([a-z]+))?)/?(?:\?([^#]*))?")
test = (
("https://www.reddit.com/user/username/", {
"count": ">= 2",
@@ -175,9 +177,11 @@ class RedditUserExtractor(RedditExtractor):
)
def __init__(self, match):
+ self.user, sub, params = match.groups()
+ self.params = text.parse_query(params)
+ if sub:
+ self.subcategory += "-" + sub
RedditExtractor.__init__(self, match)
- self.user = match.group(1)
- self.params = text.parse_query(match.group(2))
def submissions(self):
return self.api.submissions_user(self.user, self.params)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 576564c..e078bef 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -72,5 +72,3 @@ class RedgifsImageExtractor(RedgifsExtractor):
class RedgifsAPI(GfycatAPI):
API_ROOT = "https://api.redgifs.com"
- ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
- "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 2dfcb55..4a3f6cd 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -50,7 +50,7 @@ class TwitterExtractor(Extractor):
if not self.retweets and "retweeted_status_id_str" in tweet:
self.log.debug("Skipping %s (retweet)", tweet["id_str"])
continue
- if not self.quoted and "quoted" in tweet:
+ if not self.quoted and "quoted_by_id_str" in tweet:
self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
if "in_reply_to_user_id_str" in tweet and (
@@ -139,8 +139,10 @@ class TwitterExtractor(Extractor):
for size in ("original", "x_large", "large", "small"):
key = prefix + size
if key in bvals:
- files.append(bvals[key]["image_value"])
- return
+ value = bvals[key].get("image_value")
+ if value and "url" in value:
+ files.append(value)
+ return
elif self.videos:
url = "ytdl:{}/i/web/status/{}".format(self.root, tweet["id_str"])
files.append({"url": url})
@@ -199,6 +201,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
+ if "quoted_by_id_str" in tweet:
+ tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
if "author" in tweet:
tdata["author"] = self._transform_user(tweet["author"])
@@ -316,7 +320,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
- """Extractor for all images from a user's timeline"""
+ """Extractor for Tweets from a user's timeline"""
subcategory = "timeline"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@@ -341,8 +345,25 @@ class TwitterTimelineExtractor(TwitterExtractor):
return TwitterAPI(self).timeline_profile(self.user)
+class TwitterRepliesExtractor(TwitterExtractor):
+ """Extractor for Tweets from a user's timeline including replies"""
+ subcategory = "replies"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
+ test = (
+ ("https://twitter.com/supernaturepics/with_replies", {
+ "range": "1-40",
+ "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
+ }),
+ ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
+ ("https://www.twitter.com/id:2976459548/with_replies"),
+ )
+
+ def tweets(self):
+ return TwitterAPI(self).timeline_profile(self.user, replies=True)
+
+
class TwitterMediaExtractor(TwitterExtractor):
- """Extractor for all images from a user's Media Tweets"""
+ """Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
test = (
@@ -652,11 +673,11 @@ class TwitterAPI():
endpoint = "/2/timeline/conversation/{}.json".format(conversation_id)
return self._pagination(endpoint)
- def timeline_profile(self, screen_name):
+ def timeline_profile(self, screen_name, replies=False):
user_id = self._user_id_by_screen_name(screen_name)
endpoint = "/2/timeline/profile/{}.json".format(user_id)
params = self.params.copy()
- params["include_tweet_replies"] = "false"
+ params["include_tweet_replies"] = "true" if replies else "false"
return self._pagination(endpoint, params)
def timeline_media(self, screen_name):
@@ -886,7 +907,7 @@ class TwitterAPI():
quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
quoted["user"] = tweet["user"]
- quoted["quoted"] = True
+ quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
# update cursor value
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
new file mode 100644
index 0000000..654e5d0
--- /dev/null
+++ b/gallery_dl/formatter.py
@@ -0,0 +1,306 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""String formatters"""
+
+import json
+import string
+import _string
+import operator
+from . import text, util
+
+_CACHE = {}
+_CONVERSIONS = None
+
+
+def parse(format_string, default=None):
+ key = format_string, default
+
+ try:
+ return _CACHE[key]
+ except KeyError:
+ pass
+
+ cls = StringFormatter
+ if format_string.startswith("\f"):
+ kind, _, format_string = format_string.partition(" ")
+ kind = kind[1:]
+
+ if kind == "T":
+ cls = TemplateFormatter
+ elif kind == "E":
+ cls = ExpressionFormatter
+ elif kind == "M":
+ cls = ModuleFormatter
+
+ formatter = _CACHE[key] = cls(format_string, default)
+ return formatter
+
+
+class StringFormatter():
+ """Custom, extended version of string.Formatter
+
+ This string formatter implementation is a mostly performance-optimized
+ variant of the original string.Formatter class. Unnecessary features have
+ been removed (positional arguments, unused argument check) and new
+ formatting options have been added.
+
+ Extra Conversions:
+ - "l": calls str.lower on the target value
+ - "u": calls str.upper
+ - "c": calls str.capitalize
+ - "C": calls string.capwords
+ - "j": calls json.dumps
+ - "t": calls str.strip
+ - "d": calls text.parse_timestamp
+ - "U": calls text.unescape
+ - "S": calls util.to_string()
+ - "T": calls util.to_timestamp()
+ - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
+
+ Extra Format Specifiers:
+ - "?<before>/<after>/":
+ Adds <before> and <after> to the actual value if it evaluates to True.
+ Otherwise the whole replacement field becomes an empty string.
+ Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
+ -> "" (if "f" is None, 0, "")
+
+ - "L<maxlen>/<replacement>/":
+ Replaces the output with <replacement> if its length (in characters)
+ exceeds <maxlen>. Otherwise everything is left as is.
+ Example: {f:L5/too long/} -> "foo" (if "f" is "foo")
+ -> "too long" (if "f" is "foobar")
+
+ - "J<separator>/":
+ Joins elements of a list (or string) using <separator>
+ Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])
+
+ - "R<old>/<new>/":
+ Replaces all occurrences of <old> with <new>
+ Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
+ """
+
+ def __init__(self, format_string, default=None):
+ self.default = default
+ self.result = []
+ self.fields = []
+
+ for literal_text, field_name, format_spec, conv in \
+ _string.formatter_parser(format_string):
+ if literal_text:
+ self.result.append(literal_text)
+ if field_name:
+ self.fields.append((
+ len(self.result),
+ self._field_access(field_name, format_spec, conv),
+ ))
+ self.result.append("")
+
+ if len(self.result) == 1:
+ if self.fields:
+ self.format_map = self.fields[0][1]
+ else:
+ self.format_map = lambda _: format_string
+ del self.result, self.fields
+
+ def format_map(self, kwdict):
+ """Apply 'kwdict' to the initial format_string and return its result"""
+ result = self.result
+ for index, func in self.fields:
+ result[index] = func(kwdict)
+ return "".join(result)
+
+ def _field_access(self, field_name, format_spec, conversion):
+ fmt = parse_format_spec(format_spec, conversion)
+
+ if "|" in field_name:
+ return self._apply_list([
+ parse_field_name(fn)
+ for fn in field_name.split("|")
+ ], fmt)
+ else:
+ key, funcs = parse_field_name(field_name)
+ if funcs:
+ return self._apply(key, funcs, fmt)
+ return self._apply_simple(key, fmt)
+
+ def _apply(self, key, funcs, fmt):
+ def wrap(kwdict):
+ try:
+ obj = kwdict[key]
+ for func in funcs:
+ obj = func(obj)
+ except Exception:
+ obj = self.default
+ return fmt(obj)
+ return wrap
+
+ def _apply_simple(self, key, fmt):
+ def wrap(kwdict):
+ return fmt(kwdict[key] if key in kwdict else self.default)
+ return wrap
+
+ def _apply_list(self, lst, fmt):
+ def wrap(kwdict):
+ for key, funcs in lst:
+ try:
+ obj = kwdict[key]
+ for func in funcs:
+ obj = func(obj)
+ if obj:
+ break
+ except Exception:
+ pass
+ else:
+ obj = self.default
+ return fmt(obj)
+ return wrap
+
+
+class TemplateFormatter(StringFormatter):
+ """Read format_string from file"""
+
+ def __init__(self, path, default=None):
+ with open(util.expand_path(path)) as fp:
+ format_string = fp.read()
+ StringFormatter.__init__(self, format_string, default)
+
+
+class ExpressionFormatter():
+ """Generate text by evaluating a Python expression"""
+
+ def __init__(self, expression, default=None):
+ self.format_map = util.compile_expression(expression)
+
+
+class ModuleFormatter():
+ """Generate text by calling an external function"""
+
+ def __init__(self, function_spec, default=None):
+ module_name, _, function_name = function_spec.partition(":")
+ module = __import__(module_name)
+ self.format_map = getattr(module, function_name)
+
+
+def parse_field_name(field_name):
+ first, rest = _string.formatter_field_name_split(field_name)
+ funcs = []
+
+ for is_attr, key in rest:
+ if is_attr:
+ func = operator.attrgetter
+ else:
+ func = operator.itemgetter
+ try:
+ if ":" in key:
+ start, _, stop = key.partition(":")
+ stop, _, step = stop.partition(":")
+ start = int(start) if start else None
+ stop = int(stop) if stop else None
+ step = int(step) if step else None
+ key = slice(start, stop, step)
+ except TypeError:
+ pass # key is an integer
+
+ funcs.append(func(key))
+
+ return first, funcs
+
+
+def parse_format_spec(format_spec, conversion):
+ fmt = build_format_func(format_spec)
+ if not conversion:
+ return fmt
+
+ global _CONVERSIONS
+ if _CONVERSIONS is None:
+ _CONVERSIONS = {
+ "l": str.lower,
+ "u": str.upper,
+ "c": str.capitalize,
+ "C": string.capwords,
+ "j": json.dumps,
+ "t": str.strip,
+ "T": util.to_timestamp,
+ "d": text.parse_timestamp,
+ "U": text.unescape,
+ "S": util.to_string,
+ "s": str,
+ "r": repr,
+ "a": ascii,
+ }
+
+ conversion = _CONVERSIONS[conversion]
+ if fmt is format:
+ return conversion
+ else:
+ def chain(obj):
+ return fmt(conversion(obj))
+ return chain
+
+
+def build_format_func(format_spec):
+ if format_spec:
+ fmt = format_spec[0]
+ if fmt == "?":
+ return _parse_optional(format_spec)
+ if fmt == "L":
+ return _parse_maxlen(format_spec)
+ if fmt == "J":
+ return _parse_join(format_spec)
+ if fmt == "R":
+ return _parse_replace(format_spec)
+ return _default_format(format_spec)
+ return format
+
+
+def _parse_optional(format_spec):
+ before, after, format_spec = format_spec.split("/", 2)
+ before = before[1:]
+ fmt = build_format_func(format_spec)
+
+ def optional(obj):
+ return before + fmt(obj) + after if obj else ""
+ return optional
+
+
+def _parse_maxlen(format_spec):
+ maxlen, replacement, format_spec = format_spec.split("/", 2)
+ maxlen = text.parse_int(maxlen[1:])
+ fmt = build_format_func(format_spec)
+
+ def mlen(obj):
+ obj = fmt(obj)
+ return obj if len(obj) <= maxlen else replacement
+ return mlen
+
+
+def _parse_join(format_spec):
+ separator, _, format_spec = format_spec.partition("/")
+ separator = separator[1:]
+ fmt = build_format_func(format_spec)
+
+ def join(obj):
+ return fmt(separator.join(obj))
+ return join
+
+
+def _parse_replace(format_spec):
+ old, new, format_spec = format_spec.split("/", 2)
+ old = old[1:]
+ fmt = build_format_func(format_spec)
+
+ def replace(obj):
+ return fmt(obj.replace(old, new))
+ return replace
+
+
+def _default_format(format_spec):
+ def wrap(obj):
+ return format(obj, format_spec)
+ return wrap
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 32e9bb5..4e185d0 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -15,7 +15,7 @@ import operator
import functools
import collections
from . import extractor, downloader, postprocessor
-from . import config, text, util, output, exception
+from . import config, text, util, path, formatter, output, exception
from .extractor.message import Message
@@ -72,9 +72,9 @@ class Job():
log = extractor.log
msg = None
- sleep = extractor.config("sleep-extractor")
+ sleep = util.build_duration_func(extractor.config("sleep-extractor"))
if sleep:
- time.sleep(sleep)
+ time.sleep(sleep())
try:
for msg in extractor:
@@ -109,6 +109,8 @@ class Job():
log.info("No results for %s", extractor.url)
finally:
self.handle_finalize()
+ if extractor.finalize:
+ extractor.finalize()
return self.status
@@ -234,7 +236,7 @@ class DownloadJob(Job):
return
if self.sleep:
- time.sleep(self.sleep)
+ time.sleep(self.sleep())
# download from URL
if not self.download(url):
@@ -392,11 +394,11 @@ class DownloadJob(Job):
def initialize(self, kwdict=None):
"""Delayed initialization of PathFormat, etc."""
cfg = self.extractor.config
- pathfmt = self.pathfmt = util.PathFormat(self.extractor)
+ pathfmt = self.pathfmt = path.PathFormat(self.extractor)
if kwdict:
pathfmt.set_directory(kwdict)
- self.sleep = cfg("sleep")
+ self.sleep = util.build_duration_func(cfg("sleep"))
self.fallback = cfg("fallback", True)
if not cfg("download", True):
# monkey-patch method to do nothing and always return True
@@ -404,17 +406,18 @@ class DownloadJob(Job):
archive = cfg("archive")
if archive:
- path = util.expand_path(archive)
+ archive = util.expand_path(archive)
try:
- if "{" in path:
- path = util.Formatter(path).format_map(kwdict)
- self.archive = util.DownloadArchive(path, self.extractor)
+ if "{" in archive:
+ archive = formatter.parse(archive).format_map(kwdict)
+ self.archive = util.DownloadArchive(archive, self.extractor)
except Exception as exc:
self.extractor.log.warning(
"Failed to open download archive at '%s' ('%s: %s')",
- path, exc.__class__.__name__, exc)
+ archive, exc.__class__.__name__, exc)
else:
- self.extractor.log.debug("Using download archive '%s'", path)
+ self.extractor.log.debug(
+ "Using download archive '%s'", archive)
skip = cfg("skip", True)
if skip:
@@ -469,6 +472,7 @@ class DownloadJob(Job):
except Exception as exc:
pp_log.error("'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
+ pp_log.debug("", exc_info=True)
else:
pp_list.append(pp_obj)
@@ -539,7 +543,7 @@ class SimulationJob(DownloadJob):
self.pathfmt.set_filename(kwdict)
self.out.skip(self.pathfmt.path)
if self.sleep:
- time.sleep(self.sleep)
+ time.sleep(self.sleep())
if self.archive:
self.archive.add(kwdict)
@@ -693,9 +697,10 @@ class DataJob(Job):
self.filter = util.identity if private else util.filter_dict
def run(self):
- sleep = self.extractor.config("sleep-extractor")
+ sleep = util.build_duration_func(
+ self.extractor.config("sleep-extractor"))
if sleep:
- time.sleep(sleep)
+ time.sleep(sleep())
# collect data
try:
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index a046a27..5f7b281 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -372,6 +372,16 @@ def build_parser():
help="Write metadata to separate JSON files",
)
postprocessor.add_argument(
+ "--write-infojson",
+ dest="postprocessors",
+ action="append_const", const={
+ "name" : "metadata",
+ "event" : "init",
+ "filename": "info.json",
+ },
+ help="Write gallery metadata to a info.json file",
+ )
+ postprocessor.add_argument(
"--write-tags",
dest="postprocessors",
action="append_const", const={"name": "metadata", "mode": "tags"},
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 7e1f8c1..d4d295f 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -10,7 +10,8 @@ import os
import sys
import shutil
import logging
-from . import config, util
+import unicodedata
+from . import config, util, formatter
# --------------------------------------------------------------------
@@ -91,13 +92,13 @@ class Formatter(logging.Formatter):
if isinstance(fmt, dict):
for key in ("debug", "info", "warning", "error"):
value = fmt[key] if key in fmt else LOG_FORMAT
- fmt[key] = (util.Formatter(value).format_map,
+ fmt[key] = (formatter.parse(value).format_map,
"{asctime" in value)
else:
if fmt == LOG_FORMAT:
fmt = (fmt.format_map, False)
else:
- fmt = (util.Formatter(fmt).format_map, "{asctime" in fmt)
+ fmt = (formatter.parse(fmt).format_map, "{asctime" in fmt)
fmt = {"debug": fmt, "info": fmt, "warning": fmt, "error": fmt}
self.formats = fmt
@@ -257,6 +258,9 @@ class NullOutput():
def success(self, path, tries):
"""Print a message indicating the completion of a download"""
+ def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
+ """Display download progress"""
+
class PipeOutput(NullOutput):
@@ -270,9 +274,14 @@ class PipeOutput(NullOutput):
class TerminalOutput(NullOutput):
def __init__(self):
- self.short = config.get(("output",), "shorten", True)
- if self.short:
- self.width = shutil.get_terminal_size().columns - OFFSET
+ shorten = config.get(("output",), "shorten", True)
+ if shorten:
+ func = shorten_string_eaw if shorten == "eaw" else shorten_string
+ limit = shutil.get_terminal_size().columns - OFFSET
+ sep = CHAR_ELLIPSIES
+ self.shorten = lambda txt: func(txt, limit, sep)
+ else:
+ self.shorten = util.identity
def start(self, path):
print(self.shorten(" " + path), end="", flush=True)
@@ -283,16 +292,14 @@ class TerminalOutput(NullOutput):
def success(self, path, tries):
print("\r", self.shorten(CHAR_SUCCESS + path), sep="")
- def shorten(self, txt):
- """Reduce the length of 'txt' to the width of the terminal"""
- if self.short and len(txt) > self.width:
- hwidth = self.width // 2 - OFFSET
- return "".join((
- txt[:hwidth-1],
- CHAR_ELLIPSIES,
- txt[-hwidth-(self.width % 2):]
- ))
- return txt
+ def progress(self, bytes_total, bytes_downloaded, bytes_per_second):
+ bdl = util.format_value(bytes_downloaded)
+ bps = util.format_value(bytes_per_second)
+ if bytes_total is None:
+ print("\r{:>7}B {:>7}B/s ".format(bdl, bps), end="")
+ else:
+ print("\r{:>3}% {:>7}B {:>7}B/s ".format(
+ bytes_downloaded * 100 // bytes_total, bdl, bps), end="")
class ColorOutput(TerminalOutput):
@@ -307,6 +314,56 @@ class ColorOutput(TerminalOutput):
print("\r\033[1;32m", self.shorten(path), "\033[0m", sep="")
+class EAWCache(dict):
+
+ def __missing__(self, key):
+ width = self[key] = \
+ 2 if unicodedata.east_asian_width(key) in "WF" else 1
+ return width
+
+
+def shorten_string(txt, limit, sep="…"):
+ """Limit width of 'txt'; assume all characters have a width of 1"""
+ if len(txt) <= limit:
+ return txt
+ limit -= len(sep)
+ return txt[:limit // 2] + sep + txt[-((limit+1) // 2):]
+
+
+def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
+ """Limit width of 'txt'; check for east-asian characters with width > 1"""
+ char_widths = [cache[c] for c in txt]
+ text_width = sum(char_widths)
+
+ if text_width <= limit:
+ # no shortening required
+ return txt
+
+ limit -= len(sep)
+ if text_width == len(txt):
+ # all characters have a width of 1
+ return txt[:limit // 2] + sep + txt[-((limit+1) // 2):]
+
+ # wide characters
+ left = 0
+ lwidth = limit // 2
+ while True:
+ lwidth -= char_widths[left]
+ if lwidth < 0:
+ break
+ left += 1
+
+ right = -1
+ rwidth = (limit+1) // 2 + (lwidth + char_widths[left])
+ while True:
+ rwidth -= char_widths[right]
+ if rwidth < 0:
+ break
+ right -= 1
+
+ return txt[:left] + sep + txt[right+1:]
+
+
if util.WINDOWS:
ANSI = os.environ.get("TERM") == "ANSI"
OFFSET = 1
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
new file mode 100644
index 0000000..12ce8ad
--- /dev/null
+++ b/gallery_dl/path.py
@@ -0,0 +1,332 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Filesystem path handling"""
+
+import os
+import re
+import time
+import shutil
+import functools
+from email.utils import mktime_tz, parsedate_tz
+from . import util, formatter, exception
+
+WINDOWS = util.WINDOWS
+
+
+class PathFormat():
+ EXTENSION_MAP = {
+ "jpeg": "jpg",
+ "jpe" : "jpg",
+ "jfif": "jpg",
+ "jif" : "jpg",
+ "jfi" : "jpg",
+ }
+
+ def __init__(self, extractor):
+ config = extractor.config
+ kwdefault = config("keywords-default")
+
+ filename_fmt = config("filename")
+ try:
+ if filename_fmt is None:
+ filename_fmt = extractor.filename_fmt
+ elif isinstance(filename_fmt, dict):
+ self.filename_conditions = [
+ (util.compile_expression(expr),
+ formatter.parse(fmt, kwdefault).format_map)
+ for expr, fmt in filename_fmt.items() if expr
+ ]
+ self.build_filename = self.build_filename_conditional
+ filename_fmt = filename_fmt.get("", extractor.filename_fmt)
+
+ self.filename_formatter = formatter.parse(
+ filename_fmt, kwdefault).format_map
+ except Exception as exc:
+ raise exception.FilenameFormatError(exc)
+
+ directory_fmt = config("directory")
+ try:
+ if directory_fmt is None:
+ directory_fmt = extractor.directory_fmt
+ elif isinstance(directory_fmt, dict):
+ self.directory_conditions = [
+ (util.compile_expression(expr), [
+ formatter.parse(fmt, kwdefault).format_map
+ for fmt in fmts
+ ])
+ for expr, fmts in directory_fmt.items() if expr
+ ]
+ self.build_directory = self.build_directory_conditional
+ directory_fmt = directory_fmt.get("", extractor.directory_fmt)
+
+ self.directory_formatters = [
+ formatter.parse(dirfmt, kwdefault).format_map
+ for dirfmt in directory_fmt
+ ]
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
+ self.kwdict = {}
+ self.directory = self.realdirectory = \
+ self.filename = self.extension = self.prefix = \
+ self.path = self.realpath = self.temppath = ""
+ self.delete = self._create_directory = False
+
+ extension_map = config("extension-map")
+ if extension_map is None:
+ extension_map = self.EXTENSION_MAP
+ self.extension_map = extension_map.get
+
+ restrict = config("path-restrict", "auto")
+ replace = config("path-replace", "_")
+ if restrict == "auto":
+ restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
+ elif restrict == "unix":
+ restrict = "/"
+ elif restrict == "windows":
+ restrict = "\\\\|/<>:\"?*"
+ elif restrict == "ascii":
+ restrict = "^0-9A-Za-z_."
+ self.clean_segment = self._build_cleanfunc(restrict, replace)
+
+ remove = config("path-remove", "\x00-\x1f\x7f")
+ self.clean_path = self._build_cleanfunc(remove, "")
+
+ strip = config("path-strip", "auto")
+ if strip == "auto":
+ strip = ". " if WINDOWS else ""
+ elif strip == "unix":
+ strip = ""
+ elif strip == "windows":
+ strip = ". "
+ self.strip = strip
+
+ basedir = extractor._parentdir
+ if not basedir:
+ basedir = config("base-directory")
+ sep = os.sep
+ if basedir is None:
+ basedir = "." + sep + "gallery-dl" + sep
+ elif basedir:
+ basedir = util.expand_path(basedir)
+ altsep = os.altsep
+ if altsep and altsep in basedir:
+ basedir = basedir.replace(altsep, sep)
+ if basedir[-1] != sep:
+ basedir += sep
+ basedir = self.clean_path(basedir)
+ self.basedirectory = basedir
+
+ @staticmethod
+ def _build_cleanfunc(chars, repl):
+ if not chars:
+ return util.identity
+ elif isinstance(chars, dict):
+ def func(x, table=str.maketrans(chars)):
+ return x.translate(table)
+ elif len(chars) == 1:
+ def func(x, c=chars, r=repl):
+ return x.replace(c, r)
+ else:
+ return functools.partial(
+ re.compile("[" + chars + "]").sub, repl)
+ return func
+
+ def open(self, mode="wb"):
+ """Open file and return a corresponding file object"""
+ return open(self.temppath, mode)
+
+ def exists(self):
+ """Return True if the file exists on disk"""
+ if self.extension and os.path.exists(self.realpath):
+ return self.check_file()
+ return False
+
+ @staticmethod
+ def check_file():
+ return True
+
+ def _enum_file(self):
+ num = 1
+ try:
+ while True:
+ self.prefix = str(num) + "."
+ self.set_extension(self.extension, False)
+ os.stat(self.realpath) # raises OSError if file doesn't exist
+ num += 1
+ except OSError:
+ pass
+ return False
+
def set_directory(self, kwdict):
    """Build the directory paths for 'kwdict'

    Stores the result in 'directory' and 'realdirectory' and sets the
    '_create_directory' flag, which build_path() checks before calling
    os.makedirs().
    """
    self.kwdict = kwdict
    sep = os.sep

    segments = self.build_directory(kwdict)
    directory = self.basedirectory
    if segments:
        directory += self.clean_path(sep.join(segments) + sep)
    self.directory = directory

    realdirectory = directory
    if WINDOWS:
        # Enable longer-than-260-character paths on Windows
        realdirectory = "\\\\?\\" + os.path.abspath(realdirectory)

        # abspath() in Python 3.7+ removes trailing path separators (#402)
        if realdirectory[-1] != sep:
            realdirectory += sep

    self.realdirectory = realdirectory
    self._create_directory = True
+
def set_filename(self, kwdict):
    """Store 'kwdict', reset temp path and prefix, and build the file path

    The "extension" value of 'kwdict' is passed through the extension
    map and written back. Without an extension no path gets built and
    'filename' is set to an empty string.
    """
    self.kwdict = kwdict
    self.prefix = ""
    self.temppath = ""

    original = kwdict["extension"]
    mapped = self.extension_map(original, original)
    kwdict["extension"] = mapped
    self.extension = mapped

    if not mapped:
        self.filename = ""
        return
    self.build_path()
+
def set_extension(self, extension, real=True):
    """Set the filename extension and rebuild the path

    'extension' is passed through the extension map first. It only
    replaces 'self.extension' if 'real' is true, but the "extension"
    value in 'kwdict' always becomes the current prefix plus the
    mapped extension.
    """
    mapped = self.extension_map(extension, extension)
    if real:
        self.extension = mapped
    self.kwdict["extension"] = "".join((self.prefix, mapped))
    self.build_path()
+
def fix_extension(self, _=None):
    """Fix filenames without a given filename extension

    Rebuilds the path with an empty extension and drops a trailing
    dot from 'path', 'realpath' and 'temppath'. Always returns True.
    """
    if self.extension:
        return True

    self.set_extension("", False)
    if self.path[-1] == ".":
        self.path = self.path[:-1]
        trimmed = self.realpath[:-1]
        self.temppath = trimmed
        self.realpath = trimmed
    return True
+
def build_filename(self, kwdict):
    """Format 'kwdict' into a filename and clean the result

    Any exception raised along the way is re-raised
    as FilenameFormatError.
    """
    try:
        name = self.filename_formatter(kwdict)
        name = self.clean_segment(name)
        return self.clean_path(name)
    except Exception as exc:
        raise exception.FilenameFormatError(exc)
+
def build_filename_conditional(self, kwdict):
    """Format 'kwdict' with the formatter of the first matching condition

    Falls back to the default filename formatter if no condition
    matches. Any exception is re-raised as FilenameFormatError.
    """
    try:
        unmatched = object()
        formatter = next(
            (fmt for condition, fmt in self.filename_conditions
             if condition(kwdict)),
            unmatched,
        )
        if formatter is unmatched:
            formatter = self.filename_formatter
        name = self.clean_segment(formatter(kwdict))
        return self.clean_path(name)
    except Exception as exc:
        raise exception.FilenameFormatError(exc)
+
def build_directory(self, kwdict):
    """Format 'kwdict' into a list of cleaned directory segments

    Segments that are empty after stripping are left out.
    Any exception is re-raised as DirectoryFormatError.
    """
    trailing = self.strip
    result = []

    try:
        for formatter in self.directory_formatters:
            part = formatter(kwdict).strip()
            if trailing:
                # remove trailing dots and spaces (#647)
                part = part.rstrip(trailing)
            if not part:
                continue
            result.append(self.clean_segment(part))
    except Exception as exc:
        raise exception.DirectoryFormatError(exc)
    return result
+
def build_directory_conditional(self, kwdict):
    """Build directory segments with the first matching set of formatters

    Falls back to the default directory formatters if no condition
    matches. Segments that are empty after stripping are left out.
    Any exception is re-raised as DirectoryFormatError.
    """
    trailing = self.strip
    result = []

    try:
        unmatched = object()
        chosen = next(
            (fmts for condition, fmts in self.directory_conditions
             if condition(kwdict)),
            unmatched,
        )
        if chosen is unmatched:
            chosen = self.directory_formatters

        for formatter in chosen:
            part = formatter(kwdict).strip()
            if trailing:
                part = part.rstrip(trailing)
            if not part:
                continue
            result.append(self.clean_segment(part))
    except Exception as exc:
        raise exception.DirectoryFormatError(exc)
    return result
+
def build_path(self):
    """Combine directory and filename to full paths

    Creates 'realdirectory' first if set_directory() flagged it.
    'temppath' is only set if it is still empty.
    """
    if self._create_directory:
        os.makedirs(self.realdirectory, exist_ok=True)
        self._create_directory = False

    name = self.build_filename(self.kwdict)
    self.filename = name
    self.path = self.directory + name
    self.realpath = self.realdirectory + name

    if not self.temppath:
        self.temppath = self.realpath
+
def part_enable(self, part_directory=None):
    """Enable .part file usage

    With an extension set, ".part" is appended to 'temppath';
    otherwise the path is rebuilt with "part" as temporary extension.
    If 'part_directory' is given, the temporary file is placed there.
    """
    if not self.extension:
        self.set_extension("part", False)
    else:
        self.temppath += ".part"

    if part_directory:
        basename = os.path.basename(self.temppath)
        self.temppath = os.path.join(part_directory, basename)
+
def part_size(self):
    """Return the size of the file at 'temppath', or 0 if stat() fails"""
    try:
        info = os.stat(self.temppath)
    except OSError:
        return 0
    return info.st_size
+
def finalize(self):
    """Move tempfile to its target location

    If 'delete' is set, the temp file is removed instead and the flag
    is reset. Afterwards the modification time is taken from the
    "_mtime" value of 'kwdict', if there is one; failures while
    setting it are ignored.
    """
    if self.delete:
        self.delete = False
        os.unlink(self.temppath)
        return

    source, target = self.temppath, self.realpath
    if source != target:
        # Move temp file to its actual location
        try:
            os.replace(source, target)
        except OSError:
            # fall back to copy + delete when replacing isn't possible
            shutil.copyfile(source, target)
            os.unlink(source)

    mtime = self.kwdict.get("_mtime")
    if not mtime:
        return

    # Set file modification time
    try:
        if isinstance(mtime, str):
            mtime = mktime_tz(parsedate_tz(mtime))
        os.utime(target, (time.time(), mtime))
    except Exception:
        pass
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index 1bca593..a08cdc4 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -9,6 +9,8 @@
"""Compare versions of the same file and replace/enumerate them on mismatch"""
from .common import PostProcessor
+from .. import text, util, exception
+import sys
import os
@@ -19,16 +21,33 @@ class ComparePP(PostProcessor):
if options.get("shallow"):
self._compare = self._compare_size
- job.register_hooks({"file": (
- self.enumerate
- if options.get("action") == "enumerate" else
- self.compare
- )}, options)
+ action = options.get("action")
+ if action == "enumerate":
+ job.register_hooks({"file": self.enumerate}, options)
+ else:
+ job.register_hooks({"file": self.compare}, options)
+ action, _, smax = action.partition(":")
+ self._skipmax = text.parse_int(smax)
+ self._skipexc = self._skipcnt = 0
+ if action == "abort":
+ self._skipexc = exception.StopExtraction
+ elif action == "terminate":
+ self._skipexc = exception.TerminateExtraction
+ elif action == "exit":
+ self._skipexc = sys.exit
def compare(self, pathfmt):
try:
if self._compare(pathfmt.realpath, pathfmt.temppath):
+ if self._skipexc:
+ self._skipcnt += 1
+ if self._skipcnt >= self._skipmax:
+ util.remove_file(pathfmt.temppath)
+ print()
+ raise self._skipexc()
pathfmt.delete = True
+ else:
+ self._skipcnt = 0
except OSError:
pass
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index 8fed723..cc217c3 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -9,7 +9,7 @@
"""Execute processes"""
from .common import PostProcessor
-from .. import util
+from .. import util, formatter
import subprocess
@@ -33,7 +33,7 @@ class ExecPP(PostProcessor):
self.args = args
execute = self.exec_string
else:
- self.args = [util.Formatter(arg) for arg in args]
+ self.args = [formatter.parse(arg) for arg in args]
execute = self.exec_list
events = options.get("event")
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index c721612..fe65c88 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -9,7 +9,7 @@
"""Write metadata to external files"""
from .common import PostProcessor
-from .. import util
+from .. import util, formatter
import os
@@ -24,7 +24,7 @@ class MetadataPP(PostProcessor):
cfmt = options.get("content-format") or options.get("format")
if isinstance(cfmt, list):
cfmt = "\n".join(cfmt) + "\n"
- self._content_fmt = util.Formatter(cfmt).format_map
+ self._content_fmt = formatter.parse(cfmt).format_map
ext = "txt"
elif mode == "tags":
self.write = self._write_tags
@@ -45,10 +45,10 @@ class MetadataPP(PostProcessor):
extfmt = options.get("extension-format")
if filename:
self._filename = self._filename_custom
- self._filename_fmt = util.Formatter(filename).format_map
+ self._filename_fmt = formatter.parse(filename).format_map
elif extfmt:
self._filename = self._filename_extfmt
- self._extension_fmt = util.Formatter(extfmt).format_map
+ self._extension_fmt = formatter.parse(extfmt).format_map
else:
self.extension = options.get("extension", ext)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 935bf99..4a7fdbf 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -12,20 +12,14 @@ import re
import os
import sys
import json
-import time
import random
-import shutil
-import string
-import _string
import sqlite3
import binascii
import datetime
-import operator
import functools
import itertools
import urllib.parse
from http.cookiejar import Cookie
-from email.utils import mktime_tz, parsedate_tz
from . import text, exception
@@ -97,15 +91,15 @@ def generate_token(size=16):
return binascii.hexlify(data).decode()
def format_value(value, suffixes="kMGTPEZY"):
    """Shorten the decimal representation of 'value' with a magnitude suffix

    Values with fewer than four characters are returned as is. Longer
    ones keep one to three leading digits plus two decimals, which are
    truncated and not rounded: 12345 -> "12.34k"
    """
    digits = format(value)
    count = len(digits)
    if count < 4:
        return digits

    # number of characters in front of the decimal point (1 to 3)
    split = (count - 1) % 3 + 1
    return "{}.{}{}".format(
        digits[:split],
        digits[split:split + 2],
        suffixes[(count - 4) // 3],
    )
def combine_dict(a, b):
@@ -139,6 +133,17 @@ def delete_items(obj, keys):
del obj[key]
def enumerate_reversed(iterable, start=0, length=None):
    """Enumerate 'iterable' and return its elements in reverse order

    Returns an iterator of (index, element) pairs that begins with the
    last element. Every element is paired with the index it would get
    from enumerate(iterable, start), i.e. indices count down from
    'start + length - 1' to 'start'.

    'iterable' must be usable with reversed(). 'length' can be given
    to skip the len() call.
    """
    if length is None:
        length = len(iterable)

    # range() excludes its stop value, hence 'start - 1' to include 'start'
    return zip(
        range(start + length - 1, start - 1, -1),
        reversed(iterable),
    )
+
+
def number_to_string(value, numbers=(int, float)):
"""Convert numbers (int, float) to string; Return everything else as is."""
return str(value) if value.__class__ in numbers else value
@@ -409,6 +414,24 @@ def compile_expression(expr, name="<expr>", globals=GLOBALS):
return functools.partial(eval, code_object, globals)
def build_duration_func(duration, min=0.0):
    """Build a function that returns a duration when called

    - falsy 'duration': None is returned instead of a function
    - pair of values: the function returns a random value between them
    - single value: the function always returns that value

    Values that are not greater than 'min' are replaced by 'min'.
    """
    if not duration:
        return None

    try:
        lower, upper = duration
    except TypeError:
        # not unpackable: 'duration' is a single value
        pass
    else:
        bounds = (max(min, lower), max(min, upper))
        return functools.partial(random.uniform, *bounds)

    return functools.partial(identity, max(min, duration))
+
+
def build_predicate(predicates):
if not predicates:
return lambda url, kwdict: True
@@ -534,557 +557,6 @@ class ExtendedUrl():
return self.value
-class Formatter():
- """Custom, extended version of string.Formatter
-
- This string formatter implementation is a mostly performance-optimized
- variant of the original string.Formatter class. Unnecessary features have
- been removed (positional arguments, unused argument check) and new
- formatting options have been added.
-
- Extra Conversions:
- - "l": calls str.lower on the target value
- - "u": calls str.upper
- - "c": calls str.capitalize
- - "C": calls string.capwords
- - "j". calls json.dumps
- - "t": calls str.strip
- - "d": calls text.parse_timestamp
- - "U": calls urllib.parse.unquote
- - "S": calls util.to_string()
- - "T": calls util.to_timestamü()
- - Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
-
- Extra Format Specifiers:
- - "?<before>/<after>/":
- Adds <before> and <after> to the actual value if it evaluates to True.
- Otherwise the whole replacement field becomes an empty string.
- Example: {f:?-+/+-/} -> "-+Example+-" (if "f" contains "Example")
- -> "" (if "f" is None, 0, "")
-
- - "L<maxlen>/<replacement>/":
- Replaces the output with <replacement> if its length (in characters)
- exceeds <maxlen>. Otherwise everything is left as is.
- Example: {f:L5/too long/} -> "foo" (if "f" is "foo")
- -> "too long" (if "f" is "foobar")
-
- - "J<separator>/":
- Joins elements of a list (or string) using <separator>
- Example: {f:J - /} -> "a - b - c" (if "f" is ["a", "b", "c"])
-
- - "R<old>/<new>/":
- Replaces all occurrences of <old> with <new>
- Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
- """
- CACHE = {}
- CONVERSIONS = {
- "l": str.lower,
- "u": str.upper,
- "c": str.capitalize,
- "C": string.capwords,
- "j": json.dumps,
- "t": str.strip,
- "T": to_timestamp,
- "d": text.parse_timestamp,
- "U": urllib.parse.unquote,
- "S": to_string,
- "s": str,
- "r": repr,
- "a": ascii,
- }
-
- def __init__(self, format_string, default=None):
- self.default = default
- key = (format_string, default)
-
- try:
- self.result, self.fields = self.CACHE[key]
- except KeyError:
- self.result = []
- self.fields = []
-
- for literal_text, field_name, format_spec, conv in \
- _string.formatter_parser(format_string):
- if literal_text:
- self.result.append(literal_text)
- if field_name:
- self.fields.append((
- len(self.result),
- self._field_access(field_name, format_spec, conv),
- ))
- self.result.append("")
-
- self.CACHE[key] = (self.result, self.fields)
-
- if len(self.result) == 1:
- if self.fields:
- self.format_map = self.fields[0][1]
- else:
- self.format_map = lambda _: format_string
- del self.result, self.fields
-
- def format_map(self, kwdict):
- """Apply 'kwdict' to the initial format_string and return its result"""
- result = self.result
- for index, func in self.fields:
- result[index] = func(kwdict)
- return "".join(result)
-
- def _field_access(self, field_name, format_spec, conversion):
- fmt = self._parse_format_spec(format_spec, conversion)
-
- if "|" in field_name:
- return self._apply_list([
- self._parse_field_name(fn)
- for fn in field_name.split("|")
- ], fmt)
- else:
- key, funcs = self._parse_field_name(field_name)
- if funcs:
- return self._apply(key, funcs, fmt)
- return self._apply_simple(key, fmt)
-
- @staticmethod
- def _parse_field_name(field_name):
- first, rest = _string.formatter_field_name_split(field_name)
- funcs = []
-
- for is_attr, key in rest:
- if is_attr:
- func = operator.attrgetter
- else:
- func = operator.itemgetter
- try:
- if ":" in key:
- start, _, stop = key.partition(":")
- stop, _, step = stop.partition(":")
- start = int(start) if start else None
- stop = int(stop) if stop else None
- step = int(step) if step else None
- key = slice(start, stop, step)
- except TypeError:
- pass # key is an integer
-
- funcs.append(func(key))
-
- return first, funcs
-
- def _parse_format_spec(self, format_spec, conversion):
- fmt = self._build_format_func(format_spec)
- if not conversion:
- return fmt
-
- conversion = self.CONVERSIONS[conversion]
- if fmt is format:
- return conversion
- else:
- def chain(obj):
- return fmt(conversion(obj))
- return chain
-
- def _build_format_func(self, format_spec):
- if format_spec:
- fmt = format_spec[0]
- if fmt == "?":
- return self._parse_optional(format_spec)
- if fmt == "L":
- return self._parse_maxlen(format_spec)
- if fmt == "J":
- return self._parse_join(format_spec)
- if fmt == "R":
- return self._parse_replace(format_spec)
- return self._default_format(format_spec)
- return format
-
- def _apply(self, key, funcs, fmt):
- def wrap(kwdict):
- try:
- obj = kwdict[key]
- for func in funcs:
- obj = func(obj)
- except Exception:
- obj = self.default
- return fmt(obj)
- return wrap
-
- def _apply_simple(self, key, fmt):
- def wrap(kwdict):
- return fmt(kwdict[key] if key in kwdict else self.default)
- return wrap
-
- def _apply_list(self, lst, fmt):
- def wrap(kwdict):
- for key, funcs in lst:
- try:
- obj = kwdict[key]
- for func in funcs:
- obj = func(obj)
- if obj:
- break
- except Exception:
- pass
- else:
- obj = self.default
- return fmt(obj)
- return wrap
-
- def _parse_optional(self, format_spec):
- before, after, format_spec = format_spec.split("/", 2)
- before = before[1:]
- fmt = self._build_format_func(format_spec)
-
- def optional(obj):
- return before + fmt(obj) + after if obj else ""
- return optional
-
- def _parse_maxlen(self, format_spec):
- maxlen, replacement, format_spec = format_spec.split("/", 2)
- maxlen = text.parse_int(maxlen[1:])
- fmt = self._build_format_func(format_spec)
-
- def mlen(obj):
- obj = fmt(obj)
- return obj if len(obj) <= maxlen else replacement
- return mlen
-
- def _parse_join(self, format_spec):
- separator, _, format_spec = format_spec.partition("/")
- separator = separator[1:]
- fmt = self._build_format_func(format_spec)
-
- def join(obj):
- return fmt(separator.join(obj))
- return join
-
- def _parse_replace(self, format_spec):
- old, new, format_spec = format_spec.split("/", 2)
- old = old[1:]
- fmt = self._build_format_func(format_spec)
-
- def replace(obj):
- return fmt(obj.replace(old, new))
- return replace
-
- @staticmethod
- def _default_format(format_spec):
- def wrap(obj):
- return format(obj, format_spec)
- return wrap
-
-
-class PathFormat():
- EXTENSION_MAP = {
- "jpeg": "jpg",
- "jpe" : "jpg",
- "jfif": "jpg",
- "jif" : "jpg",
- "jfi" : "jpg",
- }
-
- def __init__(self, extractor):
- config = extractor.config
- kwdefault = config("keywords-default")
-
- filename_fmt = config("filename")
- try:
- if filename_fmt is None:
- filename_fmt = extractor.filename_fmt
- elif isinstance(filename_fmt, dict):
- self.filename_conditions = [
- (compile_expression(expr),
- Formatter(fmt, kwdefault).format_map)
- for expr, fmt in filename_fmt.items() if expr
- ]
- self.build_filename = self.build_filename_conditional
- filename_fmt = filename_fmt.get("", extractor.filename_fmt)
-
- self.filename_formatter = Formatter(
- filename_fmt, kwdefault).format_map
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- directory_fmt = config("directory")
- try:
- if directory_fmt is None:
- directory_fmt = extractor.directory_fmt
- elif isinstance(directory_fmt, dict):
- self.directory_conditions = [
- (compile_expression(expr), [
- Formatter(fmt, kwdefault).format_map
- for fmt in fmts
- ])
- for expr, fmts in directory_fmt.items() if expr
- ]
- self.build_directory = self.build_directory_conditional
- directory_fmt = directory_fmt.get("", extractor.directory_fmt)
-
- self.directory_formatters = [
- Formatter(dirfmt, kwdefault).format_map
- for dirfmt in directory_fmt
- ]
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- self.kwdict = {}
- self.directory = self.realdirectory = \
- self.filename = self.extension = self.prefix = \
- self.path = self.realpath = self.temppath = ""
- self.delete = self._create_directory = False
-
- extension_map = config("extension-map")
- if extension_map is None:
- extension_map = self.EXTENSION_MAP
- self.extension_map = extension_map.get
-
- restrict = config("path-restrict", "auto")
- replace = config("path-replace", "_")
- if restrict == "auto":
- restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
- elif restrict == "unix":
- restrict = "/"
- elif restrict == "windows":
- restrict = "\\\\|/<>:\"?*"
- elif restrict == "ascii":
- restrict = "^0-9A-Za-z_."
- self.clean_segment = self._build_cleanfunc(restrict, replace)
-
- remove = config("path-remove", "\x00-\x1f\x7f")
- self.clean_path = self._build_cleanfunc(remove, "")
-
- strip = config("path-strip", "auto")
- if strip == "auto":
- strip = ". " if WINDOWS else ""
- elif strip == "unix":
- strip = ""
- elif strip == "windows":
- strip = ". "
- self.strip = strip
-
- basedir = extractor._parentdir
- if not basedir:
- basedir = config("base-directory")
- sep = os.sep
- if basedir is None:
- basedir = "." + sep + "gallery-dl" + sep
- elif basedir:
- basedir = expand_path(basedir)
- altsep = os.altsep
- if altsep and altsep in basedir:
- basedir = basedir.replace(altsep, sep)
- if basedir[-1] != sep:
- basedir += sep
- basedir = self.clean_path(basedir)
- self.basedirectory = basedir
-
- @staticmethod
- def _build_cleanfunc(chars, repl):
- if not chars:
- return identity
- elif isinstance(chars, dict):
- def func(x, table=str.maketrans(chars)):
- return x.translate(table)
- elif len(chars) == 1:
- def func(x, c=chars, r=repl):
- return x.replace(c, r)
- else:
- return functools.partial(
- re.compile("[" + chars + "]").sub, repl)
- return func
-
- def open(self, mode="wb"):
- """Open file and return a corresponding file object"""
- return open(self.temppath, mode)
-
- def exists(self):
- """Return True if the file exists on disk"""
- if self.extension and os.path.exists(self.realpath):
- return self.check_file()
- return False
-
- @staticmethod
- def check_file():
- return True
-
- def _enum_file(self):
- num = 1
- try:
- while True:
- self.prefix = str(num) + "."
- self.set_extension(self.extension, False)
- os.stat(self.realpath) # raises OSError if file doesn't exist
- num += 1
- except OSError:
- pass
- return False
-
- def set_directory(self, kwdict):
- """Build directory path and create it if necessary"""
- self.kwdict = kwdict
- sep = os.sep
-
- segments = self.build_directory(kwdict)
- if segments:
- self.directory = directory = self.basedirectory + self.clean_path(
- sep.join(segments) + sep)
- else:
- self.directory = directory = self.basedirectory
-
- if WINDOWS:
- # Enable longer-than-260-character paths on Windows
- directory = "\\\\?\\" + os.path.abspath(directory)
-
- # abspath() in Python 3.7+ removes trailing path separators (#402)
- if directory[-1] != sep:
- directory += sep
-
- self.realdirectory = directory
- self._create_directory = True
-
- def set_filename(self, kwdict):
- """Set general filename data"""
- self.kwdict = kwdict
- self.temppath = self.prefix = ""
-
- ext = kwdict["extension"]
- kwdict["extension"] = self.extension = self.extension_map(ext, ext)
-
- if self.extension:
- self.build_path()
- else:
- self.filename = ""
-
- def set_extension(self, extension, real=True):
- """Set filename extension"""
- extension = self.extension_map(extension, extension)
- if real:
- self.extension = extension
- self.kwdict["extension"] = self.prefix + extension
- self.build_path()
-
- def fix_extension(self, _=None):
- """Fix filenames without a given filename extension"""
- if not self.extension:
- self.set_extension("", False)
- if self.path[-1] == ".":
- self.path = self.path[:-1]
- self.temppath = self.realpath = self.realpath[:-1]
- return True
-
- def build_filename(self, kwdict):
- """Apply 'kwdict' to filename format string"""
- try:
- return self.clean_path(self.clean_segment(
- self.filename_formatter(kwdict)))
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- def build_filename_conditional(self, kwdict):
- try:
- for condition, formatter in self.filename_conditions:
- if condition(kwdict):
- break
- else:
- formatter = self.filename_formatter
- return self.clean_path(self.clean_segment(formatter(kwdict)))
- except Exception as exc:
- raise exception.FilenameFormatError(exc)
-
- def build_directory(self, kwdict):
- """Apply 'kwdict' to directory format strings"""
- segments = []
- append = segments.append
- strip = self.strip
-
- try:
- for formatter in self.directory_formatters:
- segment = formatter(kwdict).strip()
- if strip:
- # remove trailing dots and spaces (#647)
- segment = segment.rstrip(strip)
- if segment:
- append(self.clean_segment(segment))
- return segments
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- def build_directory_conditional(self, kwdict):
- segments = []
- append = segments.append
- strip = self.strip
-
- try:
- for condition, formatters in self.directory_conditions:
- if condition(kwdict):
- break
- else:
- formatters = self.directory_formatters
- for formatter in formatters:
- segment = formatter(kwdict).strip()
- if strip:
- segment = segment.rstrip(strip)
- if segment:
- append(self.clean_segment(segment))
- return segments
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- def build_path(self):
- """Combine directory and filename to full paths"""
- if self._create_directory:
- os.makedirs(self.realdirectory, exist_ok=True)
- self._create_directory = False
- self.filename = filename = self.build_filename(self.kwdict)
- self.path = self.directory + filename
- self.realpath = self.realdirectory + filename
- if not self.temppath:
- self.temppath = self.realpath
-
- def part_enable(self, part_directory=None):
- """Enable .part file usage"""
- if self.extension:
- self.temppath += ".part"
- else:
- self.set_extension("part", False)
- if part_directory:
- self.temppath = os.path.join(
- part_directory,
- os.path.basename(self.temppath),
- )
-
- def part_size(self):
- """Return size of .part file"""
- try:
- return os.stat(self.temppath).st_size
- except OSError:
- pass
- return 0
-
- def finalize(self):
- """Move tempfile to its target location"""
- if self.delete:
- self.delete = False
- os.unlink(self.temppath)
- return
-
- if self.temppath != self.realpath:
- # Move temp file to its actual location
- try:
- os.replace(self.temppath, self.realpath)
- except OSError:
- shutil.copyfile(self.temppath, self.realpath)
- os.unlink(self.temppath)
-
- mtime = self.kwdict.get("_mtime")
- if mtime:
- # Set file modification time
- try:
- if isinstance(mtime, str):
- mtime = mktime_tz(parsedate_tz(mtime))
- os.utime(self.realpath, (time.time(), mtime))
- except Exception:
- pass
-
-
class DownloadArchive():
def __init__(self, path, extractor):
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 7e6458f..acc3b8d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.18.4"
+__version__ = "1.19.0"