author    Unit 193 <unit193@unit193.net>  2024-06-06 02:40:15 -0400
committer Unit 193 <unit193@unit193.net>  2024-06-06 02:40:15 -0400
commit    1c28712d865e30ed752988ba0b6944882250b665 (patch)
tree      e5d5083a418f5c19616cb940c090c2dfb646d3cb /gallery_dl
parent    6e662211019a89caec44de8a57c675872b0b5498 (diff)

New upstream version 1.27.0  (tag: upstream/1.27.0)
Diffstat (limited to 'gallery_dl')

 gallery_dl/__init__.py                   |  21
 gallery_dl/archive.py                    |  98
 gallery_dl/cookies.py                    |  17
 gallery_dl/downloader/http.py            |   9
 gallery_dl/extractor/4archive.py         |   2
 gallery_dl/extractor/8chan.py            |   3
 gallery_dl/extractor/artstation.py       |   1
 gallery_dl/extractor/bluesky.py          |   8
 gallery_dl/extractor/cien.py             |  86
 gallery_dl/extractor/common.py           |  85
 gallery_dl/extractor/deviantart.py       |   5
 gallery_dl/extractor/exhentai.py         |  10
 gallery_dl/extractor/foolfuuka.py        |  24
 gallery_dl/extractor/furaffinity.py      |   2
 gallery_dl/extractor/gelbooru.py         |  35
 gallery_dl/extractor/gelbooru_v02.py     |  31
 gallery_dl/extractor/hiperdex.py         |  20
 gallery_dl/extractor/hotleak.py          |   1
 gallery_dl/extractor/imgur.py            |   6
 gallery_dl/extractor/inkbunny.py         |  13
 gallery_dl/extractor/kemonoparty.py      |  50
 gallery_dl/extractor/mastodon.py         |  71
 gallery_dl/extractor/newgrounds.py       |  47
 gallery_dl/extractor/oauth.py            |  12
 gallery_dl/extractor/patreon.py          |   5
 gallery_dl/extractor/pixeldrain.py       |  16
 gallery_dl/extractor/pixiv.py            |  19
 gallery_dl/extractor/poipiku.py          |   8
 gallery_dl/extractor/readcomiconline.py  |   5
 gallery_dl/extractor/reddit.py           |  49
 gallery_dl/extractor/seiga.py            |  58
 gallery_dl/extractor/slideshare.py       |  12
 gallery_dl/extractor/subscribestar.py    |   2
 gallery_dl/extractor/tapas.py            |  15
 gallery_dl/extractor/tcbscans.py         |   2
 gallery_dl/extractor/tumblr.py           |   6
 gallery_dl/extractor/twitter.py          | 321
 gallery_dl/extractor/vsco.py             |  30
 gallery_dl/extractor/wikimedia.py        |  20
 gallery_dl/formatter.py                  |   7
 gallery_dl/job.py                        |  76
 gallery_dl/option.py                     |  33
 gallery_dl/output.py                     |  92
 gallery_dl/postprocessor/common.py       |  25
 gallery_dl/postprocessor/exec.py         |   5
 gallery_dl/postprocessor/mtime.py        |   3
 gallery_dl/postprocessor/ugoira.py       |   6
 gallery_dl/text.py                       |  22
 gallery_dl/update.py                     | 218
 gallery_dl/util.py                       |  73
 gallery_dl/version.py                    |   3
 51 files changed, 1367 insertions(+), 421 deletions(-)
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 19ea77b..bc44b35 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -38,6 +38,11 @@ def main():
except ImportError:
import toml
config.load(args.configs_toml, strict=True, loads=toml.loads)
+ if not args.colors:
+ output.ANSI = False
+ config.set((), "colors", False)
+        if util.WINDOWS:
+            config.set(("output",), "ansi", False)
if args.filename:
filename = args.filename
if filename == "/O":
@@ -86,7 +91,7 @@ def main():
signal.signal(signal_num, signal.SIG_IGN)
# enable ANSI escape sequences on Windows
- if util.WINDOWS and config.get(("output",), "ansi"):
+ if util.WINDOWS and config.get(("output",), "ansi", output.COLORS):
from ctypes import windll, wintypes, byref
kernel32 = windll.kernel32
mode = wintypes.DWORD()
@@ -113,7 +118,7 @@ def main():
# loglevels
output.configure_logging(args.loglevel)
- if args.loglevel >= logging.ERROR:
+ if args.loglevel >= logging.WARNING:
config.set(("output",), "mode", "null")
config.set(("downloader",), "progress", None)
elif args.loglevel <= logging.DEBUG:
@@ -122,7 +127,7 @@ def main():
extra = ""
if util.EXECUTABLE:
- extra = " - Executable"
+ extra = " - Executable ({})".format(version.__variant__)
else:
git_head = util.git_head()
if git_head:
@@ -178,7 +183,13 @@ def main():
else:
extractor._module_iter = iter(modules[0])
- if args.list_modules:
+ if args.update:
+ from . import update
+ extr = update.UpdateExtractor.from_url("update:" + args.update)
+ ujob = update.UpdateJob(extr)
+ return ujob.run()
+
+ elif args.list_modules:
extractor.modules.append("")
sys.stdout.write("\n".join(extractor.modules))
@@ -202,6 +213,7 @@ def main():
if cnt is None:
log.error("Database file not available")
+ return 1
else:
log.info(
"Deleted %d %s from '%s'",
@@ -294,6 +306,7 @@ def main():
input_manager.next()
return retval
+ return 0
except KeyboardInterrupt:
raise SystemExit("\nKeyboardInterrupt")
diff --git a/gallery_dl/archive.py b/gallery_dl/archive.py
new file mode 100644
index 0000000..5f05bbf
--- /dev/null
+++ b/gallery_dl/archive.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Download Archives"""
+
+import os
+import sqlite3
+from . import formatter
+
+
+class DownloadArchive():
+
+ def __init__(self, path, format_string, pragma=None,
+ cache_key="_archive_key"):
+ try:
+ con = sqlite3.connect(path, timeout=60, check_same_thread=False)
+ except sqlite3.OperationalError:
+ os.makedirs(os.path.dirname(path))
+ con = sqlite3.connect(path, timeout=60, check_same_thread=False)
+ con.isolation_level = None
+
+ self.keygen = formatter.parse(format_string).format_map
+ self.connection = con
+ self.close = con.close
+ self.cursor = cursor = con.cursor()
+ self._cache_key = cache_key
+
+ if pragma:
+ for stmt in pragma:
+ cursor.execute("PRAGMA " + stmt)
+
+ try:
+ cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
+ except sqlite3.OperationalError:
+ # fallback for missing WITHOUT ROWID support (#553)
+ cursor.execute("CREATE TABLE IF NOT EXISTS archive "
+ "(entry TEXT PRIMARY KEY)")
+
+ def add(self, kwdict):
+ """Add item described by 'kwdict' to archive"""
+ key = kwdict.get(self._cache_key) or self.keygen(kwdict)
+ self.cursor.execute(
+ "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
+
+ def check(self, kwdict):
+ """Return True if the item described by 'kwdict' exists in archive"""
+ key = kwdict[self._cache_key] = self.keygen(kwdict)
+ self.cursor.execute(
+ "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
+ return self.cursor.fetchone()
+
+ def finalize(self):
+ pass
+
+
+class DownloadArchiveMemory(DownloadArchive):
+
+ def __init__(self, path, format_string, pragma=None,
+ cache_key="_archive_key"):
+ DownloadArchive.__init__(self, path, format_string, pragma, cache_key)
+ self.keys = set()
+
+ def add(self, kwdict):
+ self.keys.add(
+ kwdict.get(self._cache_key) or
+ self.keygen(kwdict))
+
+ def check(self, kwdict):
+ key = kwdict[self._cache_key] = self.keygen(kwdict)
+ if key in self.keys:
+ return True
+ self.cursor.execute(
+ "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
+ return self.cursor.fetchone()
+
+ def finalize(self):
+ if not self.keys:
+ return
+
+ cursor = self.cursor
+ with self.connection:
+ try:
+ cursor.execute("BEGIN")
+ except sqlite3.OperationalError:
+ pass
+
+ stmt = "INSERT OR IGNORE INTO archive (entry) VALUES (?)"
+ if len(self.keys) < 100:
+ for key in self.keys:
+ cursor.execute(stmt, (key,))
+ else:
+ cursor.executemany(stmt, ((key,) for key in self.keys))
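
Download-archive handling gets a dedicated module with this release. A minimal sketch of how a caller might drive this API, based only on the methods defined above (the path, format string, and kwdict values are illustrative):

from gallery_dl import archive

arch = archive.DownloadArchiveMemory(
    "/tmp/archive.sqlite3",           # database path (directory created on demand)
    "{category}{image_id}",           # format string that builds entry keys
    pragma=("journal_mode=WAL",))

item = {"category": "example", "image_id": 12345}
if not arch.check(item):              # also caches the key in item["_archive_key"]
    ...                               # download the file
    arch.add(item)                    # kept in memory until finalize()

arch.finalize()                       # write collected keys in one transaction
arch.close()
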
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 478abb6..b4986c1 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -10,7 +10,6 @@
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/cookies.py
import binascii
-import contextlib
import ctypes
import logging
import os
@@ -147,7 +146,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
set_cookie(Cookie(
0, name, value, None, False,
domain, bool(domain), domain.startswith("."),
- path, bool(path), secure, expires, False, None, None, {},
+ path, bool(path), secure, expires or None, False,
+ None, None, {},
))
if failed_cookies > 0:
@@ -682,7 +682,8 @@ def _get_gnome_keyring_password(browser_keyring_name):
# lists all keys and presumably searches for its key in the list.
# It appears that we must do the same.
# https://github.com/jaraco/keyring/issues/556
- with contextlib.closing(secretstorage.dbus_init()) as con:
+ con = secretstorage.dbus_init()
+ try:
col = secretstorage.get_default_collection(con)
label = browser_keyring_name + " Safe Storage"
for item in col.get_all_items():
@@ -691,6 +692,8 @@ def _get_gnome_keyring_password(browser_keyring_name):
else:
_log_error("Failed to read from GNOME keyring")
return b""
+ finally:
+ con.close()
def _get_linux_keyring_password(browser_keyring_name, keyring):
@@ -857,7 +860,7 @@ class DatabaseConnection():
def Popen_communicate(*args):
- proc = subprocess.Popen(
+ proc = util.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
try:
stdout, stderr = proc.communicate()
@@ -999,6 +1002,12 @@ def _decrypt_windows_dpapi(ciphertext):
def _find_most_recently_used_file(root, filename):
+ # if the provided root points to an exact profile path
+ # check if it contains the wanted filename
+ first_choice = os.path.join(root, filename)
+ if os.path.exists(first_choice):
+ return first_choice
+
# if there are multiple browser profiles, take the most recently used one
paths = []
for curr_root, dirs, files in os.walk(root):
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 0ff5dd9..54750ac 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -98,6 +98,8 @@ class HttpDownloader(DownloaderBase):
metadata = self.metadata
kwdict = pathfmt.kwdict
+ expected_status = kwdict.get(
+ "_http_expected_status", ())
adjust_extension = kwdict.get(
"_http_adjust_extension", self.adjust_extension)
@@ -151,7 +153,7 @@ class HttpDownloader(DownloaderBase):
# check response
code = response.status_code
- if code == 200: # OK
+ if code == 200 or code in expected_status: # OK
offset = 0
size = response.headers.get("Content-Length")
elif code == 206: # Partial Content
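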
@@ -399,6 +401,8 @@ MIME_TYPES = {
"video/webm": "webm",
"video/ogg" : "ogg",
"video/mp4" : "mp4",
+ "video/m4v" : "m4v",
+ "video/x-m4v": "m4v",
"video/quicktime": "mov",
"audio/wav" : "wav",
@@ -441,7 +445,8 @@ SIGNATURE_CHECKS = {
"cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
"psd" : lambda s: s[0:4] == b"8BPS",
"mp4" : lambda s: (s[4:8] == b"ftyp" and s[8:11] in (
- b"mp4", b"avc", b"iso", b"M4V")),
+ b"mp4", b"avc", b"iso")),
+ "m4v" : lambda s: s[4:11] == b"ftypM4V",
"mov" : lambda s: s[4:12] == b"ftypqt ",
"webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
"ogg" : lambda s: s[0:4] == b"OggS",
diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py
index d198369..948a605 100644
--- a/gallery_dl/extractor/4archive.py
+++ b/gallery_dl/extractor/4archive.py
@@ -64,7 +64,7 @@ class _4archiveThreadExtractor(Extractor):
data = {
"name": extr('class="name">', "</span>"),
"date": text.parse_datetime(
- extr('class="dateTime postNum" >', "<").strip(),
+ extr('class="dateTime postNum">', "<").strip(),
"%Y-%m-%d %H:%M:%S"),
"no" : text.parse_int(extr('href="#p', '"')),
}
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index fc16f43..a4b0997 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -26,6 +26,9 @@ class _8chanExtractor(Extractor):
self.root = "https://8chan." + match.group(1)
Extractor.__init__(self, match)
+ def _init(self):
+ self.cookies.set("TOS", "1", domain=self.root.rpartition("/")[2])
+
@memcache()
def cookies_prepare(self):
# fetch captcha cookies
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 49fde7b..ce1a78d 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -22,6 +22,7 @@ class ArtstationExtractor(Extractor):
directory_fmt = ("{category}", "{userinfo[username]}")
archive_fmt = "{asset[id]}"
browser = "firefox"
+ tls12 = False
root = "https://www.artstation.com"
def __init__(self, match):
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 84c3187..c97bf65 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -317,7 +317,7 @@ class BlueskyAPI():
def get_author_feed(self, actor, filter="posts_and_author_threads"):
endpoint = "app.bsky.feed.getAuthorFeed"
params = {
- "actor" : self._did_from_actor(actor),
+ "actor" : self._did_from_actor(actor, True),
"filter": filter,
"limit" : "100",
}
@@ -327,7 +327,7 @@ class BlueskyAPI():
endpoint = "app.bsky.feed.getFeed"
params = {
"feed" : "at://{}/app.bsky.feed.generator/{}".format(
- self._did_from_actor(actor, False), feed),
+ self._did_from_actor(actor), feed),
"limit": "100",
}
return self._pagination(endpoint, params)
@@ -344,7 +344,7 @@ class BlueskyAPI():
endpoint = "app.bsky.feed.getListFeed"
params = {
"list" : "at://{}/app.bsky.graph.list/{}".format(
- self._did_from_actor(actor, False), list),
+ self._did_from_actor(actor), list),
"limit": "100",
}
return self._pagination(endpoint, params)
@@ -391,7 +391,7 @@ class BlueskyAPI():
}
return self._pagination(endpoint, params, "posts")
- def _did_from_actor(self, actor, user_did=True):
+ def _did_from_actor(self, actor, user_did=False):
if actor.startswith("did:"):
did = actor
else:
diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py
new file mode 100644
index 0000000..a9ccab5
--- /dev/null
+++ b/gallery_dl/extractor/cien.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://ci-en.net/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
+
+
+class CienExtractor(Extractor):
+ category = "cien"
+ root = "https://ci-en.net"
+
+ def __init__(self, match):
+ self.root = text.root_from_url(match.group(0))
+ Extractor.__init__(self, match)
+
+ def _pagination_articles(self, url, params):
+ data = {"extractor": CienArticleExtractor}
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ for card in text.extract_iter(
+ page, ' class="c-cardCase-item', '</div>'):
+ article_url = text.extr(card, ' href="', '"')
+ yield Message.Queue, article_url, data
+
+ if ' rel="next"' not in page:
+ return
+ params["page"] += 1
+
+
+class CienArticleExtractor(CienExtractor):
+ subcategory = "article"
+ pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)"
+ example = "https://ci-en.net/creator/123/article/12345"
+
+ def items(self):
+ url = "{}/creator/{}/article/{}".format(
+ self.root, self.groups[0], self.groups[1])
+ page = self.request(url, notfound="article").text
+ return
+ yield 1
+
+
+class CienCreatorExtractor(CienExtractor):
+ subcategory = "creator"
+ pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
+ example = "https://ci-en.net/creator/123"
+
+ def items(self):
+ url = "{}/creator/{}/article".format(self.root, self.groups[0])
+ params = text.parse_query(self.groups[1])
+ params["mode"] = "list"
+ return self._pagination_articles(url, params)
+
+
+class CienRecentExtractor(CienExtractor):
+ subcategory = "recent"
+ pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?"
+ example = "https://ci-en.net/mypage/recent"
+
+ def items(self):
+ url = self.root + "/mypage/recent"
+ params = text.parse_query(self.groups[0])
+ return self._pagination_articles(url, params)
+
+
+class CienFollowingExtractor(CienExtractor):
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/mypage/subscription(/following)?"
+ example = "https://ci-en.net/mypage/subscription"
+
+    def items(self):
+        url = self.root + "/mypage/subscription" + (self.groups[0] or "")
+        params = {}
+        return self._pagination_articles(url, params)
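
All listing extractors above funnel through `_pagination_articles`, which walks `?page=N` until the markup no longer contains a `rel="next"` link and enqueues each article URL. A hypothetical driver consuming those queue messages (not part of the module):

from gallery_dl.extractor import cien
from gallery_dl.extractor.message import Message

extr = cien.CienCreatorExtractor.from_url("https://ci-en.net/creator/123")
extr.initialize()
for msg in extr.items():
    if msg[0] == Message.Queue:
        print(msg[1])     # article URL, to be handled by CienArticleExtractor
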
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index d14e13a..8771261 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -14,6 +14,7 @@ import ssl
import time
import netrc
import queue
+import getpass
import logging
import datetime
import requests
@@ -21,6 +22,7 @@ import threading
from requests.adapters import HTTPAdapter
from .message import Message
from .. import config, text, util, cache, exception
+urllib3 = requests.packages.urllib3
class Extractor():
@@ -45,6 +47,8 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
+ self.match = match
+ self.groups = match.groups()
self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = ""
@@ -168,22 +172,25 @@ class Extractor():
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ContentDecodingError) as exc:
msg = exc
+ code = 0
except (requests.exceptions.RequestException) as exc:
raise exception.HttpError(exc)
else:
code = response.status_code
if self._write_pages:
self._dump_response(response)
- if 200 <= code < 400 or fatal is None and \
- (400 <= code < 500) or not fatal and \
- (400 <= code < 429 or 431 <= code < 500):
+ if (
+ code < 400 or
+ code < 500 and (not fatal and code != 429 or fatal is None)
+ ):
if encoding:
response.encoding = encoding
return response
if notfound and code == 404:
raise exception.NotFoundError(notfound)
- msg = "'{} {}' for '{}'".format(code, response.reason, url)
+ msg = "'{} {}' for '{}'".format(
+ code, response.reason, response.url)
server = response.headers.get("Server")
if server and server.startswith("cloudflare") and \
code in (403, 503):
@@ -194,7 +201,10 @@ class Extractor():
if b'name="captcha-bypass"' in content:
self.log.warning("Cloudflare CAPTCHA")
break
- if code not in retry_codes and code < 500:
+
+ if code == 429 and self._interval_429:
+ pass
+ elif code not in retry_codes and code < 500:
break
finally:
@@ -204,20 +214,24 @@ class Extractor():
if tries > retries:
break
+ seconds = tries
if self._interval:
- seconds = self._interval()
- if seconds < tries:
- seconds = tries
+ s = self._interval()
+ if seconds < s:
+ seconds = s
+ if code == 429 and self._interval_429:
+ s = self._interval_429()
+ if seconds < s:
+ seconds = s
+ self.wait(seconds=seconds, reason="429 Too Many Requests")
else:
- seconds = tries
-
- self.sleep(seconds, "retry")
+ self.sleep(seconds, "retry")
tries += 1
raise exception.HttpError(msg, response)
def wait(self, seconds=None, until=None, adjust=1.0,
- reason="rate limit reset"):
+ reason="rate limit"):
now = time.time()
if seconds:
@@ -240,7 +254,7 @@ class Extractor():
if reason:
t = datetime.datetime.fromtimestamp(until).time()
isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
- self.log.info("Waiting until %s for %s.", isotime, reason)
+ self.log.info("Waiting until %s (%s)", isotime, reason)
time.sleep(seconds)
def sleep(self, seconds, reason):
@@ -248,6 +262,15 @@ class Extractor():
seconds, reason)
time.sleep(seconds)
+ def input(self, prompt, echo=True):
+ if echo:
+ try:
+ return input(prompt)
+ except (EOFError, OSError):
+ return None
+ else:
+ return getpass.getpass(prompt)
+
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
username = self.config("username")
@@ -280,6 +303,9 @@ class Extractor():
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)
+ self._interval_429 = util.build_duration_func(
+ self.config("sleep-429", 60),
+ )
if self._retries < 0:
self._retries = float("inf")
@@ -439,9 +465,11 @@ class Extractor():
if not path:
return
+ path_tmp = path + ".tmp"
try:
- with open(path, "w") as fp:
+ with open(path_tmp, "w") as fp:
util.cookiestxt_store(fp, self.cookies)
+ os.replace(path_tmp, path)
except OSError as exc:
self.log.warning("cookies: %s", exc)
@@ -599,7 +627,7 @@ class GalleryExtractor(Extractor):
def __init__(self, match, url=None):
Extractor.__init__(self, match)
- self.gallery_url = self.root + match.group(1) if url is None else url
+ self.gallery_url = self.root + self.groups[0] if url is None else url
def items(self):
self.login()
@@ -674,7 +702,7 @@ class MangaExtractor(Extractor):
def __init__(self, match, url=None):
Extractor.__init__(self, match)
- self.manga_url = url or self.root + match.group(1)
+ self.manga_url = self.root + self.groups[0] if url is None else url
if self.config("chapter-reverse", False):
self.reverse = not self.reverse
@@ -736,17 +764,18 @@ class BaseExtractor(Extractor):
instances = ()
def __init__(self, match):
- if not self.category:
- self._init_category(match)
Extractor.__init__(self, match)
+ if not self.category:
+ self._init_category()
+ self._cfgpath = ("extractor", self.category, self.subcategory)
- def _init_category(self, match):
- for index, group in enumerate(match.groups()):
+ def _init_category(self):
+ for index, group in enumerate(self.groups):
if group is not None:
if index:
self.category, self.root, info = self.instances[index-1]
if not self.root:
- self.root = text.root_from_url(match.group(0))
+ self.root = text.root_from_url(self.match.group(0))
self.config_instance = info.get
else:
self.root = group
@@ -806,12 +835,12 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
pass
if ssl_options or ssl_ciphers:
- ssl_context = ssl.create_default_context()
- if ssl_options:
- ssl_context.options |= ssl_options
- if ssl_ciphers:
- ssl_context.set_ecdh_curve("prime256v1")
- ssl_context.set_ciphers(ssl_ciphers)
+ ssl_context = urllib3.connection.create_urllib3_context(
+ options=ssl_options or None, ciphers=ssl_ciphers)
+ if requests.__version__ > "2.31":
+ # https://github.com/psf/requests/pull/6731
+ ssl_context.load_default_certs()
+ ssl_context.check_hostname = False
else:
ssl_context = None
@@ -931,8 +960,6 @@ SSL_CIPHERS = {
}
-urllib3 = requests.packages.urllib3
-
# detect brotli support
try:
BROTLI = urllib3.response.brotli is not None
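
The reworked retry loop takes the maximum of three candidate delays: the linear `tries` baseline, the configured `sleep-request` interval, and, for 429 responses, the new `sleep-429` interval (60 seconds by default). Condensed into a standalone function, a sketch of that policy:

def retry_delay(tries, interval=None, interval_429=None, code=0):
    """Return how long to sleep before retry number 'tries'."""
    seconds = tries                          # linear backoff baseline
    if interval:
        seconds = max(seconds, interval())
    if code == 429 and interval_429:
        seconds = max(seconds, interval_429())
    return seconds

# e.g. with the default sleep-429 of 60 seconds:
assert retry_delay(1, None, lambda: 60.0, code=429) == 60.0
assert retry_delay(3) == 3
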
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index ca8acaa..993885a 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1457,9 +1457,8 @@ class DeviantartOAuthAPI():
self.log.info(
"Register your own OAuth application and use its "
"credentials to prevent this error: "
- "https://github.com/mikf/gallery-dl/blob/master/do"
- "cs/configuration.rst#extractordeviantartclient-id"
- "--client-secret")
+ "https://gdl-org.github.io/docs/configuration.html"
+ "#extractor-deviantart-client-id-client-secret")
else:
if log:
self.log.error(msg)
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index acad95c..1805403 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -50,7 +50,7 @@ class ExhentaiExtractor(Extractor):
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
- if response.history and response.headers.get("Content-Length") == "0":
+ if "Cache-Control" not in response.headers and not response.content:
self.log.info("blank page")
raise exception.AuthorizationError()
return response
@@ -95,7 +95,11 @@ class ExhentaiExtractor(Extractor):
self.cookies.clear()
response = self.request(url, method="POST", headers=headers, data=data)
- if b"You are now logged in as:" not in response.content:
+ content = response.content
+ if b"You are now logged in as:" not in content:
+ if b"The captcha was not entered correctly" in content:
+ raise exception.AuthenticationError(
+ "CAPTCHA required. Use cookies instead.")
raise exception.AuthenticationError()
# collect more cookies
@@ -437,7 +441,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.AuthorizationError()
if page.startswith(("Key missing", "Gallery not found")):
raise exception.NotFoundError("gallery")
- if "hentai.org/mpv/" in page:
+ if page.count("hentai.org/mpv/") > 1:
self.log.warning("Enabled Multi-Page Viewer is not supported")
return page
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 715abcb..85dd896 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
- self.board = match.group(match.lastindex-1)
- self.thread = match.group(match.lastindex)
+ self.board = self.groups[-2]
+ self.thread = self.groups[-1]
self.data = None
def metadata(self):
@@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
- pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
+ pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
example = "https://archived.moe/a/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
- self.board = match.group(match.lastindex)
+ self.board = self.groups[-2]
+ self.page = self.groups[-1]
def items(self):
index_base = "{}/_/api/chan/index/?board={}&page=".format(
self.root, self.board)
thread_base = "{}/{}/thread/".format(self.root, self.board)
- for page in itertools.count(1):
- with self.request(index_base + format(page)) as response:
+ page = self.page
+ for pnum in itertools.count(text.parse_int(page, 1)):
+ with self.request(index_base + format(pnum)) as response:
try:
threads = response.json()
except ValueError:
@@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
thread["_extractor"] = FoolfuukaThreadExtractor
yield Message.Queue, thread["url"], thread
+ if page:
+ return
+
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
@@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.params = params = {}
- args = match.group(match.lastindex).split("/")
- key = None
- for arg in args:
+ key = None
+ for arg in self.groups[-1].split("/"):
if key:
params[key] = text.unescape(arg)
key = None
else:
key = arg
- board = match.group(match.lastindex-1)
+ board = self.groups[-2]
if board != "_":
params["boards"] = board
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 56721d0..6040187 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -11,7 +11,7 @@
from .common import Extractor, Message
from .. import text, util
-BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"
class FuraffinityExtractor(Extractor):
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 2459a61..37c776e 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -51,19 +51,44 @@ class GelbooruBase():
params["pid"] = self.page_start
params["limit"] = self.per_page
limit = self.per_page // 2
+ pid = False
+
+ if "tags" in params:
+ tags = params["tags"].split()
+ op = "<"
+ id = False
+
+ for tag in tags:
+ if tag.startswith("sort:"):
+ if tag == "sort:id:asc":
+ op = ">"
+ elif tag == "sort:id" or tag.startswith("sort:id:"):
+ op = "<"
+ else:
+ pid = True
+ elif tag.startswith("id:"):
+ id = True
+
+ if not pid:
+ if id:
+ tag = "id:" + op
+ tags = [t for t in tags if not t.startswith(tag)]
+ tags = "{} id:{}".format(" ".join(tags), op)
while True:
posts = self._api_request(params)
- for post in posts:
- yield post
+ yield from posts
if len(posts) < limit:
return
- if "pid" in params:
- del params["pid"]
- params["tags"] = "{} id:<{}".format(self.tags, post["id"])
+ if pid:
+ params["pid"] += 1
+ else:
+ if "pid" in params:
+ del params["pid"]
+ params["tags"] = tags + str(posts[-1]["id"])
def _pagination_html(self, params):
url = self.root + "/index.php"
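
Instead of paging past Gelbooru's `pid` limit, the query is rewritten to filter by post ID whenever results are ID-sorted (the default): `pid` is dropped and an `id:<LAST` term (or `id:>LAST` under `sort:id:asc`) replaces it after each batch. Only queries with some other `sort:` order keep incrementing `pid`. Roughly, per iteration (illustrative values):

# default descending-ID order
params = {"tags": "cat_ears", "limit": 100}
# ...first batch returned, its last post has id 9000123...
params.pop("pid", None)
params["tags"] = "cat_ears id:<9000123"
# the next batch rebuilds the filter as "cat_ears id:<8999871",
# and so on, until fewer than limit//2 posts come back
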
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 7ab6d02..8d8b8ad 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -25,7 +25,7 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.api_root = self.config_instance("api_root") or self.root
if self.category == "realbooru":
- self._file_url = self._file_url_realbooru
+ self.items = self._items_realbooru
self._tags = self._tags_realbooru
def _api_request(self, params):
@@ -124,6 +124,35 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
return url
+ def _items_realbooru(self):
+ from .common import Message
+ data = self.metadata()
+
+ for post in self.posts():
+ try:
+ html = self._html(post)
+ fallback = post["file_url"]
+ url = post["file_url"] = text.rextract(
+ html, 'href="', '"', html.index(">Original<"))[0]
+ except Exception:
+ self.log.debug("Unable to fetch download URL for post %s "
+ "(md5: %s)", post.get("id"), post.get("md5"))
+ continue
+
+ text.nameext_from_url(url, post)
+ post.update(data)
+ self._prepare(post)
+ self._tags(post, html)
+
+ path = url.rpartition("/")[0]
+ post["_fallback"] = (
+ "{}/{}.{}".format(path, post["md5"], post["extension"]),
+ fallback,
+ )
+
+ yield Message.Directory, post
+ yield Message.Url, url, post
+
def _tags_realbooru(self, post, page):
tag_container = text.extr(page, 'id="tagLink"', '</div>')
tags = collections.defaultdict(list)
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index aadce6c..4a9759f 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://hiperdex.com/"""
+"""Extractors for https://hiperdex.top/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
@@ -14,18 +14,18 @@ from ..cache import memcache
import re
BASE_PATTERN = (r"((?:https?://)?(?:www\.)?"
- r"(?:1st)?hiperdex\d?\.(?:com|net|info))")
+ r"(?:1st)?hiperdex\d?\.(?:com|net|info|top))")
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://hiperdex.com"
+ root = "https://hiperdex.top"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
if not page:
- url = "{}/mangas/{}/".format(self.root, manga)
+ url = "{}/manga/{}/".format(self.root, manga)
page = self.request(url).text
extr = text.extract_from(page)
@@ -67,9 +67,9 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
- """Extractor for manga chapters from hiperdex.com"""
+ """Extractor for hiperdex manga chapters"""
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
- example = "https://hiperdex.com/mangas/MANGA/CHAPTER/"
+ example = "https://hiperdex.top/manga/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@@ -88,10 +88,10 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
- """Extractor for manga from hiperdex.com"""
+ """Extractor for hiperdex manga"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
- example = "https://hiperdex.com/mangas/MANGA/"
+ example = "https://hiperdex.top/manga/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()
@@ -121,13 +121,13 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
- """Extractor for an artists's manga on hiperdex.com"""
+ """Extractor for an artists's manga on hiperdex"""
subcategory = "artist"
categorytransfer = False
chapterclass = HiperdexMangaExtractor
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
- example = "https://hiperdex.com/manga-artist/NAME/"
+ example = "https://hiperdex.top/manga-artist/NAME/"
def __init__(self, match):
self.root = text.ensure_http_scheme(match.group(1))
diff --git a/gallery_dl/extractor/hotleak.py b/gallery_dl/extractor/hotleak.py
index 6d3184d..a2b51be 100644
--- a/gallery_dl/extractor/hotleak.py
+++ b/gallery_dl/extractor/hotleak.py
@@ -23,6 +23,7 @@ class HotleakExtractor(Extractor):
def items(self):
for post in self.posts():
+ post["_http_expected_status"] = (404,)
yield Message.Directory, post
yield Message.Url, post["url"], post
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 86b1edd..481fb1e 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -68,7 +68,7 @@ class ImgurImageExtractor(ImgurExtractor):
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
archive_fmt = "{id}"
pattern = (BASE_PATTERN + r"/(?!gallery|search)"
- r"(?:r/\w+/)?(\w{7}|\w{5})[sbtmlh]?")
+ r"(?:r/\w+/)?(?:[^/?#]+-)?(\w{7}|\w{5})[sbtmlh]?")
example = "https://imgur.com/abcdefg"
def items(self):
@@ -93,7 +93,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}")
filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
archive_fmt = "{album[id]}_{id}"
- pattern = BASE_PATTERN + r"/a/(\w{7}|\w{5})"
+ pattern = BASE_PATTERN + r"/a/(?:[^/?#]+-)?(\w{7}|\w{5})"
example = "https://imgur.com/a/abcde"
def items(self):
@@ -126,7 +126,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
class ImgurGalleryExtractor(ImgurExtractor):
"""Extractor for imgur galleries"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(\w{7}|\w{5})"
+ pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(?:[^/?#]+-)?(\w{7}|\w{5})"
example = "https://imgur.com/gallery/abcde"
def items(self):
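
All three patterns gain an optional `(?:[^/?#]+-)?` group because imgur links may now carry a title slug in front of the ID. A quick check that both URL styles resolve to the same ID (pattern fragment copied from the album extractor above):

import re

pattern = re.compile(r"/a/(?:[^/?#]+-)?(\w{7}|\w{5})")
for url in ("https://imgur.com/a/abcde",
            "https://imgur.com/a/some-title-abcde"):
    print(pattern.search(url).group(1))   # "abcde" both times
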
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 62586af..2ae8cbe 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -330,15 +330,18 @@ class InkbunnyAPI():
def _call(self, endpoint, params):
url = "https://inkbunny.net/api_" + endpoint + ".php"
params["sid"] = self.session_id
- data = self.extractor.request(url, params=params).json()
- if "error_code" in data:
+ while True:
+ data = self.extractor.request(url, params=params).json()
+
+ if "error_code" not in data:
+ return data
+
if str(data["error_code"]) == "2":
self.authenticate(invalidate=True)
- return self._call(endpoint, params)
- raise exception.StopExtraction(data.get("error_message"))
+ continue
- return data
+ raise exception.StopExtraction(data.get("error_message"))
def _pagination_search(self, params):
params["page"] = 1
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 9c77b7a..b0c24de 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -57,7 +57,7 @@ class KemonopartyExtractor(Extractor):
generators = self._build_file_generators(self.config("files"))
duplicates = self.config("duplicates")
comments = self.config("comments")
- username = dms = None
+ username = dms = announcements = None
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
@@ -68,6 +68,8 @@ class KemonopartyExtractor(Extractor):
'<meta name="artist_name" content="', '"')[0])
if self.config("dms"):
dms = True
+ if self.config("announcements"):
+ announcements = True
posts = self.posts()
max_posts = self.config("max-posts")
@@ -80,7 +82,7 @@ class KemonopartyExtractor(Extractor):
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = self._parse_datetime(
- post["published"] or post["added"])
+ post.get("published") or post.get("added") or "")
if username:
post["username"] = username
@@ -88,8 +90,12 @@ class KemonopartyExtractor(Extractor):
post["comments"] = self._extract_comments(post)
if dms is not None:
if dms is True:
- dms = self._extract_dms(post)
+ dms = self._extract_cards(post, "dms")
post["dms"] = dms
+ if announcements is not None:
+ if announcements is True:
+ announcements = self._extract_cards(post, "announcements")
+ post["announcements"] = announcements
files = []
hashes = set()
@@ -156,7 +162,7 @@ class KemonopartyExtractor(Extractor):
def _file(self, post):
file = post["file"]
- if not file:
+ if not file or "path" not in file:
return ()
file["type"] = "file"
return (file,)
@@ -200,21 +206,21 @@ class KemonopartyExtractor(Extractor):
})
return comments
- def _extract_dms(self, post):
- url = "{}/{}/user/{}/dms".format(
- self.root, post["service"], post["user"])
+ def _extract_cards(self, post, type):
+ url = "{}/{}/user/{}/{}".format(
+ self.root, post["service"], post["user"], type)
page = self.request(url).text
- dms = []
- for dm in text.extract_iter(page, "<article", "</article>"):
- footer = text.extr(dm, "<footer", "</footer>")
- dms.append({
+ cards = []
+ for card in text.extract_iter(page, "<article", "</article>"):
+ footer = text.extr(card, "<footer", "</footer>")
+ cards.append({
"body": text.unescape(text.extr(
- dm, "<pre>", "</pre></",
+ card, "<pre>", "</pre></",
).strip()),
- "date": text.extr(footer, 'Published: ', '\n'),
+ "date": text.extr(footer, ': ', '\n'),
})
- return dms
+ return cards
def _parse_datetime(self, date_string):
if len(date_string) > 19:
@@ -494,7 +500,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- self.favorites = (text.parse_query(match.group(3)).get("type") or
+ self.params = text.parse_query(match.group(3))
+ self.favorites = (self.params.get("type") or
self.config("favorites") or
"artist")
@@ -502,9 +509,17 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
self._prepare_ddosguard_cookies()
self.login()
+ sort = self.params.get("sort")
+ order = self.params.get("order") or "desc"
+
if self.favorites == "artist":
users = self.request(
self.root + "/api/v1/account/favorites?type=artist").json()
+
+ if not sort:
+ sort = "updated"
+ users.sort(key=lambda x: x[sort], reverse=(order == "desc"))
+
for user in users:
user["_extractor"] = KemonopartyUserExtractor
url = "{}/{}/user/{}".format(
@@ -514,6 +529,11 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
elif self.favorites == "post":
posts = self.request(
self.root + "/api/v1/account/favorites?type=post").json()
+
+ if not sort:
+ sort = "faved_seq"
+ posts.sort(key=lambda x: x[sort], reverse=(order == "desc"))
+
for post in posts:
post["_extractor"] = KemonopartyPostExtractor
url = "{}/{}/user/{}/post/{}".format(
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 030d7d1..cb7f701 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -29,6 +29,7 @@ class MastodonExtractor(BaseExtractor):
self.instance = self.root.partition("://")[2]
self.reblogs = self.config("reblogs", False)
self.replies = self.config("replies", True)
+ self.cards = self.config("cards", False)
def items(self):
for status in self.statuses():
@@ -48,6 +49,17 @@ class MastodonExtractor(BaseExtractor):
if status["reblog"]:
attachments.extend(status["reblog"]["media_attachments"])
+ if self.cards:
+ card = status.get("card")
+ if card:
+ url = card.get("image")
+ if url:
+ card["weburl"] = card.get("url")
+ card["url"] = url
+ card["id"] = "card" + "".join(
+ url.split("/")[6:-2]).lstrip("0")
+ attachments.append(card)
+
status["instance"] = self.instance
acct = status["account"]["acct"]
status["instance_remote"] = \
@@ -120,6 +132,7 @@ class MastodonUserExtractor(MastodonExtractor):
api.account_id_by_username(self.item),
only_media=(
not self.reblogs and
+ not self.cards and
not self.config("text-posts", False)
),
exclude_replies=not self.replies,
@@ -136,6 +149,36 @@ class MastodonBookmarkExtractor(MastodonExtractor):
return MastodonAPI(self).account_bookmarks()
+class MastodonFavoriteExtractor(MastodonExtractor):
+ """Extractor for mastodon favorites"""
+ subcategory = "favorite"
+ pattern = BASE_PATTERN + r"/favourites"
+ example = "https://mastodon.social/favourites"
+
+ def statuses(self):
+ return MastodonAPI(self).account_favorites()
+
+
+class MastodonListExtractor(MastodonExtractor):
+ """Extractor for mastodon lists"""
+ subcategory = "list"
+ pattern = BASE_PATTERN + r"/lists/(\w+)"
+ example = "https://mastodon.social/lists/12345"
+
+ def statuses(self):
+ return MastodonAPI(self).timelines_list(self.item)
+
+
+class MastodonHashtagExtractor(MastodonExtractor):
+ """Extractor for mastodon hashtags"""
+ subcategory = "hashtag"
+ pattern = BASE_PATTERN + r"/tags/(\w+)"
+ example = "https://mastodon.social/tags/NAME"
+
+ def statuses(self):
+ return MastodonAPI(self).timelines_tag(self.item)
+
+
class MastodonFollowingExtractor(MastodonExtractor):
"""Extractor for followed mastodon users"""
subcategory = "following"
@@ -205,37 +248,55 @@ class MastodonAPI():
raise exception.NotFoundError("account")
def account_bookmarks(self):
+ """Statuses the user has bookmarked"""
endpoint = "/v1/bookmarks"
return self._pagination(endpoint, None)
+ def account_favorites(self):
+ """Statuses the user has favourited"""
+ endpoint = "/v1/favourites"
+ return self._pagination(endpoint, None)
+
def account_following(self, account_id):
+ """Accounts which the given account is following"""
endpoint = "/v1/accounts/{}/following".format(account_id)
return self._pagination(endpoint, None)
def account_lookup(self, username):
+ """Quickly lookup a username to see if it is available"""
endpoint = "/v1/accounts/lookup"
params = {"acct": username}
return self._call(endpoint, params).json()
def account_search(self, query, limit=40):
- """Search for accounts"""
+ """Search for matching accounts by username or display name"""
endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit}
return self._call(endpoint, params).json()
def account_statuses(self, account_id, only_media=True,
exclude_replies=False):
- """Fetch an account's statuses"""
+ """Statuses posted to the given account"""
endpoint = "/v1/accounts/{}/statuses".format(account_id)
- params = {"only_media" : "1" if only_media else "0",
- "exclude_replies": "1" if exclude_replies else "0"}
+ params = {"only_media" : "true" if only_media else "false",
+ "exclude_replies": "true" if exclude_replies else "false"}
return self._pagination(endpoint, params)
def status(self, status_id):
- """Fetch a status"""
+ """Obtain information about a status"""
endpoint = "/v1/statuses/" + status_id
return self._call(endpoint).json()
+ def timelines_list(self, list_id):
+ """View statuses in the given list timeline"""
+ endpoint = "/v1/timelines/list/" + list_id
+ return self._pagination(endpoint, None)
+
+ def timelines_tag(self, hashtag):
+ """View public statuses containing the given hashtag"""
+ endpoint = "/v1/timelines/tag/" + hashtag
+ return self._pagination(endpoint, None)
+
def _call(self, endpoint, params=None):
if endpoint.startswith("http"):
url = endpoint
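
The three new extractors are thin wrappers over standard Mastodon API routes. A minimal sketch of the underlying requests using plain `requests` (instance, token, and IDs are placeholders; `/api/v1/favourites` requires an authenticated token, the timeline routes may not):

import requests

instance = "https://mastodon.social"
headers = {"Authorization": "Bearer <access-token>"}

for endpoint in ("/api/v1/favourites",
                 "/api/v1/timelines/list/12345",
                 "/api/v1/timelines/tag/photography"):
    resp = requests.get(instance + endpoint,
                        params={"limit": "40"}, headers=headers)
    statuses = resp.json()   # list of status objects with media_attachments
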
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 4cdcf87..7ac3a3a 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -102,30 +102,55 @@ class NewgroundsExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- url = self.root + "/passport/"
+ url = self.root + "/passport"
response = self.request(url)
if response.history and response.url.endswith("/social"):
return self.cookies
page = response.text
- headers = {"Origin": self.root, "Referer": url}
+ headers = {
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin": self.root,
+ "Referer": url,
+ }
url = text.urljoin(self.root, text.extr(page, 'action="', '"'))
data = {
- "username": username,
- "password": password,
- "remember": "1",
- "login" : "1",
"auth" : text.extr(page, 'name="auth" value="', '"'),
+ "remember": "1",
+ "username": username,
+ "password": str(password),
+ "code" : "",
+ "codehint": "------",
+ "mfaCheck": "1",
}
- response = self.request(url, method="POST", headers=headers, data=data)
- if not response.history:
- raise exception.AuthenticationError()
+ while True:
+ response = self.request(
+ url, method="POST", headers=headers, data=data)
+ result = response.json()
+
+ if result.get("success"):
+ break
+ if "errors" in result:
+ raise exception.AuthenticationError(
+ '"' + '", "'.join(result["errors"]) + '"')
+
+ if result.get("requiresMfa"):
+ data["code"] = self.input("Verification Code: ")
+ data["codehint"] = " "
+ elif result.get("requiresEmailMfa"):
+ email = result.get("obfuscatedEmail")
+ prompt = "Email Verification Code ({}): ".format(email)
+ data["code"] = self.input(prompt)
+ data["codehint"] = " "
+
+ data.pop("mfaCheck", None)
return {
cookie.name: cookie.value
- for cookie in response.history[0].cookies
- if cookie.expires and cookie.domain == self.cookies_domain
+ for cookie in response.cookies
}
def extract_post(self, post_url):
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 8c8a5a9..5571575 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -110,7 +110,7 @@ class OAuthBase(Extractor):
# get a request token
params = {"oauth_callback": self.redirect_uri}
- data = self.session.get(request_token_url, params=params).text
+ data = self.request(request_token_url, params=params).text
data = text.parse_query(data)
self.session.auth.token_secret = data["oauth_token_secret"]
@@ -120,7 +120,7 @@ class OAuthBase(Extractor):
data = self.open(authorize_url, params)
# exchange the request token for an access token
- data = self.session.get(access_token_url, params=data).text
+ data = self.request(access_token_url, params=data).text
data = text.parse_query(data)
token = data["oauth_token"]
token_secret = data["oauth_token_secret"]
@@ -189,7 +189,8 @@ class OAuthBase(Extractor):
data["client_id"] = client_id
data["client_secret"] = client_secret
- data = self.session.post(token_url, data=data, auth=auth).json()
+ data = self.request(
+ token_url, method="POST", data=data, auth=auth).json()
# check token response
if "error" in data:
@@ -386,7 +387,7 @@ class OAuthMastodon(OAuthBase):
"redirect_uris": self.redirect_uri,
"scopes": "read",
}
- data = self.session.post(url, data=data).json()
+ data = self.request(url, method="POST", data=data).json()
if "client_id" not in data or "client_secret" not in data:
raise exception.StopExtraction(
@@ -441,7 +442,8 @@ class OAuthPixiv(OAuthBase):
"redirect_uri" : "https://app-api.pixiv.net"
"/web/v1/users/auth/pixiv/callback",
}
- data = self.session.post(url, headers=headers, data=data).json()
+ data = self.request(
+ url, method="POST", headers=headers, data=data).json()
if "error" in data:
stdout_write("\n{}\n".format(data))
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 62d11f2..eb6d677 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -263,8 +263,9 @@ class PatreonExtractor(Extractor):
page, 'id="__NEXT_DATA__" type="application/json">', '</script')
if data:
try:
- return (util.json_loads(data)["props"]["pageProps"]
- ["bootstrapEnvelope"]["bootstrap"])
+ data = util.json_loads(data)
+ env = data["props"]["pageProps"]["bootstrapEnvelope"]
+ return env.get("pageBootstrap") or env["bootstrap"]
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
index 5cfdc43..83f3577 100644
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2023 Mike Fährmann
+# Copyright 2023-2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -59,12 +59,13 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
directory_fmt = ("{category}",
"{album[date]:%Y-%m-%d} {album[title]} ({album[id]})")
filename_fmt = "{num:>03} {filename[:230]} ({id}).{extension}"
- pattern = BASE_PATTERN + r"/(?:l|api/list)/(\w+)"
+ pattern = BASE_PATTERN + r"/(?:l|api/list)/(\w+)(?:#item=(\d+))?"
example = "https://pixeldrain.com/l/abcdefgh"
def __init__(self, match):
Extractor.__init__(self, match)
self.album_id = match.group(1)
+ self.file_index = match.group(2)
def items(self):
url = "{}/api/list/{}".format(self.root, self.album_id)
@@ -74,11 +75,20 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
album["count"] = album["file_count"]
album["date"] = self.parse_datetime(album["date_created"])
+ if self.file_index:
+ idx = text.parse_int(self.file_index)
+ try:
+ files = (files[idx],)
+ except LookupError:
+ files = ()
+ else:
+ idx = 0
+
del album["files"]
del album["file_count"]
yield Message.Directory, {"album": album}
- for num, file in enumerate(files, 1):
+ for num, file in enumerate(files, idx+1):
file["album"] = album
file["num"] = num
file["url"] = url = "{}/api/file/{}?download".format(
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 862a7db..d732894 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -104,8 +104,9 @@ class PixivExtractor(Extractor):
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
if url == url_sanity:
- self.log.debug("Skipping 'sanity_level' warning (%s)",
- work["id"])
+ self.log.warning(
+ "Unable to download work %s ('sanity_level' warning)",
+ work["id"])
continue
work["date_url"] = self._date_from_url(url)
yield Message.Url, url, text.nameext_from_url(url, work)
@@ -619,6 +620,7 @@ class PixivNovelExtractor(PixivExtractor):
meta_user = self.config("metadata")
meta_bookmark = self.config("metadata-bookmark")
embeds = self.config("embeds")
+ covers = self.config("covers")
if embeds:
headers = {
@@ -658,6 +660,19 @@ class PixivNovelExtractor(PixivExtractor):
novel["extension"] = "txt"
yield Message.Url, "text:" + content, novel
+ if covers:
+ path = novel["image_urls"]["large"].partition("/img/")[2]
+ url = ("https://i.pximg.net/novel-cover-original/img/" +
+ path.rpartition(".")[0].replace("_master1200", ""))
+ novel["date_url"] = self._date_from_url(url)
+ novel["num"] += 1
+ novel["suffix"] = "_p{:02}".format(novel["num"])
+ novel["_fallback"] = (url + ".png",)
+ url_jpg = url + ".jpg"
+ text.nameext_from_url(url_jpg, novel)
+ yield Message.Url, url_jpg, novel
+ del novel["_fallback"]
+
if embeds:
desktop = False
illusts = {}
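
Novel covers are derived from the `master1200` preview URL by string surgery rather than an extra API call, trying `.jpg` first with `.png` kept as fallback. The transformation in isolation (the sample URL is illustrative):

large = ("https://i.pximg.net/c/600x600/novel-cover-master/"
         "img/2024/01/01/00/00/00/12345_abcdef_master1200.jpg")

path = large.partition("/img/")[2]
base = ("https://i.pximg.net/novel-cover-original/img/" +
        path.rpartition(".")[0].replace("_master1200", ""))
print(base + ".jpg")   # tried first; base + ".png" serves as _fallback
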
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index f42016f..bd22283 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -23,6 +23,12 @@ class PoipikuExtractor(Extractor):
archive_fmt = "{post_id}_{num}"
request_interval = (0.5, 1.5)
+ def _init(self):
+ self.cookies.set(
+ "LANG", "en", domain="poipiku.com")
+ self.cookies.set(
+ "POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com")
+
def items(self):
password = self.config("password", "")
@@ -59,7 +65,7 @@ class PoipikuExtractor(Extractor):
"//img.", "//img-org.", 1)
yield Message.Url, url, text.nameext_from_url(url, post)
- if not extr(' show all(+', '<'):
+ if not extr('ShowAppendFile', '<'):
continue
url = self.root + "/f/ShowAppendFileF.jsp"
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 3569860..115de9a 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -35,10 +35,7 @@ class ReadcomiconlineBase():
self.log.warning(
"Redirect to \n%s\nVisit this URL in your browser, solve "
"the CAPTCHA, and press ENTER to continue", response.url)
- try:
- input()
- except (EOFError, OSError):
- pass
+ self.input()
else:
raise exception.StopExtraction(
"Redirect to \n%s\nVisit this URL in your browser and "
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index e099c7e..ce602f6 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -74,8 +74,8 @@ class RedditExtractor(Extractor):
yield Message.Url, url, submission
elif "gallery_data" in media:
- for submission["num"], url in enumerate(
- self._extract_gallery(media), 1):
+ for url in self._extract_gallery(media):
+ submission["num"] += 1
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
@@ -99,7 +99,10 @@ class RedditExtractor(Extractor):
urls.append((url, submission))
for comment in comments:
html = comment["body_html"] or ""
- if ' href="' in html:
+ href = (' href="' in html)
+ media = ("media_metadata" in comment)
+
+ if media or href:
comment["date"] = text.parse_timestamp(
comment["created_utc"])
if submission:
@@ -107,6 +110,14 @@ class RedditExtractor(Extractor):
data["comment"] = comment
else:
data = comment
+
+ if media:
+ for embed in self._extract_embed(comment):
+ submission["num"] += 1
+ text.nameext_from_url(embed, submission)
+ yield Message.Url, embed, submission
+
+ if href:
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, data))
@@ -118,6 +129,7 @@ class RedditExtractor(Extractor):
if url.startswith((
"https://www.reddit.com/message/compose",
"https://reddit.com/message/compose",
+ "https://preview.redd.it/",
)):
continue
@@ -172,6 +184,27 @@ class RedditExtractor(Extractor):
submission["id"], item["media_id"])
self.log.debug(src)
+ def _extract_embed(self, submission):
+ meta = submission["media_metadata"]
+ if not meta:
+ return
+
+ for mid, data in meta.items():
+ if data["status"] != "valid" or "s" not in data:
+ self.log.warning(
+ "embed %s: skipping item %s (status: %s)",
+ submission["id"], mid, data.get("status"))
+ continue
+ src = data["s"]
+ url = src.get("u") or src.get("gif") or src.get("mp4")
+ if url:
+ yield url.partition("?")[0].replace("/preview.", "/i.", 1)
+ else:
+ self.log.error(
+ "embed %s: unable to fetch download URL for item %s",
+ submission["id"], mid)
+ self.log.debug(src)
+
def _extract_video_ytdl(self, submission):
return "https://www.reddit.com" + submission["permalink"]
@@ -454,14 +487,14 @@ class RedditAPI():
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
- if self._warn_429:
- self._warn_429 = False
+ self.log.warning("API rate limit exceeded")
+ if self._warn_429 and self.client_id == self.CLIENT_ID:
self.log.info(
"Register your own OAuth application and use its "
"credentials to prevent this error: "
- "https://github.com/mikf/gallery-dl/blob/master"
- "/docs/configuration.rst"
- "#extractorredditclient-id--user-agent")
+ "https://gdl-org.github.io/docs/configuration.html"
+ "#extractor-reddit-client-id-user-agent")
+ self._warn_429 = False
self.extractor.wait(
seconds=response.headers["x-ratelimit-reset"])
continue
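
Comment embeds reuse the `media_metadata` structure known from galleries; each valid entry exposes its source under `s` as `u`, `gif`, or `mp4`. A trimmed example of the input `_extract_embed` consumes (layout inferred from the code above):

comment = {
    "id": "k2abcde",
    "media_metadata": {
        "abc123": {
            "status": "valid",
            "s": {"u": "https://preview.redd.it/abc123.jpg?width=640"},
        },
    },
}
src = comment["media_metadata"]["abc123"]["s"]
url = src.get("u") or src.get("gif") or src.get("mp4")
print(url.partition("?")[0].replace("/preview.", "/i.", 1))
# -> https://i.redd.it/abc123.jpg
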
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index edfe1dc..23ba340 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text, util, exception
+from ..cache import cache
class SeigaExtractor(Extractor):
@@ -17,6 +18,7 @@ class SeigaExtractor(Extractor):
category = "seiga"
archive_fmt = "{image_id}"
cookies_domain = ".nicovideo.jp"
+ cookies_names = ("user_session",)
root = "https://seiga.nicovideo.jp"
def __init__(self, match):
@@ -24,8 +26,7 @@ class SeigaExtractor(Extractor):
self.start_image = 0
def items(self):
- if not self.cookies_check(("user_session",)):
- raise exception.StopExtraction("'user_session' cookie required")
+ self.login()
images = iter(self.get_images())
data = next(images)
@@ -50,6 +51,59 @@ class SeigaExtractor(Extractor):
"HTTP redirect to login page (%s)", location.partition("?")[0])
return location.replace("/o/", "/priv/", 1)
+ def login(self):
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ return self.cookies_update(self._login_impl(username, password))
+
+ raise exception.AuthorizationError(
+ "username & password or 'user_session' cookie required")
+
+ @cache(maxage=365*86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ root = "https://account.nicovideo.jp"
+ response = self.request(root + "/login?site=seiga")
+ page = response.text
+
+ data = {
+ "mail_tel": username,
+ "password": password,
+ }
+ url = root + text.unescape(text.extr(page, '<form action="', '"'))
+ response = self.request(url, method="POST", data=data)
+
+ if "message=cant_login" in response.url:
+ raise exception.AuthenticationError()
+
+ if "/mfa" in response.url:
+ page = response.text
+ email = text.extr(page, 'class="userAccount">', "<")
+ code = self.input("Email Confirmation Code ({}): ".format(email))
+
+ data = {
+ "otp": code,
+ "loginBtn": "Login",
+ "device_name": "gdl",
+ }
+ url = root + text.unescape(text.extr(page, '<form action="', '"'))
+ response = self.request(url, method="POST", data=data)
+
+ if not response.history and \
+ b"Confirmation code is incorrect" in response.content:
+ raise exception.AuthenticationError(
+ "Incorrect Confirmation Code")
+
+ return {
+ cookie.name: cookie.value
+ for cookie in self.cookies
+ if cookie.expires and cookie.domain == self.cookies_domain
+ }
+
class SeigaUserExtractor(SeigaExtractor):
"""Extractor for images of a user from seiga.nicovideo.jp"""
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index b56ed27..e5e7a6b 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -47,13 +47,13 @@ class SlidesharePresentationExtractor(GalleryExtractor):
}
def images(self, page):
- parts = self.slideshow["slideImages"][0]["baseUrl"].split("/")
-
- begin = "{}/95/{}-".format(
- "/".join(parts[:4]),
- self.slideshow["strippedTitle"],
+ slides = self.slideshow["slides"]
+ begin = "{}/{}/95/{}-".format(
+ slides["host"],
+ slides["imageLocation"],
+ slides["title"],
)
- end = "-1024.jpg?" + parts[-1].rpartition("?")[2]
+ end = "-1024.jpg"
return [
(begin + str(n) + end, None)
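
Slide URLs are now composed from three fields of the `slides` object instead of being re-split from `baseUrl`. With hypothetical field values, slide 1 maps to:

    slides = {  # illustrative values, not real API data
        "host": "https://image.slidesharecdn.com",
        "imageLocation": "exampledeck-240101010101",
        "title": "example-deck",
    }
    begin = "{}/{}/95/{}-".format(
        slides["host"], slides["imageLocation"], slides["title"])
    print(begin + "1" + "-1024.jpg")
    # -> https://image.slidesharecdn.com/exampledeck-240101010101/95/example-deck-1-1024.jpg
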
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index d4adfed..0abb3ab 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -43,6 +43,8 @@ class SubscribestarExtractor(Extractor):
item.update(data)
item["num"] = num
text.nameext_from_url(item.get("name") or item["url"], item)
+ if item["url"][0] == "/":
+ item["url"] = self.root + item["url"]
yield Message.Url, item["url"], item
def posts(self):
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index 0a9df20..167953d 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -151,3 +151,18 @@ class TapasEpisodeExtractor(TapasExtractor):
def episode_ids(self):
return (self.episode_id,)
+
+
+class TapasCreatorExtractor(TapasExtractor):
+ subcategory = "creator"
+ pattern = BASE_PATTERN + r"/(?!series|episode)([^/?#]+)"
+ example = "https://tapas.io/CREATOR"
+
+ def items(self):
+ url = "{}/{}/series".format(self.root, self.groups[0])
+ page = self.request(url).text
+ page = text.extr(page, '<ul class="content-list-wrap', "</ul>")
+
+ data = {"_extractor": TapasSeriesExtractor}
+ for path in text.extract_iter(page, ' href="', '"'):
+ yield Message.Queue, self.root + path, data
diff --git a/gallery_dl/extractor/tcbscans.py b/gallery_dl/extractor/tcbscans.py
index a3ef26c..de6f3ee 100644
--- a/gallery_dl/extractor/tcbscans.py
+++ b/gallery_dl/extractor/tcbscans.py
@@ -30,7 +30,7 @@ class TcbscansChapterExtractor(ChapterExtractor):
page, 'font-bold mt-8">', "</h1>").rpartition(" - Chapter ")
chapter, sep, minor = chapter.partition(".")
return {
- "manga": text.unescape(manga),
+ "manga": text.unescape(manga).strip(),
"chapter": text.parse_int(chapter),
"chapter_minor": sep + minor,
"lang": "en", "language": "English",
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index fee0145..c34910f 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -447,9 +447,9 @@ class TumblrAPI(oauth.OAuth1API):
if api_key == self.API_KEY:
self.log.info(
"Register your own OAuth application and use its "
- "credentials to prevent this error: https://githu"
- "b.com/mikf/gallery-dl/blob/master/docs/configurat"
- "ion.rst#extractortumblrapi-key--api-secret")
+ "credentials to prevent this error: "
+ "https://gdl-org.github.io/docs/configuration.html"
+ "#extractor-tumblr-api-key-api-secret")
if self.extractor.config("ratelimit") == "wait":
self.extractor.wait(seconds=reset)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index a5bd984..ff77828 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -6,17 +6,18 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://twitter.com/"""
+"""Extractors for https://x.com/"""
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
+import random
import json
import re
BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?"
- r"(?:(?:[fv]x)?twitter|(?:fixup)?x)\.com")
+ r"(?:(?:[fv]x)?twitter|(?:fix(?:up|v))?x)\.com")
class TwitterExtractor(Extractor):
@@ -25,9 +26,9 @@ class TwitterExtractor(Extractor):
directory_fmt = ("{category}", "{user[name]}")
filename_fmt = "{tweet_id}_{num}.{extension}"
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
- cookies_domain = ".twitter.com"
+ cookies_domain = ".x.com"
cookies_names = ("auth_token",)
- root = "https://twitter.com"
+ root = "https://x.com"
browser = "firefox"
def __init__(self, match):
@@ -243,8 +244,8 @@ class TwitterExtractor(Extractor):
# collect URLs from entities
for url in tweet["entities"].get("urls") or ():
- url = url["expanded_url"]
- if "//twitpic.com/" not in url or "/photos/" in url:
+ url = url.get("expanded_url") or url.get("url") or ""
+ if not url or "//twitpic.com/" not in url or "/photos/" in url:
continue
if url.startswith("http:"):
url = "https" + url[4:]
@@ -336,12 +337,20 @@ class TwitterExtractor(Extractor):
urls = entities.get("urls")
if urls:
for url in urls:
- content = content.replace(url["url"], url["expanded_url"])
+ try:
+ content = content.replace(url["url"], url["expanded_url"])
+ except KeyError:
+ pass
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
if "birdwatch_pivot" in tweet:
- tdata["birdwatch"] = tweet["birdwatch_pivot"]["subtitle"]["text"]
+ try:
+ tdata["birdwatch"] = \
+ tweet["birdwatch_pivot"]["subtitle"]["text"]
+ except KeyError:
+ self.log.debug("Unable to extract 'birdwatch' note from %s",
+ tweet["birdwatch_pivot"])
if "in_reply_to_screen_name" in legacy:
tdata["reply_to"] = legacy["in_reply_to_screen_name"]
if "quoted_by" in legacy:
@@ -398,7 +407,10 @@ class TwitterExtractor(Extractor):
urls = entities["description"].get("urls")
if urls:
for url in urls:
- descr = descr.replace(url["url"], url["expanded_url"])
+ try:
+ descr = descr.replace(url["url"], url["expanded_url"])
+ except KeyError:
+ pass
udata["description"] = descr
if "url" in entities:
@@ -483,7 +495,13 @@ class TwitterExtractor(Extractor):
username, password = self._get_auth_info()
if username:
- self.cookies_update(_login_impl(self, username, password))
+ return self.cookies_update(_login_impl(self, username, password))
+
+ for cookie in self.cookies:
+ if cookie.domain == ".twitter.com":
+ self.cookies.set(
+ cookie.name, cookie.value, domain=self.cookies_domain,
+ expires=cookie.expires, secure=cookie.secure)
class TwitterUserExtractor(TwitterExtractor):
@@ -491,7 +509,7 @@ class TwitterUserExtractor(TwitterExtractor):
subcategory = "user"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
- example = "https://twitter.com/USER"
+ example = "https://x.com/USER"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
@@ -519,7 +537,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for a Twitter user timeline"""
subcategory = "timeline"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/timeline(?!\w)"
- example = "https://twitter.com/USER/timeline"
+ example = "https://x.com/USER/timeline"
def tweets(self):
# yield initial batch of (media) tweets
@@ -566,7 +584,7 @@ class TwitterTweetsExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Tweets timeline"""
subcategory = "tweets"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
- example = "https://twitter.com/USER/tweets"
+ example = "https://x.com/USER/tweets"
def tweets(self):
return self.api.user_tweets(self.user)
@@ -576,7 +594,7 @@ class TwitterRepliesExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline including replies"""
subcategory = "replies"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
- example = "https://twitter.com/USER/with_replies"
+ example = "https://x.com/USER/with_replies"
def tweets(self):
return self.api.user_tweets_and_replies(self.user)
@@ -586,7 +604,7 @@ class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's Media timeline"""
subcategory = "media"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
- example = "https://twitter.com/USER/media"
+ example = "https://x.com/USER/media"
def tweets(self):
return self.api.user_media(self.user)
@@ -596,7 +614,7 @@ class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
- example = "https://twitter.com/USER/likes"
+ example = "https://x.com/USER/likes"
def metadata(self):
return {"user_likes": self.user}
@@ -609,7 +627,7 @@ class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
pattern = BASE_PATTERN + r"/i/bookmarks()"
- example = "https://twitter.com/i/bookmarks"
+ example = "https://x.com/i/bookmarks"
def tweets(self):
return self.api.user_bookmarks()
@@ -625,7 +643,7 @@ class TwitterListExtractor(TwitterExtractor):
"""Extractor for Twitter lists"""
subcategory = "list"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
- example = "https://twitter.com/i/lists/12345"
+ example = "https://x.com/i/lists/12345"
def tweets(self):
return self.api.list_latest_tweets_timeline(self.user)
@@ -635,7 +653,7 @@ class TwitterListMembersExtractor(TwitterExtractor):
"""Extractor for members of a Twitter list"""
subcategory = "list-members"
pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
- example = "https://twitter.com/i/lists/12345/members"
+ example = "https://x.com/i/lists/12345/members"
def items(self):
self.login()
@@ -646,7 +664,7 @@ class TwitterFollowingExtractor(TwitterExtractor):
"""Extractor for followed users"""
subcategory = "following"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)"
- example = "https://twitter.com/USER/following"
+ example = "https://x.com/USER/following"
def items(self):
self.login()
@@ -657,7 +675,7 @@ class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for Twitter search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
- example = "https://twitter.com/search?q=QUERY"
+ example = "https://x.com/search?q=QUERY"
def metadata(self):
return {"search": text.unquote(self.user)}
@@ -688,7 +706,7 @@ class TwitterHashtagExtractor(TwitterExtractor):
"""Extractor for Twitter hashtags"""
subcategory = "hashtag"
pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
- example = "https://twitter.com/hashtag/NAME"
+ example = "https://x.com/hashtag/NAME"
def items(self):
url = "{}/search?q=%23{}".format(self.root, self.user)
@@ -700,7 +718,7 @@ class TwitterCommunityExtractor(TwitterExtractor):
"""Extractor for a Twitter community"""
subcategory = "community"
pattern = BASE_PATTERN + r"/i/communities/(\d+)"
- example = "https://twitter.com/i/communities/12345"
+ example = "https://x.com/i/communities/12345"
def tweets(self):
if self.textonly:
@@ -712,7 +730,7 @@ class TwitterCommunitiesExtractor(TwitterExtractor):
"""Extractor for followed Twitter communities"""
subcategory = "communities"
pattern = BASE_PATTERN + r"/([^/?#]+)/communities/?$"
- example = "https://twitter.com/i/communities"
+ example = "https://x.com/i/communities"
def tweets(self):
return self.api.communities_main_page_timeline(self.user)
@@ -724,7 +742,7 @@ class TwitterEventExtractor(TwitterExtractor):
directory_fmt = ("{category}", "Events",
"{event[id]} {event[short_title]}")
pattern = BASE_PATTERN + r"/i/events/(\d+)"
- example = "https://twitter.com/i/events/12345"
+ example = "https://x.com/i/events/12345"
def metadata(self):
return {"event": self.api.live_event(self.user)}
@@ -736,8 +754,9 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for individual tweets"""
subcategory = "tweet"
- pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)/?$"
- example = "https://twitter.com/USER/status/12345"
+ pattern = (BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
+ r"/?(?:$|\?|#|photo/|video/)")
+ example = "https://x.com/USER/status/12345"
def __init__(self, match):
TwitterExtractor.__init__(self, match)
@@ -817,7 +836,7 @@ class TwitterQuotesExtractor(TwitterExtractor):
"""Extractor for quotes of a Tweet"""
subcategory = "quotes"
pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
- example = "https://twitter.com/USER/status/12345/quotes"
+ example = "https://x.com/USER/status/12345/quotes"
def items(self):
url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user)
@@ -830,7 +849,7 @@ class TwitterAvatarExtractor(TwitterExtractor):
filename_fmt = "avatar {date}.{extension}"
archive_fmt = "AV_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
- example = "https://twitter.com/USER/photo"
+ example = "https://x.com/USER/photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
@@ -852,7 +871,7 @@ class TwitterBackgroundExtractor(TwitterExtractor):
filename_fmt = "background {date}.{extension}"
archive_fmt = "BG_{user[id]}_{date}"
pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
- example = "https://twitter.com/USER/header_photo"
+ example = "https://x.com/USER/header_photo"
def tweets(self):
self.api._user_id_by_screen_name(self.user)
@@ -899,7 +918,7 @@ class TwitterAPI():
self.extractor = extractor
self.log = extractor.log
- self.root = "https://twitter.com/i/api"
+ self.root = "https://x.com/i/api"
self._nsfw_warning = True
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
@@ -919,7 +938,7 @@ class TwitterAPI():
self.headers = {
"Accept": "*/*",
- "Referer": "https://twitter.com/",
+ "Referer": extractor.root + "/",
"content-type": "application/json",
"x-guest-token": None,
"x-twitter-auth-type": "OAuth2Session" if auth_token else None,
@@ -1262,7 +1281,7 @@ class TwitterAPI():
endpoint = "/1.1/guest/activate.json"
self.log.info("Requesting guest token")
return str(self._call(
- endpoint, None, "POST", False, "https://api.twitter.com",
+ endpoint, None, "POST", False, "https://api.x.com",
)["guest_token"])
def _authenticate_guest(self):
@@ -1288,63 +1307,72 @@ class TwitterAPI():
if csrf_token:
self.headers["x-csrf-token"] = csrf_token
- if response.status_code < 400:
+ remaining = int(response.headers.get("x-rate-limit-remaining", 6))
+ if remaining < 6 and remaining <= random.randrange(1, 6):
+ self._handle_ratelimit(response)
+ continue
+
+ try:
data = response.json()
+ except ValueError:
+ data = {"errors": ({"message": response.text},)}
+
+ errors = data.get("errors")
+ if not errors:
+ return data
+
+ retry = False
+ for error in errors:
+ msg = error.get("message") or "Unspecified"
+ self.log.debug("API error: '%s'", msg)
+
+ if "this account is temporarily locked" in msg:
+ msg = "Account temporarily locked"
+ if self.extractor.config("locked") != "wait":
+ raise exception.AuthorizationError(msg)
+ self.log.warning(msg)
+ self.extractor.input("Press ENTER to retry.")
+ retry = True
+
+ elif "Could not authenticate you" in msg:
+ if not self.extractor.config("relogin", True):
+ continue
- errors = data.get("errors")
- if not errors:
- return data
+ username, password = self.extractor._get_auth_info()
+ if not username:
+ continue
- retry = False
- for error in errors:
- msg = error.get("message") or "Unspecified"
- self.log.debug("API error: '%s'", msg)
+ _login_impl.invalidate(username)
+ self.extractor.cookies_update(
+ _login_impl(self.extractor, username, password))
+ self.__init__(self.extractor)
+ retry = True
- if "this account is temporarily locked" in msg:
- msg = "Account temporarily locked"
- if self.extractor.config("locked") != "wait":
- raise exception.AuthorizationError(msg)
- self.log.warning("%s. Press ENTER to retry.", msg)
- try:
- input()
- except (EOFError, OSError):
- pass
- retry = True
-
- elif msg.lower().startswith("timeout"):
- retry = True
+ elif msg.lower().startswith("timeout"):
+ retry = True
- if not retry:
- return data
- elif self.headers["x-twitter-auth-type"]:
+ if retry:
+ if self.headers["x-twitter-auth-type"]:
self.log.debug("Retrying API request")
continue
+ else:
+ # fall through to "Login Required"
+ response.status_code = 404
- # fall through to "Login Required"
- response.status_code = 404
-
- if response.status_code == 429:
- # rate limit exceeded
- if self.extractor.config("ratelimit") == "abort":
- raise exception.StopExtraction("Rate limit exceeded")
-
- until = response.headers.get("x-rate-limit-reset")
- seconds = None if until else 60
- self.extractor.wait(until=until, seconds=seconds)
- continue
-
- if response.status_code in (403, 404) and \
+ if response.status_code < 400:
+ return data
+ elif response.status_code in (403, 404) and \
not self.headers["x-twitter-auth-type"]:
raise exception.AuthorizationError("Login required")
+ elif response.status_code == 429:
+ self._handle_ratelimit(response)
+ continue
# error
try:
- data = response.json()
- errors = ", ".join(e["message"] for e in data["errors"])
- except ValueError:
- errors = response.text
+ errors = ", ".join(e["message"] for e in errors)
except Exception:
- errors = data.get("errors", "")
+ pass
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, errors)
@@ -1680,6 +1708,13 @@ class TwitterAPI():
return
variables["cursor"] = cursor
+ def _handle_ratelimit(self, response):
+ if self.extractor.config("ratelimit") == "abort":
+ raise exception.StopExtraction("Rate limit exceeded")
+
+ until = response.headers.get("x-rate-limit-reset")
+ self.extractor.wait(until=until, seconds=None if until else 60)
+
def _process_tombstone(self, entry, tombstone):
text = (tombstone.get("richText") or tombstone["text"])["text"]
tweet_id = entry["entryId"].rpartition("-")[2]
@@ -1695,22 +1730,22 @@ class TwitterAPI():
@cache(maxage=365*86400, keyarg=1)
def _login_impl(extr, username, password):
- import re
- import random
+ def process(data, params=None):
+ response = extr.request(
+ url, params=params, headers=headers, json=data,
+ method="POST", fatal=None)
- if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username):
- extr.log.warning(
- "Login with email is no longer possible. "
- "You need to provide your username or phone number instead.")
-
- def process(response):
try:
data = response.json()
except ValueError:
data = {"errors": ({"message": "Invalid response"},)}
else:
if response.status_code < 400:
- return data["flow_token"]
+ try:
+ return (data["flow_token"],
+ data["subtasks"][0]["subtask_id"])
+ except LookupError:
+ pass
errors = []
for error in data.get("errors") or ():
@@ -1719,9 +1754,13 @@ def _login_impl(extr, username, password):
extr.log.debug(response.text)
raise exception.AuthenticationError(", ".join(errors))
- extr.cookies.clear()
+ cookies = extr.cookies
+ cookies.clear()
api = TwitterAPI(extr)
api._authenticate_guest()
+
+ url = "https://api.x.com/1.1/onboarding/task.json"
+ params = {"flow_name": "login"}
headers = api.headers
extr.log.info("Logging in as %s", username)
@@ -1778,31 +1817,18 @@ def _login_impl(extr, username, password):
"web_modal": 1,
},
}
- url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login"
- response = extr.request(url, method="POST", headers=headers, json=data)
- data = {
- "flow_token": process(response),
- "subtask_inputs": [
- {
- "subtask_id": "LoginJsInstrumentationSubtask",
+ flow_token, subtask = process(data, params)
+ while not cookies.get("auth_token"):
+ if subtask == "LoginJsInstrumentationSubtask":
+ data = {
"js_instrumentation": {
"response": "{}",
"link": "next_link",
},
- },
- ],
- }
- url = "https://api.twitter.com/1.1/onboarding/task.json"
- response = extr.request(
- url, method="POST", headers=headers, json=data, fatal=None)
-
- # username
- data = {
- "flow_token": process(response),
- "subtask_inputs": [
- {
- "subtask_id": "LoginEnterUserIdentifierSSO",
+ }
+ elif subtask == "LoginEnterUserIdentifierSSO":
+ data = {
"settings_list": {
"setting_responses": [
{
@@ -1814,48 +1840,61 @@ def _login_impl(extr, username, password):
],
"link": "next_link",
},
- },
- ],
- }
- # url = "https://api.twitter.com/1.1/onboarding/task.json"
- extr.sleep(random.uniform(2.0, 4.0), "login (username)")
- response = extr.request(
- url, method="POST", headers=headers, json=data, fatal=None)
-
- # password
- data = {
- "flow_token": process(response),
- "subtask_inputs": [
- {
- "subtask_id": "LoginEnterPassword",
+ }
+ elif subtask == "LoginEnterPassword":
+ data = {
"enter_password": {
"password": password,
"link": "next_link",
},
- },
- ],
- }
- # url = "https://api.twitter.com/1.1/onboarding/task.json"
- extr.sleep(random.uniform(2.0, 4.0), "login (password)")
- response = extr.request(
- url, method="POST", headers=headers, json=data, fatal=None)
-
- # account duplication check ?
- data = {
- "flow_token": process(response),
- "subtask_inputs": [
- {
- "subtask_id": "AccountDuplicationCheck",
+ }
+ elif subtask == "LoginEnterAlternateIdentifierSubtask":
+ alt = extr.input(
+ "Alternate Identifier (username, email, phone number): ")
+ data = {
+ "enter_text": {
+ "text": alt,
+ "link": "next_link",
+ },
+ }
+ elif subtask == "LoginTwoFactorAuthChallenge":
+ data = {
+ "enter_text": {
+ "text": extr.input("2FA Token: "),
+ "link": "next_link",
+ },
+ }
+ elif subtask == "LoginAcid":
+ data = {
+ "enter_text": {
+ "text": extr.input("Email Verification Code: "),
+ "link": "next_link",
+ },
+ }
+ elif subtask == "AccountDuplicationCheck":
+ data = {
"check_logged_in_account": {
"link": "AccountDuplicationCheck_false",
},
- },
- ],
- }
- # url = "https://api.twitter.com/1.1/onboarding/task.json"
- response = extr.request(
- url, method="POST", headers=headers, json=data, fatal=None)
- process(response)
+ }
+ elif subtask == "ArkoseLogin":
+ raise exception.AuthenticationError("Login requires CAPTCHA")
+ elif subtask == "DenyLoginSubtask":
+ raise exception.AuthenticationError("Login rejected as suspicious")
+ elif subtask == "ArkoseLogin":
+ raise exception.AuthenticationError("No auth token cookie")
+ else:
+ raise exception.StopExtraction("Unrecognized subtask %s", subtask)
+
+ inputs = {"subtask_id": subtask}
+ inputs.update(data)
+ data = {
+ "flow_token": flow_token,
+ "subtask_inputs": [inputs],
+ }
+
+ extr.sleep(random.uniform(1.0, 3.0), "login ({})".format(subtask))
+ flow_token, subtask = process(data)
return {
cookie.name: cookie.value
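
The formerly unrolled login requests become one loop: each iteration answers whichever subtask the onboarding endpoint returned last, and the loop ends once an `auth_token` cookie appears. Every request body has the same shape; a sketch with illustrative values:

    flow_token = "g;1234567890:-1"  # illustrative token from the previous step
    data = {
        "flow_token": flow_token,
        "subtask_inputs": [{
            "subtask_id": "LoginEnterPassword",
            "enter_password": {"password": "hunter2", "link": "next_link"},
        }],
    }
    # process(data) POSTs this to /1.1/onboarding/task.json and returns
    # the next (flow_token, subtask_id) pair
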
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 41141c6..c112f4a 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -46,6 +46,8 @@ class VscoExtractor(Extractor):
url = "https://image-{}.vsco.co/{}".format(cdn, path)
elif cdn.isdecimal():
url = "https://image.vsco.co/" + base
+ elif img["responsive_url"].startswith("http"):
+ url = img["responsive_url"]
else:
url = "https://" + img["responsive_url"]
@@ -238,6 +240,34 @@ class VscoSpacesExtractor(VscoExtractor):
yield Message.Queue, url, space
+class VscoAvatarExtractor(VscoExtractor):
+ """Extractor for vsco.co user avatars"""
+ subcategory = "avatar"
+ pattern = USER_PATTERN + r"/avatar"
+ example = "https://vsco.co/USER/avatar"
+
+ def images(self):
+ url = "{}/{}/gallery".format(self.root, self.user)
+ page = self.request(url).text
+ piid = text.extr(page, '"profileImageId":"', '"')
+
+ url = "https://im.vsco.co/" + piid
+ # needs GET request, since HEAD does not redirect to full URL
+ response = self.request(url, allow_redirects=False)
+
+ return ({
+ "_id" : piid,
+ "is_video" : False,
+ "grid_name" : "",
+ "upload_date" : 0,
+ "responsive_url": response.headers["Location"],
+ "video_url" : "",
+ "image_meta" : None,
+ "width" : 0,
+ "height" : 0,
+ },)
+
+
class VscoImageExtractor(VscoExtractor):
"""Extractor for individual images on vsco.co"""
subcategory = "image"
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index ac00682..9370cfb 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -27,9 +27,9 @@ class WikimediaExtractor(BaseExtractor):
if self.category == "wikimedia":
self.category = self.root.split(".")[-2]
- elif self.category == "fandom":
- self.category = \
- "fandom-" + self.root.partition(".")[0].rpartition("/")[2]
+ elif self.category in ("fandom", "wikigg"):
+ self.category = "{}-{}".format(
+ self.category, self.root.partition(".")[0].rpartition("/")[2])
if path.startswith("wiki/"):
path = path[5:]
@@ -69,14 +69,18 @@ class WikimediaExtractor(BaseExtractor):
def items(self):
for info in self._pagination(self.params):
- image = info["imageinfo"][0]
+ try:
+ image = info["imageinfo"][0]
+ except LookupError:
+ self.log.debug("Missing 'imageinfo' for %s", info)
+ continue
image["metadata"] = {
m["name"]: m["value"]
- for m in image["metadata"]}
+ for m in image["metadata"] or ()}
image["commonmetadata"] = {
m["name"]: m["value"]
- for m in image["commonmetadata"]}
+ for m in image["commonmetadata"] or ()}
filename = image["canonicaltitle"]
image["filename"], _, image["extension"] = \
@@ -148,6 +152,10 @@ BASE_PATTERN = WikimediaExtractor.update({
"root": None,
"pattern": r"[\w-]+\.fandom\.com",
},
+ "wikigg": {
+ "root": None,
+ "pattern": r"\w+\.wiki\.gg",
+ },
"mariowiki": {
"root": "https://www.mariowiki.com",
"pattern": r"(?:www\.)?mariowiki\.com",
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index b83cf21..0b212d5 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -243,13 +243,12 @@ class TemplateFStringFormatter(FStringFormatter):
def parse_field_name(field_name):
+ if field_name[0] == "'":
+ return "_lit", (operator.itemgetter(field_name[1:-1]),)
+
first, rest = _string.formatter_field_name_split(field_name)
funcs = []
- if first[0] == "'":
- funcs.append(operator.itemgetter(first[1:-1]))
- first = "_lit"
-
for is_attr, key in rest:
if is_attr:
func = operator.attrgetter
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index eb10a0c..4562b05 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -11,10 +11,23 @@ import errno
import logging
import functools
import collections
-from . import extractor, downloader, postprocessor
-from . import config, text, util, path, formatter, output, exception, version
+
+from . import (
+ extractor,
+ downloader,
+ postprocessor,
+ archive,
+ config,
+ exception,
+ formatter,
+ output,
+ path,
+ text,
+ util,
+ version,
+)
from .extractor.message import Message
-from .output import stdout_write
+stdout_write = output.stdout_write
class Job():
@@ -29,8 +42,9 @@ class Job():
self.extractor = extr
self.pathfmt = None
- self.kwdict = {}
self.status = 0
+ self.kwdict = {}
+ self.kwdict_eval = False
cfgpath = []
if parent:
@@ -107,7 +121,16 @@ class Job():
# user-supplied metadata
kwdict = extr.config("keywords")
if kwdict:
- self.kwdict.update(kwdict)
+ if extr.config("keywords-eval"):
+ self.kwdict_eval = []
+ for key, value in kwdict.items():
+ if isinstance(value, str):
+ fmt = formatter.parse(value, None, util.identity)
+ self.kwdict_eval.append((key, fmt.format_map))
+ else:
+ self.kwdict[key] = value
+ else:
+ self.kwdict.update(kwdict)
def run(self):
"""Execute or run the job"""
@@ -202,6 +225,9 @@ class Job():
kwdict.pop(self.metadata_http, None)
if self.kwdict:
kwdict.update(self.kwdict)
+ if self.kwdict_eval:
+ for key, valuegen in self.kwdict_eval:
+ kwdict[key] = valuegen(kwdict)
def _init(self):
self.extractor.initialize()
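
With the new `keywords-eval` option, string values under `keywords` are parsed once as format strings and re-evaluated against every file's metadata dict; non-string values are still copied verbatim. A hypothetical config, written as a Python dict:

    options = {
        "keywords": {
            "rating": 5,                      # non-string: copied as-is
            "slug": "{category}_{tweet_id}",  # string: evaluated per file
        },
        "keywords-eval": True,                # "tweet_id" is illustrative
    }
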
@@ -423,6 +449,8 @@ class DownloadJob(Job):
def handle_finalize(self):
if self.archive:
+ if not self.status:
+ self.archive.finalize()
self.archive.close()
pathfmt = self.pathfmt
@@ -453,9 +481,12 @@ class DownloadJob(Job):
for callback in self.hooks["skip"]:
callback(pathfmt)
if self._skipexc:
- self._skipcnt += 1
- if self._skipcnt >= self._skipmax:
- raise self._skipexc()
+ if not self._skipftr or self._skipftr(pathfmt.kwdict):
+ self._skipcnt += 1
+ if self._skipcnt >= self._skipmax:
+ raise self._skipexc()
+ else:
+ self._skipcnt = 0
def download(self, url):
"""Download 'url'"""
@@ -507,23 +538,28 @@ class DownloadJob(Job):
# monkey-patch method to do nothing and always return True
self.download = pathfmt.fix_extension
- archive = cfg("archive")
- if archive:
- archive = util.expand_path(archive)
+ archive_path = cfg("archive")
+ if archive_path:
+ archive_path = util.expand_path(archive_path)
archive_format = (cfg("archive-prefix", extr.category) +
cfg("archive-format", extr.archive_fmt))
archive_pragma = (cfg("archive-pragma"))
try:
- if "{" in archive:
- archive = formatter.parse(archive).format_map(kwdict)
- self.archive = util.DownloadArchive(
- archive, archive_format, archive_pragma)
+ if "{" in archive_path:
+ archive_path = formatter.parse(
+ archive_path).format_map(kwdict)
+ if cfg("archive-mode") == "memory":
+ archive_cls = archive.DownloadArchiveMemory
+ else:
+ archive_cls = archive.DownloadArchive
+ self.archive = archive_cls(
+ archive_path, archive_format, archive_pragma)
except Exception as exc:
extr.log.warning(
"Failed to open download archive at '%s' (%s: %s)",
- archive, exc.__class__.__name__, exc)
+ archive_path, exc.__class__.__name__, exc)
else:
- extr.log.debug("Using download archive '%s'", archive)
+ extr.log.debug("Using download archive '%s'", archive_path)
skip = cfg("skip", True)
if skip:
@@ -539,6 +575,12 @@ class DownloadJob(Job):
elif skip == "exit":
self._skipexc = SystemExit
self._skipmax = text.parse_int(smax)
+
+ skip_filter = cfg("skip-filter")
+ if skip_filter:
+ self._skipftr = util.compile_expression(skip_filter)
+ else:
+ self._skipftr = None
else:
# monkey-patch methods to always return False
pathfmt.exists = lambda x=None: False
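
`skip-filter` narrows which skipped files count toward a `skip = "abort:N"` (or `"exit:N"`) limit: only skips whose metadata matches the expression increment the counter, and any non-matching skip resets it to zero. A hypothetical config:

    options = {
        "skip": "abort:10",                   # stop after 10 consecutive skips,
        "skip-filter": "extension == 'mp4'",  # counting only skipped videos
    }
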
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 72a602f..12622d0 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -173,6 +173,28 @@ def build_parser():
action="version", version=version.__version__,
help="Print program version and exit",
)
+ if util.EXECUTABLE:
+ general.add_argument(
+ "-U", "--update",
+ dest="update", action="store_const", const="latest",
+ help="Update to the latest version",
+ )
+ general.add_argument(
+ "--update-to",
+ dest="update", metavar="[CHANNEL@]TAG",
+ help="Upgrade/downgrade to a specific version",
+ )
+ general.add_argument(
+ "--update-check",
+ dest="update", action="store_const", const="check",
+ help="Check if a newer version is available",
+ )
+ else:
+ general.add_argument(
+ "-U", "--update-check",
+ dest="update", action="store_const", const="check",
+ help="Check if a newer version is available",
+ )
general.add_argument(
"-f", "--filename",
dest="filename", metavar="FORMAT",
@@ -250,6 +272,12 @@ def build_parser():
help="Activate quiet mode",
)
output.add_argument(
+ "-w", "--warning",
+ dest="loglevel",
+ action="store_const", const=logging.WARNING,
+ help="Print only warnings and errors",
+ )
+ output.add_argument(
"-v", "--verbose",
dest="loglevel",
action="store_const", const=logging.DEBUG,
@@ -319,6 +347,11 @@ def build_parser():
help=("Write downloaded intermediary pages to files "
"in the current directory to debug problems"),
)
+ output.add_argument(
+ "--no-colors",
+ dest="colors", action="store_false",
+ help=("Do not emit ANSI color codes in output"),
+ )
downloader = parser.add_argument_group("Downloader Options")
downloader.add_argument(
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 2bcc222..3518545 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -16,11 +16,39 @@ from . import config, util, formatter
# --------------------------------------------------------------------
+# Globals
+
+COLORS = not os.environ.get("NO_COLOR")
+COLORS_DEFAULT = {
+ "success": "1;32",
+ "skip" : "2",
+ "debug" : "0;37",
+ "info" : "1;37",
+ "warning": "1;33",
+ "error" : "1;31",
+} if COLORS else {}
+
+if util.WINDOWS:
+ ANSI = COLORS and os.environ.get("TERM") == "ANSI"
+ OFFSET = 1
+ CHAR_SKIP = "# "
+ CHAR_SUCCESS = "* "
+ CHAR_ELLIPSIES = "..."
+else:
+ ANSI = COLORS
+ OFFSET = 0
+ CHAR_SKIP = "# "
+ CHAR_SUCCESS = "✔ "
+ CHAR_ELLIPSIES = "…"
+
+
+# --------------------------------------------------------------------
# Logging
LOG_FORMAT = "[{name}][{levelname}] {message}"
LOG_FORMAT_DATE = "%Y-%m-%d %H:%M:%S"
LOG_LEVEL = logging.INFO
+LOG_LEVELS = ("debug", "info", "warning", "error")
class Logger(logging.Logger):
@@ -129,7 +157,7 @@ class Formatter(logging.Formatter):
def __init__(self, fmt, datefmt):
if isinstance(fmt, dict):
- for key in ("debug", "info", "warning", "error"):
+ for key in LOG_LEVELS:
value = fmt[key] if key in fmt else LOG_FORMAT
fmt[key] = (formatter.parse(value).format_map,
"{asctime" in value)
@@ -187,16 +215,36 @@ def configure_logging(loglevel):
# stream logging handler
handler = root.handlers[0]
opts = config.interpolate(("output",), "log")
+
+ colors = config.interpolate(("output",), "colors")
+ if colors is None:
+ colors = COLORS_DEFAULT
+ if colors and not opts:
+ opts = LOG_FORMAT
+
if opts:
if isinstance(opts, str):
- opts = {"format": opts}
- if handler.level == LOG_LEVEL and "level" in opts:
+ logfmt = opts
+ opts = {}
+ elif "format" in opts:
+ logfmt = opts["format"]
+ else:
+ logfmt = LOG_FORMAT
+
+ if not isinstance(logfmt, dict) and colors:
+ ansifmt = "\033[{}m{}\033[0m".format
+ lf = {}
+ for level in LOG_LEVELS:
+ c = colors.get(level)
+ lf[level] = ansifmt(c, logfmt) if c else logfmt
+ logfmt = lf
+
+ handler.setFormatter(Formatter(
+ logfmt, opts.get("format-date", LOG_FORMAT_DATE)))
+
+ if "level" in opts and handler.level == LOG_LEVEL:
handler.setLevel(opts["level"])
- if "format" in opts or "format-date" in opts:
- handler.setFormatter(Formatter(
- opts.get("format", LOG_FORMAT),
- opts.get("format-date", LOG_FORMAT_DATE),
- ))
+
if minlevel > handler.level:
minlevel = handler.level
@@ -307,9 +355,12 @@ def select():
mode = config.get(("output",), "mode")
if mode is None or mode == "auto":
- if hasattr(sys.stdout, "isatty") and sys.stdout.isatty():
- output = ColorOutput() if ANSI else TerminalOutput()
- else:
+ try:
+ if sys.stdout.isatty():
+ output = ColorOutput() if ANSI else TerminalOutput()
+ else:
+ output = PipeOutput()
+ except Exception:
output = PipeOutput()
elif isinstance(mode, dict):
output = CustomOutput(mode)
@@ -388,7 +439,10 @@ class ColorOutput(TerminalOutput):
def __init__(self):
TerminalOutput.__init__(self)
- colors = config.get(("output",), "colors") or {}
+ colors = config.interpolate(("output",), "colors")
+ if colors is None:
+ colors = COLORS_DEFAULT
+
self.color_skip = "\033[{}m".format(
colors.get("skip", "2"))
self.color_success = "\r\033[{}m".format(
@@ -514,17 +568,3 @@ def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
right -= 1
return txt[:left] + sep + txt[right+1:]
-
-
-if util.WINDOWS:
- ANSI = os.environ.get("TERM") == "ANSI"
- OFFSET = 1
- CHAR_SKIP = "# "
- CHAR_SUCCESS = "* "
- CHAR_ELLIPSIES = "..."
-else:
- ANSI = True
- OFFSET = 0
- CHAR_SKIP = "# "
- CHAR_SUCCESS = "✔ "
- CHAR_ELLIPSIES = "…"
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 1d2fba8..d4e1603 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -8,7 +8,7 @@
"""Common classes and constants used by postprocessor modules."""
-from .. import util, formatter
+from .. import util, formatter, archive
class PostProcessor():
@@ -22,30 +22,31 @@ class PostProcessor():
return self.__class__.__name__
def _init_archive(self, job, options, prefix=None):
- archive = options.get("archive")
- if archive:
+ archive_path = options.get("archive")
+ if archive_path:
extr = job.extractor
- archive = util.expand_path(archive)
+ archive_path = util.expand_path(archive_path)
if not prefix:
prefix = "_" + self.name.upper() + "_"
archive_format = (
options.get("archive-prefix", extr.category) +
options.get("archive-format", prefix + extr.archive_fmt))
try:
- if "{" in archive:
- archive = formatter.parse(archive).format_map(
+ if "{" in archive_path:
+ archive_path = formatter.parse(archive_path).format_map(
job.pathfmt.kwdict)
- self.archive = util.DownloadArchive(
- archive, archive_format,
+ self.archive = archive.DownloadArchive(
+ archive_path, archive_format,
options.get("archive-pragma"),
"_archive_" + self.name)
except Exception as exc:
self.log.warning(
"Failed to open %s archive at '%s' (%s: %s)",
- self.name, archive, exc.__class__.__name__, exc)
+ self.name, archive_path, exc.__class__.__name__, exc)
else:
- self.log.debug("Using %s archive '%s'", self.name, archive)
+ self.log.debug(
+ "Using %s archive '%s'", self.name, archive_path)
return True
- else:
- self.archive = None
+
+ self.archive = None
return False
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index e7ed2f6..7d2be2b 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -10,7 +10,6 @@
from .common import PostProcessor
from .. import util, formatter
-import subprocess
import os
import re
@@ -80,14 +79,14 @@ class ExecPP(PostProcessor):
def _exec(self, args, shell):
self.log.debug("Running '%s'", args)
- retcode = subprocess.Popen(args, shell=shell).wait()
+ retcode = util.Popen(args, shell=shell).wait()
if retcode:
self.log.warning("'%s' returned with non-zero exit status (%d)",
args, retcode)
def _exec_async(self, args, shell):
self.log.debug("Running '%s'", args)
- subprocess.Popen(args, shell=shell)
+ util.Popen(args, shell=shell)
def _replace(self, match):
name = match.group(1)
diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py
index ea61b7b..6ded1e2 100644
--- a/gallery_dl/postprocessor/mtime.py
+++ b/gallery_dl/postprocessor/mtime.py
@@ -33,6 +33,9 @@ class MtimePP(PostProcessor):
def run(self, pathfmt):
mtime = self._get(pathfmt.kwdict)
+ if mtime is None:
+ return
+
pathfmt.kwdict["_mtime"] = (
util.datetime_to_timestamp(mtime)
if isinstance(mtime, datetime) else
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index b713c6f..c63a3d9 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -155,7 +155,9 @@ class UgoiraPP(PostProcessor):
self.log.error("Unable to invoke FFmpeg (%s: %s)",
exc.__class__.__name__, exc)
pathfmt.realpath = pathfmt.temppath
- except Exception:
+ except Exception as exc:
+ print()
+ self.log.error("%s: %s", exc.__class__.__name__, exc)
pathfmt.realpath = pathfmt.temppath
else:
if self.mtime:
@@ -171,7 +173,7 @@ class UgoiraPP(PostProcessor):
def _exec(self, args):
self.log.debug(args)
out = None if self.output else subprocess.DEVNULL
- retcode = subprocess.Popen(args, stdout=out, stderr=out).wait()
+ retcode = util.Popen(args, stdout=out, stderr=out).wait()
if retcode:
print()
self.log.error("Non-zero exit status when running %s (%s)",
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index b7b5211..9258187 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -73,7 +73,7 @@ def filename_from_url(url):
"""Extract the last part of an URL to use as a filename"""
try:
return url.partition("?")[0].rpartition("/")[2]
- except (TypeError, AttributeError):
+ except Exception:
return ""
@@ -122,7 +122,7 @@ def extract(txt, begin, end, pos=0):
first = txt.index(begin, pos) + len(begin)
last = txt.index(end, first)
return txt[first:last], last+len(end)
- except (ValueError, TypeError, AttributeError):
+ except Exception:
return None, pos
@@ -131,7 +131,7 @@ def extr(txt, begin, end, default=""):
try:
first = txt.index(begin) + len(begin)
return txt[first:txt.index(end, first)]
- except (ValueError, TypeError, AttributeError):
+ except Exception:
return default
@@ -141,7 +141,7 @@ def rextract(txt, begin, end, pos=-1):
first = txt.rindex(begin, 0, pos)
last = txt.index(end, first + lbeg)
return txt[first + lbeg:last], first
- except (ValueError, TypeError, AttributeError):
+ except Exception:
return None, pos
@@ -167,7 +167,7 @@ def extract_iter(txt, begin, end, pos=0):
last = index(end, first)
pos = last + lend
yield txt[first:last]
- except (ValueError, TypeError, AttributeError):
+ except Exception:
return
@@ -180,7 +180,7 @@ def extract_from(txt, pos=0, default=""):
last = index(end, first)
pos = last + len(end)
return txt[first:last]
- except (ValueError, TypeError, AttributeError):
+ except Exception:
return default
return extr
@@ -200,7 +200,7 @@ def parse_bytes(value, default=0, suffixes="bkmgtp"):
"""Convert a bytes-amount ("500k", "2.5M", ...) to int"""
try:
last = value[-1].lower()
- except (TypeError, LookupError):
+ except Exception:
return default
if last in suffixes:
@@ -221,7 +221,7 @@ def parse_int(value, default=0):
return default
try:
return int(value)
- except (ValueError, TypeError):
+ except Exception:
return default
@@ -231,7 +231,7 @@ def parse_float(value, default=0.0):
return default
try:
return float(value)
- except (ValueError, TypeError):
+ except Exception:
return default
@@ -242,7 +242,7 @@ def parse_query(qs):
for key, value in urllib.parse.parse_qsl(qs):
if key not in result:
result[key] = value
- except AttributeError:
+ except Exception:
pass
return result
@@ -251,7 +251,7 @@ def parse_timestamp(ts, default=None):
"""Create a datetime object from a unix timestamp"""
try:
return datetime.datetime.utcfromtimestamp(int(ts))
- except (TypeError, ValueError, OverflowError):
+ except Exception:
return default
diff --git a/gallery_dl/update.py b/gallery_dl/update.py
new file mode 100644
index 0000000..b068e37
--- /dev/null
+++ b/gallery_dl/update.py
@@ -0,0 +1,218 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import re
+import sys
+
+from .extractor.common import Extractor, Message
+from .job import DownloadJob
+from . import util, version, exception
+
+REPOS = {
+ "stable" : "mikf/gallery-dl",
+ "dev" : "gdl-org/builds",
+ "nightly": "gdl-org/builds",
+ "master" : "gdl-org/builds",
+}
+
+BINARIES_STABLE = {
+ "windows" : "gallery-dl.exe",
+ "windows_x86": "gallery-dl.exe",
+ "windows_x64": "gallery-dl.exe",
+ "linux" : "gallery-dl.bin",
+}
+BINARIES_DEV = {
+ "windows" : "gallery-dl_windows.exe",
+ "windows_x86": "gallery-dl_windows_x86.exe",
+ "windows_x64": "gallery-dl_windows.exe",
+ "linux" : "gallery-dl_linux",
+ "macos" : "gallery-dl_macos",
+}
+BINARIES = {
+ "stable" : BINARIES_STABLE,
+ "dev" : BINARIES_DEV,
+ "nightly": BINARIES_DEV,
+ "master" : BINARIES_DEV,
+}
+
+
+class UpdateJob(DownloadJob):
+
+ def handle_url(self, url, kwdict):
+ if not self._check_update(kwdict):
+ if kwdict["_check"]:
+ self.status |= 1
+ return self.extractor.log.info(
+ "gallery-dl is up to date (%s)", version.__version__)
+
+ if kwdict["_check"]:
+ return self.extractor.log.info(
+ "A new release is available: %s -> %s",
+ version.__version__, kwdict["tag_name"])
+
+ self.extractor.log.info(
+ "Updating from %s to %s",
+ version.__version__, kwdict["tag_name"])
+
+ path_old = sys.executable + ".old"
+ path_new = sys.executable + ".new"
+ directory, filename = os.path.split(sys.executable)
+
+ pathfmt = self.pathfmt
+ pathfmt.extension = "new"
+ pathfmt.filename = filename
+ pathfmt.temppath = path_new
+ pathfmt.realpath = pathfmt.path = sys.executable
+ pathfmt.realdirectory = pathfmt.directory = directory
+
+ self._newline = True
+ if not self.download(url):
+ self.status |= 4
+ return self._error("Failed to download %s", url.rpartition("/")[2])
+
+ if not util.WINDOWS:
+ try:
+ mask = os.stat(sys.executable).st_mode
+ except OSError:
+ mask = 0o755
+ self._warning("Unable to get file permission bits")
+
+ try:
+ os.replace(sys.executable, path_old)
+ except OSError:
+ return self._error("Unable to move current executable")
+
+ try:
+ pathfmt.finalize()
+ except OSError:
+ self._error("Unable to overwrite current executable")
+ return os.replace(path_old, sys.executable)
+
+ if util.WINDOWS:
+ import atexit
+ import subprocess
+
+ cmd = 'ping 127.0.0.1 -n 5 -w 1000 & del /F "{}"'.format(path_old)
+ atexit.register(
+ util.Popen, cmd, shell=True,
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+ )
+
+ else:
+ try:
+ os.unlink(path_old)
+ except OSError:
+ self._warning("Unable to delete old executable")
+
+ try:
+ os.chmod(sys.executable, mask)
+ except OSError:
+ self._warning("Unable to restore file permission bits")
+
+ self.out.success(pathfmt.path)
+
+ def _check_update(self, kwdict):
+ if kwdict["_exact"]:
+ return True
+
+ tag = kwdict["tag_name"]
+
+ if tag[0] == "v":
+ kwdict["tag_name"] = tag = tag[1:]
+ ver, _, dev = version.__version__.partition("-")
+
+ version_local = [int(v) for v in ver.split(".")]
+ version_remote = [int(v) for v in tag.split(".")]
+
+ if dev:
+ version_local[-1] -= 0.5
+ if version_local >= version_remote:
+ return False
+
+ elif version.__version__.endswith(":" + tag):
+ return False
+
+ return True
+
+ def _warning(self, msg, *args):
+ if self._newline:
+ self._newline = False
+ print()
+ self.extractor.log.warning(msg, *args)
+
+ def _error(self, msg, *args):
+ if self._newline:
+ self._newline = False
+ print()
+ self.status |= 1
+ self.extractor.log.error(msg, *args)
+
+
+class UpdateExtractor(Extractor):
+ category = "update"
+ root = "https://github.com"
+ root_api = "https://api.github.com"
+ pattern = r"update(?::(.+))?"
+
+ def items(self):
+ tag = "latest"
+ check = exact = False
+
+ variant = version.__variant__ or "stable/windows"
+ repo, _, binary = variant.partition("/")
+
+ target = self.groups[0]
+ if target == "latest":
+ pass
+ elif target == "check":
+ check = True
+ else:
+ channel, sep, target = target.partition("@")
+ if sep:
+ repo = channel
+ tag = target
+ exact = True
+ elif channel in REPOS:
+ repo = channel
+ else:
+ tag = channel
+ exact = True
+
+ if re.match(r"\d\.\d+\.\d+", tag):
+ tag = "v" + tag
+
+ try:
+ path_repo = REPOS[repo or "stable"]
+ except KeyError:
+ raise exception.StopExtraction("Invalid channel '%s'", repo)
+
+ path_tag = tag if tag == "latest" else "tags/" + tag
+ url = "{}/repos/{}/releases/{}".format(
+ self.root_api, path_repo, path_tag)
+ headers = {
+ "Accept": "application/vnd.github+json",
+ "User-Agent": util.USERAGENT,
+ "X-GitHub-Api-Version": "2022-11-28",
+ }
+ data = self.request(url, headers=headers, notfound="tag").json()
+ data["_check"] = check
+ data["_exact"] = exact
+
+ if binary == "linux" and \
+ repo != "stable" and \
+ data["tag_name"] <= "2024.05.28":
+ binary_name = "gallery-dl_ubuntu"
+ else:
+ binary_name = BINARIES[repo][binary]
+
+ url = "{}/{}/releases/download/{}/{}".format(
+ self.root, path_repo, data["tag_name"], binary_name)
+
+ yield Message.Directory, data
+ yield Message.Url, url, data
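
`UpdateExtractor` accepts `update`, `update:check`, a bare channel or tag, or an explicit `CHANNEL@TAG`; purely numeric tags get a `v` prefix to match stable release names. The version check demotes a `-dev` build by half a point so it still updates to its own final release:

    # sketch of UpdateJob._check_update for a local dev build
    ver, _, dev = "1.27.0-dev".partition("-")
    version_local = [int(v) for v in ver.split(".")]  # [1, 27, 0]
    version_remote = [1, 27, 0]                       # from tag "v1.27.0"
    if dev:
        version_local[-1] -= 0.5                      # [1, 27, -0.5]
    print(version_local < version_remote)             # True -> update available
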
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index bc9418f..861ec7e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -16,7 +16,6 @@ import time
import random
import getpass
import hashlib
-import sqlite3
import binascii
import datetime
import functools
@@ -339,7 +338,7 @@ def extract_headers(response):
@functools.lru_cache(maxsize=None)
def git_head():
try:
- out, err = subprocess.Popen(
+ out, err = Popen(
("git", "rev-parse", "--short", "HEAD"),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
@@ -579,6 +578,33 @@ GLOBALS = {
}
+if EXECUTABLE and hasattr(sys, "_MEIPASS"):
+ # https://github.com/pyinstaller/pyinstaller/blob/develop/doc
+ # /runtime-information.rst#ld_library_path--libpath-considerations
+ _popen_env = os.environ.copy()
+
+ orig = _popen_env.get("LD_LIBRARY_PATH_ORIG")
+ if orig is None:
+ _popen_env.pop("LD_LIBRARY_PATH", None)
+ else:
+ _popen_env["LD_LIBRARY_PATH"] = orig
+
+ orig = _popen_env.get("DYLD_LIBRARY_PATH_ORIG")
+ if orig is None:
+ _popen_env.pop("DYLD_LIBRARY_PATH", None)
+ else:
+ _popen_env["DYLD_LIBRARY_PATH"] = orig
+
+ del orig
+
+ class Popen(subprocess.Popen):
+ def __init__(self, args, **kwargs):
+ kwargs["env"] = _popen_env
+ subprocess.Popen.__init__(self, args, **kwargs)
+else:
+ Popen = subprocess.Popen
+
+
def compile_expression(expr, name="<expr>", globals=None):
code_object = compile(expr, name, "eval")
return functools.partial(eval, code_object, globals or GLOBALS)
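
For PyInstaller builds, `util.Popen` launches children with the loader environment from before the bundle prepended its own libraries, following PyInstaller's `*_LIBRARY_PATH_ORIG` convention, so external tools load their system libraries again. The same fix applied by hand (assuming git is installed):

    import os
    import subprocess

    env = os.environ.copy()
    orig = env.get("LD_LIBRARY_PATH_ORIG")
    if orig is None:
        env.pop("LD_LIBRARY_PATH", None)  # the bundle set it; drop it
    else:
        env["LD_LIBRARY_PATH"] = orig     # restore the pre-bundle value
    subprocess.Popen(("git", "--version"), env=env).wait()
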
@@ -825,46 +851,3 @@ class FilterPredicate():
raise
except Exception as exc:
raise exception.FilterError(exc)
-
-
-class DownloadArchive():
-
- def __init__(self, path, format_string, pragma=None,
- cache_key="_archive_key"):
- try:
- con = sqlite3.connect(path, timeout=60, check_same_thread=False)
- except sqlite3.OperationalError:
- os.makedirs(os.path.dirname(path))
- con = sqlite3.connect(path, timeout=60, check_same_thread=False)
- con.isolation_level = None
-
- from . import formatter
- self.keygen = formatter.parse(format_string).format_map
- self.close = con.close
- self.cursor = cursor = con.cursor()
- self._cache_key = cache_key
-
- if pragma:
- for stmt in pragma:
- cursor.execute("PRAGMA " + stmt)
-
- try:
- cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry TEXT PRIMARY KEY) WITHOUT ROWID")
- except sqlite3.OperationalError:
- # fallback for missing WITHOUT ROWID support (#553)
- cursor.execute("CREATE TABLE IF NOT EXISTS archive "
- "(entry TEXT PRIMARY KEY)")
-
- def check(self, kwdict):
- """Return True if the item described by 'kwdict' exists in archive"""
- key = kwdict[self._cache_key] = self.keygen(kwdict)
- self.cursor.execute(
- "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
- return self.cursor.fetchone()
-
- def add(self, kwdict):
- """Add item described by 'kwdict' to archive"""
- key = kwdict.get(self._cache_key) or self.keygen(kwdict)
- self.cursor.execute(
- "INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d438ba4..6557763 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.9"
+__version__ = "1.27.0"
+__variant__ = None