Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/__init__.py                  | 277
-rw-r--r--  gallery_dl/downloader/http.py           |   6
-rw-r--r--  gallery_dl/exception.py                 |  10
-rw-r--r--  gallery_dl/extractor/__init__.py        |   2
-rw-r--r--  gallery_dl/extractor/behance.py         |  33
-rw-r--r--  gallery_dl/extractor/blogger.py         |  36
-rw-r--r--  gallery_dl/extractor/common.py          |   6
-rw-r--r--  gallery_dl/extractor/cyberdrop.py       |  56
-rw-r--r--  gallery_dl/extractor/erome.py           |   4
-rw-r--r--  gallery_dl/extractor/exhentai.py        |  24
-rw-r--r--  gallery_dl/extractor/fapello.py         |  31
-rw-r--r--  gallery_dl/extractor/foolslide.py       |   4
-rw-r--r--  gallery_dl/extractor/hentaicosplays.py  |   2
-rw-r--r--  gallery_dl/extractor/hentaifoundry.py   |  20
-rw-r--r--  gallery_dl/extractor/hiperdex.py        |   4
-rw-r--r--  gallery_dl/extractor/idolcomplex.py     | 129
-rw-r--r--  gallery_dl/extractor/imagefap.py        |   5
-rw-r--r--  gallery_dl/extractor/instagram.py       |   5
-rw-r--r--  gallery_dl/extractor/mangaread.py       |   4
-rw-r--r--  gallery_dl/extractor/mastodon.py        |   8
-rw-r--r--  gallery_dl/extractor/nitter.py          |   2
-rw-r--r--  gallery_dl/extractor/oauth.py           |   5
-rw-r--r--  gallery_dl/extractor/pixeldrain.py      |  88
-rw-r--r--  gallery_dl/extractor/pornhub.py         |   2
-rw-r--r--  gallery_dl/extractor/sankaku.py         |   4
-rw-r--r--  gallery_dl/extractor/tmohentai.py       |  48
-rw-r--r--  gallery_dl/extractor/tumblr.py          |   9
-rw-r--r--  gallery_dl/extractor/twitter.py         |  19
-rw-r--r--  gallery_dl/extractor/wallpapercave.py   |  14
-rw-r--r--  gallery_dl/extractor/warosu.py          |   5
-rw-r--r--  gallery_dl/extractor/webtoons.py        |   7
-rw-r--r--  gallery_dl/extractor/weibo.py           |  11
-rw-r--r--  gallery_dl/extractor/xvideos.py         |   2
-rw-r--r--  gallery_dl/extractor/zerochan.py        |  10
-rw-r--r--  gallery_dl/job.py                       |  27
-rw-r--r--  gallery_dl/option.py                    | 226
-rw-r--r--  gallery_dl/util.py                      |  85
-rw-r--r--  gallery_dl/version.py                   |   2
38 files changed, 878 insertions(+), 354 deletions(-)
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index d3a0f58..287faf1 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -18,19 +18,6 @@ __email__ = "mike_faehrmann@web.de"
__version__ = version.__version__
-def progress(urls, pformat):
- """Wrapper around urls to output a simple progress indicator"""
- if pformat is True:
- pformat = "[{current}/{total}] {url}\n"
- else:
- pformat += "\n"
-
- pinfo = {"total": len(urls)}
- for pinfo["current"], pinfo["url"] in enumerate(urls, 1):
- output.stderr_write(pformat.format_map(pinfo))
- yield pinfo["url"]
-
-
def main():
try:
parser = option.build_parser()
@@ -128,6 +115,7 @@ def main():
output.configure_logging(args.loglevel)
if args.loglevel >= logging.ERROR:
config.set(("output",), "mode", "null")
+ config.set(("downloader",), "progress", None)
elif args.loglevel <= logging.DEBUG:
import platform
import requests
@@ -224,7 +212,7 @@ def main():
return config.initialize()
else:
- if not args.urls and not args.inputfiles:
+ if not args.urls and not args.input_files:
parser.error(
"The following arguments are required: URL\n"
"Use 'gallery-dl --help' to get a list of all options.")
@@ -238,22 +226,6 @@ def main():
else:
jobtype = args.jobtype or job.DownloadJob
- urls = args.urls
- if args.inputfiles:
- for inputfile in args.inputfiles:
- try:
- if inputfile == "-":
- if sys.stdin:
- urls += util.parse_inputfile(sys.stdin, log)
- else:
- log.warning(
- "input file: stdin is not readable")
- else:
- with open(inputfile, encoding="utf-8") as file:
- urls += util.parse_inputfile(file, log)
- except OSError as exc:
- log.warning("input file: %s", exc)
-
# unsupported file logging handler
handler = output.setup_logging_handler(
"unsupportedfile", fmt="{message}")
@@ -263,25 +235,44 @@ def main():
ulog.propagate = False
job.Job.ulog = ulog
+ # collect input URLs
+ input_manager = InputManager()
+ input_manager.log = input_log = logging.getLogger("inputfile")
+ input_manager.add_list(args.urls)
+
+ if args.input_files:
+ for input_file, action in args.input_files:
+ try:
+ path = util.expand_path(input_file)
+ input_manager.add_file(path, action)
+ except Exception as exc:
+ input_log.error(exc)
+ return getattr(exc, "code", 128)
+
pformat = config.get(("output",), "progress", True)
- if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
- urls = progress(urls, pformat)
- else:
- urls = iter(urls)
+ if pformat and len(input_manager.urls) > 1 and \
+ args.loglevel < logging.ERROR:
+ input_manager.progress(pformat)
+ # process input URLs
retval = 0
- url = next(urls, None)
-
- while url is not None:
+ for url in input_manager:
try:
log.debug("Starting %s for '%s'", jobtype.__name__, url)
- if isinstance(url, util.ExtendedUrl):
+
+ if isinstance(url, ExtendedUrl):
for opts in url.gconfig:
config.set(*opts)
with config.apply(url.lconfig):
- retval |= jobtype(url.value).run()
+ status = jobtype(url.value).run()
+ else:
+ status = jobtype(url).run()
+
+ if status:
+ retval |= status
else:
- retval |= jobtype(url).run()
+ input_manager.success()
+
except exception.TerminateExtraction:
pass
except exception.RestartExtraction:
@@ -291,8 +282,7 @@ def main():
log.error("Unsupported URL '%s'", url)
retval |= 64
- url = next(urls, None)
-
+ input_manager.next()
return retval
except KeyboardInterrupt:
@@ -304,3 +294,206 @@ def main():
if exc.errno != errno.EPIPE:
raise
return 1
+
+
+class InputManager():
+
+ def __init__(self):
+ self.urls = []
+ self.files = ()
+ self._index = 0
+ self._current = None
+ self._pformat = None
+
+ def add_url(self, url):
+ self.urls.append(url)
+
+ def add_list(self, urls):
+ self.urls += urls
+
+ def add_file(self, path, action=None):
+ """Process an input file.
+
+ Lines starting with '#' and empty lines will be ignored.
+ Lines starting with '-' will be interpreted as a key-value pair
+ separated by an '=', where
+ 'key' is a dot-separated option name and
+ 'value' is a JSON-parsable string.
+ These configuration options will be applied
+ while processing the next URL only.
+ Lines starting with '-G' are the same as above, except these options
+ will be applied for *all* following URLs, i.e. they are Global.
+ Everything else will be used as a potential URL.
+
+ Example input file:
+
+ # setting global options
+ -G base-directory = "/tmp/"
+ -G skip = false
+
+ # setting local options for the next URL
+ -filename="spaces_are_optional.jpg"
+ -skip = true
+
+ https://example.org/
+
+ # next URL uses default filename and 'skip' is false.
+ https://example.com/index.htm # comment1
+ https://example.com/404.htm # comment2
+ """
+ if path == "-" and not action:
+ try:
+ lines = sys.stdin.readlines()
+ except Exception:
+ raise exception.InputFileError("stdin is not readable")
+ path = None
+ else:
+ try:
+ with open(path, encoding="utf-8") as fp:
+ lines = fp.readlines()
+ except Exception as exc:
+ raise exception.InputFileError(str(exc))
+
+ if self.files:
+ self.files[path] = lines
+ else:
+ self.files = {path: lines}
+
+ if action == "c":
+ action = self._action_comment
+ elif action == "d":
+ action = self._action_delete
+ else:
+ action = None
+
+ gconf = []
+ lconf = []
+ indicies = []
+ strip_comment = None
+ append = self.urls.append
+
+ for n, line in enumerate(lines):
+ line = line.strip()
+
+ if not line or line[0] == "#":
+ # empty line or comment
+ continue
+
+ elif line[0] == "-":
+ # config spec
+ if len(line) >= 2 and line[1] == "G":
+ conf = gconf
+ line = line[2:]
+ else:
+ conf = lconf
+ line = line[1:]
+ if action:
+ indicies.append(n)
+
+ key, sep, value = line.partition("=")
+ if not sep:
+ raise exception.InputFileError(
+ "Invalid KEY=VALUE pair '%s' on line %s in %s",
+ line, n+1, path)
+
+ try:
+ value = util.json_loads(value.strip())
+ except ValueError as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+ raise exception.InputFileError(
+ "Unable to parse '%s' on line %s in %s",
+ value, n+1, path)
+
+ key = key.strip().split(".")
+ conf.append((key[:-1], key[-1], value))
+
+ else:
+ # url
+ if " #" in line or "\t#" in line:
+ if strip_comment is None:
+ import re
+ strip_comment = re.compile(r"\s+#.*").sub
+ line = strip_comment("", line)
+ if gconf or lconf:
+ url = ExtendedUrl(line, gconf, lconf)
+ gconf = []
+ lconf = []
+ else:
+ url = line
+
+ if action:
+ indicies.append(n)
+ append((url, path, action, indicies))
+ indicies = []
+ else:
+ append(url)
+
+ def progress(self, pformat=True):
+ if pformat is True:
+ pformat = "[{current}/{total}] {url}\n"
+ else:
+ pformat += "\n"
+ self._pformat = pformat.format_map
+
+ def next(self):
+ self._index += 1
+
+ def success(self):
+ if self._current:
+ url, path, action, indicies = self._current
+ lines = self.files[path]
+ action(lines, indicies)
+ try:
+ with open(path, "w", encoding="utf-8") as fp:
+ fp.writelines(lines)
+ except Exception as exc:
+ self.log.warning(
+ "Unable to update '%s' (%s: %s)",
+ path, exc.__class__.__name__, exc)
+
+ @staticmethod
+ def _action_comment(lines, indicies):
+ for i in indicies:
+ lines[i] = "# " + lines[i]
+
+ @staticmethod
+ def _action_delete(lines, indicies):
+ for i in indicies:
+ lines[i] = ""
+
+ def __iter__(self):
+ self._index = 0
+ return self
+
+ def __next__(self):
+ try:
+ item = self.urls[self._index]
+ except IndexError:
+ raise StopIteration
+
+ if isinstance(item, tuple):
+ self._current = item
+ item = item[0]
+ else:
+ self._current = None
+
+ if self._pformat:
+ output.stderr_write(self._pformat({
+ "total" : len(self.urls),
+ "current": self._index + 1,
+ "url" : item,
+ }))
+ return item
+
+
+class ExtendedUrl():
+ """URL with attached config key-value pairs"""
+ __slots__ = ("value", "gconfig", "lconfig")
+
+ def __init__(self, url, gconf, lconf):
+ self.value = url
+ self.gconfig = gconf
+ self.lconfig = lconf
+
+ def __str__(self):
+ return self.value
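
Note: the KEY=VALUE lines accepted by InputManager.add_file() follow the same
rules as -o/--option. A minimal, self-contained sketch of that parsing step,
using json.loads in place of util.json_loads (an assumption; the real helper
is gallery-dl's own JSON wrapper):

import json

def parse_config_line(line):
    # "-G KEY=VALUE" applies globally, "-KEY=VALUE" only to the next URL
    scope = "global" if line.startswith("-G") else "local"
    line = line[2:] if scope == "global" else line[1:]
    key, sep, value = line.partition("=")
    if not sep:
        raise ValueError("Invalid KEY=VALUE pair '%s'" % line)
    path = key.strip().split(".")  # dot-separated option name
    return scope, path[:-1], path[-1], json.loads(value.strip())

print(parse_config_line('-G base-directory = "/tmp/"'))
# ('global', [], 'base-directory', '/tmp/')
print(parse_config_line('-extractor.skip = true'))
# ('local', ['extractor'], 'skip', True)
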
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 30ac001..f493947 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -200,13 +200,15 @@ class HttpDownloader(DownloaderBase):
self.log.warning(
"File size smaller than allowed minimum (%s < %s)",
size, self.minsize)
- return False
+ pathfmt.temppath = ""
+ return True
if self.maxsize and size > self.maxsize:
self.release_conn(response)
self.log.warning(
"File size larger than allowed maximum (%s > %s)",
size, self.maxsize)
- return False
+ pathfmt.temppath = ""
+ return True
build_path = False
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index ef190f2..ee183fc 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -21,6 +21,7 @@ Exception
| +-- FilenameFormatError
| +-- DirectoryFormatError
+-- FilterError
+ +-- InputFileError
+-- NoExtractorError
+-- StopExtraction
+-- TerminateExtraction
@@ -99,6 +100,15 @@ class FilterError(GalleryDLException):
code = 32
+class InputFileError(GalleryDLException):
+ """Error when parsing input file"""
+ code = 32
+
+ def __init__(self, message, *args):
+ GalleryDLException.__init__(
+ self, message % args if args else message)
+
+
class NoExtractorError(GalleryDLException):
"""No extractor can handle the given URL"""
code = 64
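
Note: unlike its siblings, InputFileError formats its message printf-style, so
the raise sites in __init__.py above can pass arguments directly. Illustrated
with a bare Exception stand-in for the real GalleryDLException base:

class GalleryDLException(Exception):  # stand-in for the real base class
    code = 1

class InputFileError(GalleryDLException):
    """Error when parsing input file"""
    code = 32

    def __init__(self, message, *args):
        GalleryDLException.__init__(
            self, message % args if args else message)

err = InputFileError("Unable to parse '%s' on line %s in %s",
                     "fals", 7, "urls.txt")
print(err)       # Unable to parse 'fals' on line 7 in urls.txt
print(err.code)  # 32
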
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 22e4fe3..72239d5 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -117,6 +117,7 @@ modules = [
"piczel",
"pillowfort",
"pinterest",
+ "pixeldrain",
"pixiv",
"pixnet",
"plurk",
@@ -147,6 +148,7 @@ modules = [
"tapas",
"tcbscans",
"telegraph",
+ "tmohentai",
"toyhouse",
"tsumino",
"tumblr",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index a92918e..ad0caf9 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -89,6 +89,17 @@ class BehanceGalleryExtractor(BehanceExtractor):
BehanceExtractor.__init__(self, match)
self.gallery_id = match.group(1)
+ def _init(self):
+ BehanceExtractor._init(self)
+
+ modules = self.config("modules")
+ if modules:
+ if isinstance(modules, str):
+ modules = modules.split(",")
+ self.modules = set(modules)
+ else:
+ self.modules = {"image", "video", "mediacollection", "embed"}
+
def items(self):
data = self.get_gallery_data()
imgs = self.get_images(data)
@@ -97,7 +108,8 @@ class BehanceGalleryExtractor(BehanceExtractor):
yield Message.Directory, data
for data["num"], (url, module) in enumerate(imgs, 1):
data["module"] = module
- data["extension"] = text.ext_from_url(url)
+ data["extension"] = (module.get("extension") or
+ text.ext_from_url(url))
yield Message.Url, url, data
def get_gallery_data(self):
@@ -133,13 +145,17 @@ class BehanceGalleryExtractor(BehanceExtractor):
append = result.append
for module in data["modules"]:
- mtype = module["__typename"]
+ mtype = module["__typename"][:-6].lower()
- if mtype == "ImageModule":
+ if mtype not in self.modules:
+ self.log.debug("Skipping '%s' module", mtype)
+ continue
+
+ if mtype == "image":
url = module["imageSizes"]["size_original"]["url"]
append((url, module))
- elif mtype == "VideoModule":
+ elif mtype == "video":
try:
renditions = module["videoData"]["renditions"]
except Exception:
@@ -158,7 +174,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
append((url, module))
- elif mtype == "MediaCollectionModule":
+ elif mtype == "mediacollection":
for component in module["components"]:
for size in component["imageSizes"].values():
if size:
@@ -167,12 +183,17 @@ class BehanceGalleryExtractor(BehanceExtractor):
append(("/".join(parts), module))
break
- elif mtype == "EmbedModule":
+ elif mtype == "embed":
embed = module.get("originalEmbed") or module.get("fluidEmbed")
if embed:
embed = text.unescape(text.extr(embed, 'src="', '"'))
+ module["extension"] = "mp4"
append(("ytdl:" + embed, module))
+ elif mtype == "text":
+ module["extension"] = "txt"
+ append(("text:" + module["text"], module))
+
return result
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index d75c349..58ae59d 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -8,30 +8,22 @@
"""Extractors for Blogger blogs"""
-from .common import Extractor, Message
+from .common import BaseExtractor, Message
from .. import text, util
import re
-BASE_PATTERN = (
- r"(?:blogger:(?:https?://)?([^/]+)|"
- r"(?:https?://)?([\w-]+\.blogspot\.com))")
-
-class BloggerExtractor(Extractor):
+class BloggerExtractor(BaseExtractor):
"""Base class for blogger extractors"""
- category = "blogger"
- directory_fmt = ("{category}", "{blog[name]}",
+ basecategory = "blogger"
+ directory_fmt = ("blogger", "{blog[name]}",
"{post[date]:%Y-%m-%d} {post[title]}")
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{post[id]}_{num}"
- root = "https://www.blogger.com"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.blog = match.group(1) or match.group(2)
def _init(self):
self.api = BloggerAPI(self)
+ self.blog = self.root.rpartition("/")[2]
self.videos = self.config("videos", True)
def items(self):
@@ -92,6 +84,18 @@ class BloggerExtractor(Extractor):
"""Return additional metadata"""
+BASE_PATTERN = BloggerExtractor.update({
+ "blogspot": {
+ "root": None,
+ "pattern": r"[\w-]+\.blogspot\.com",
+ },
+ "micmicidol": {
+ "root": "https://www.micmicidol.club",
+ "pattern": r"(?:www\.)?micmicidol\.club",
+ },
+})
+
+
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
@@ -100,7 +104,7 @@ class BloggerPostExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- self.path = match.group(3)
+ self.path = match.group(match.lastindex)
def posts(self, blog):
return (self.api.post_by_path(blog["id"], self.path),)
@@ -124,7 +128,7 @@ class BloggerSearchExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- self.query = text.unquote(match.group(3))
+ self.query = text.unquote(match.group(match.lastindex))
def posts(self, blog):
return self.api.blog_search(blog["id"], self.query)
@@ -141,7 +145,7 @@ class BloggerLabelExtractor(BloggerExtractor):
def __init__(self, match):
BloggerExtractor.__init__(self, match)
- self.label = text.unquote(match.group(3))
+ self.label = text.unquote(match.group(match.lastindex))
def posts(self, blog):
return self.api.blog_posts(blog["id"], self.label)
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 3bec424..f378427 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -78,6 +78,12 @@ class Extractor():
def config(self, key, default=None):
return config.interpolate(self._cfgpath, key, default)
+ def config2(self, key, key2, default=None, sentinel=util.SENTINEL):
+ value = self.config(key, sentinel)
+ if value is not sentinel:
+ return value
+ return self.config(key2, default)
+
def config_deprecated(self, key, deprecated, default=None,
sentinel=util.SENTINEL, history=set()):
value = self.config(deprecated, sentinel)
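
Note: config2() prefers its first key and falls back to the second only when
the first is entirely unset; the sentinel keeps falsy values like false or
null from triggering the fallback. The same lookup order, sketched on a plain
dict:

SENTINEL = object()  # stands in for util.SENTINEL

def config2(cfg, key, key2, default=None, sentinel=SENTINEL):
    value = cfg.get(key, sentinel)
    if value is not sentinel:
        return value
    return cfg.get(key2, default)

cfg = {"url-metadata": "gdl_url"}                    # only the old name set
print(config2(cfg, "metadata-url", "url-metadata"))  # gdl_url
cfg["metadata-url"] = False                          # new name wins, even falsy
print(config2(cfg, "metadata-url", "url-metadata"))  # False
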
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 59fd1e5..d864960 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -7,6 +7,7 @@
"""Extractors for https://cyberdrop.me/"""
from . import lolisafe
+from .common import Message
from .. import text
@@ -16,24 +17,43 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)"
example = "https://cyberdrop.me/a/ID"
+ def items(self):
+ files, data = self.fetch_album(self.album_id)
+
+ yield Message.Directory, data
+ for data["num"], file in enumerate(files, 1):
+ file.update(data)
+ text.nameext_from_url(file["name"], file)
+ file["name"], sep, file["id"] = file["filename"].rpartition("-")
+ yield Message.Url, file["url"], file
+
def fetch_album(self, album_id):
- url = self.root + "/a/" + self.album_id
- extr = text.extract_from(self.request(url).text)
-
- files = []
- append = files.append
- while True:
- url = text.unescape(extr('id="file" href="', '"'))
- if not url:
- break
- append({"file": url,
- "_fallback": (self.root + url[url.find("/", 8):],)})
-
- return files, {
+ url = "{}/a/{}".format(self.root, album_id)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+
+ desc = extr('property="og:description" content="', '"')
+ if desc.startswith("A privacy-focused censorship-resistant file "
+ "sharing platform free for everyone."):
+ desc = ""
+ extr('id="title"', "")
+
+ album = {
"album_id" : self.album_id,
- "album_name" : extr("name: '", "'"),
- "date" : text.parse_timestamp(extr("timestamp: ", ",")),
- "album_size" : text.parse_int(extr("totalSize: ", ",")),
- "description": extr("description: `", "`"),
- "count" : len(files),
+ "album_name" : text.unescape(extr('title="', '"')),
+ "album_size" : text.parse_bytes(extr(
+ '<p class="title">', "B")),
+ "date" : text.parse_datetime(extr(
+ '<p class="title">', '<'), "%d.%m.%Y"),
+ "description": text.unescape(text.unescape( # double
+ desc.rpartition(" [R")[0])),
}
+
+ file_ids = list(text.extract_iter(page, 'id="file" href="/f/', '"'))
+ album["count"] = len(file_ids)
+ return self._extract_files(file_ids), album
+
+ def _extract_files(self, file_ids):
+ for file_id in file_ids:
+ url = "{}/api/f/{}".format(self.root, file_id)
+ yield self.request(url).json()
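
Note: instead of scraping download URLs from the album page, the extractor now
collects file IDs and resolves each through /api/f/. A hedged sketch of that
flow with plain requests (the file ID below is a placeholder):

import requests

root = "https://cyberdrop.me"
file_ids = ["AbCdEfGh"]  # placeholder; scraped via id="file" href="/f/..."

for file_id in file_ids:
    file = requests.get("{}/api/f/{}".format(root, file_id)).json()
    # per items() above, the JSON carries at least 'name' and 'url'
    print(file["name"], file["url"])
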
diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py
index 2aed678..6a0e069 100644
--- a/gallery_dl/extractor/erome.py
+++ b/gallery_dl/extractor/erome.py
@@ -44,11 +44,15 @@ class EromeExtractor(Extractor):
pos = page.index('<div class="user-profile', pos)
user, pos = text.extract(
page, 'href="https://www.erome.com/', '"', pos)
+ count, pos = text.extract(
+ page, 'fa-camera"></i>', '</span>', pos)
+
data = {
"album_id" : album_id,
"title" : text.unescape(title),
"user" : text.unquote(user),
"_http_headers": {"Referer": url},
+ "count" : text.parse_int(count),
}
yield Message.Directory, data
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 182910c..5dc498f 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -40,6 +40,7 @@ class ExhentaiExtractor(Extractor):
if domain == "auto":
domain = ("ex" if self.version == "ex" else "e-") + "hentai.org"
self.root = "https://" + domain
+ self.api_url = self.root + "/api.php"
self.cookies_domain = "." + domain
Extractor.initialize(self)
@@ -120,7 +121,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self.key_start = None
self.key_show = None
self.key_next = None
- self.api_url = ""
self.count = 0
def _init(self):
@@ -171,6 +171,21 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
# declared inside 'items()' to be able to access 'data'
if not response.history and response.headers.get(
"content-type", "").startswith("text/html"):
+ page = response.text
+ self.log.warning("'%s'", page)
+
+ if " requires GP" in page:
+ gp = self.config("gp")
+ if gp == "stop":
+ raise exception.StopExtraction("Not enough GP")
+ elif gp == "wait":
+ input("Press ENTER to continue.")
+ return response.url
+
+ self.log.info("Falling back to non-original downloads")
+ self.original = False
+ return data["_url_1280"]
+
self._report_limits(data)
return True
@@ -212,7 +227,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def metadata_from_page(self, page):
extr = text.extract_from(page)
- self.api_url = extr('var api_url = "', '"') or (self.root + "/api.php")
+
+ api_url = extr('var api_url = "', '"')
+ if api_url:
+ self.api_url = api_url
data = {
"gid" : self.gallery_id,
@@ -296,6 +314,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["num"] = self.image_num
data["image_token"] = self.key_start = extr('var startkey="', '";')
+ data["_url_1280"] = iurl
self.key_show = extr('var showkey="', '";')
self._check_509(iurl, data)
@@ -345,6 +364,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["num"] = request["page"]
data["image_token"] = imgkey
+ data["_url_1280"] = imgurl
self._check_509(imgurl, data)
yield url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
index d4524e0..aff8e61 100644
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -10,6 +10,9 @@ from .common import Extractor, Message
from .. import text, exception
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)"
+
+
class FapelloPostExtractor(Extractor):
"""Extractor for individual posts on fapello.com"""
category = "fapello"
@@ -17,16 +20,16 @@ class FapelloPostExtractor(Extractor):
directory_fmt = ("{category}", "{model}")
filename_fmt = "{model}_{id}.{extension}"
archive_fmt = "{type}_{model}_{id}"
- pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
- r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)")
+ pattern = BASE_PATTERN + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)"
example = "https://fapello.com/MODEL/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
+ self.root = text.root_from_url(match.group(0))
self.model, self.id = match.groups()
def items(self):
- url = "https://fapello.com/{}/{}/".format(self.model, self.id)
+ url = "{}/{}/{}/".format(self.root, self.model, self.id)
page = text.extr(
self.request(url, allow_redirects=False).text,
'class="uk-align-center"', "</div>", None)
@@ -48,27 +51,29 @@ class FapelloModelExtractor(Extractor):
"""Extractor for all posts from a fapello model"""
category = "fapello"
subcategory = "model"
- pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
- r"/(?!top-(?:likes|followers)|popular_videos"
+ pattern = (BASE_PATTERN + r"/(?!top-(?:likes|followers)|popular_videos"
r"|videos|trending|search/?$)"
r"([^/?#]+)/?$")
example = "https://fapello.com/model/"
def __init__(self, match):
Extractor.__init__(self, match)
+ self.root = text.root_from_url(match.group(0))
self.model = match.group(1)
def items(self):
num = 1
data = {"_extractor": FapelloPostExtractor}
while True:
- url = "https://fapello.com/ajax/model/{}/page-{}/".format(
- self.model, num)
+ url = "{}/ajax/model/{}/page-{}/".format(
+ self.root, self.model, num)
page = self.request(url).text
if not page:
return
for url in text.extract_iter(page, '<a href="', '"'):
+ if url == "javascript:void(0);":
+ continue
yield Message.Queue, url, data
num += 1
@@ -77,13 +82,14 @@ class FapelloPathExtractor(Extractor):
"""Extractor for models and posts from fapello.com paths"""
category = "fapello"
subcategory = "path"
- pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
+ pattern = (BASE_PATTERN +
r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
r"|popular_videos/[^/?#]+)/?$")
example = "https://fapello.com/trending/"
def __init__(self, match):
Extractor.__init__(self, match)
+ self.root = text.root_from_url(match.group(0))
self.path = match.group(1)
def items(self):
@@ -93,9 +99,14 @@ class FapelloPathExtractor(Extractor):
else:
data = {"_extractor": FapelloPostExtractor}
+ if "fapello.su" in self.root:
+ self.path = self.path.replace("-", "/")
+ if self.path == "trending":
+ data = {"_extractor": FapelloModelExtractor}
+
while True:
- page = self.request("https://fapello.com/ajax/{}/page-{}/".format(
- self.path, num)).text
+ page = self.request("{}/ajax/{}/page-{}/".format(
+ self.root, self.path, num)).text
if not page:
return
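
Note: deriving self.root from the matched URL via text.root_from_url() is what
lets one set of classes serve both fapello.com and fapello.su. A rough
stand-in for that helper, under the assumption it returns scheme plus host:

from urllib.parse import urlsplit

def root_from_url(url, scheme="https://"):
    # rough equivalent of text.root_from_url(); add a scheme if missing
    if "://" not in url:
        url = scheme + url
    parts = urlsplit(url)
    return "{}://{}".format(parts.scheme, parts.netloc)

print(root_from_url("fapello.su/model/12345/"))   # https://fapello.su
print(root_from_url("https://fapello.com/x/1/"))  # https://fapello.com
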
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index b0699b0..bb684c2 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -38,10 +38,6 @@ class FoolslideExtractor(BaseExtractor):
BASE_PATTERN = FoolslideExtractor.update({
- "powermanga": {
- "root": "https://read.powermanga.org",
- "pattern": r"read(?:er)?\.powermanga\.org",
- },
})
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index 62df192..d5ff8c8 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -42,7 +42,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
def images(self, page):
return [
- (url, None)
+ (url.replace("http:", "https:", 1), None)
for url in text.extract_iter(
page, '<amp-img class="auto-style" src="', '"')
]
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 8ba23c2..c75c90d 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -133,9 +133,25 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
- def _init_site_filters(self):
+ def _request_check(self, url, **kwargs):
+ self.request = self._request_original
+
+ # check for Enter button / front page
+ # and update PHPSESSID and content filters if necessary
+ response = self.request(url, **kwargs)
+ content = response.content
+ if len(content) < 5000 and \
+ b'<div id="entryButtonContainer"' in content:
+ self._init_site_filters(False)
+ response = self.request(url, **kwargs)
+ return response
+
+ def _init_site_filters(self, check_cookies=True):
"""Set site-internal filters to show all images"""
- if self.cookies.get("PHPSESSID", domain=self.cookies_domain):
+ if check_cookies and self.cookies.get(
+ "PHPSESSID", domain=self.cookies_domain):
+ self._request_original = self.request
+ self.request = self._request_check
return
url = self.root + "/?enterAgree=1"
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 32ca151..20491b5 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -30,10 +30,10 @@ class HiperdexBase():
extr = text.extract_from(page)
return {
- "manga" : text.unescape(extr(
- "<title>", "<").rpartition(" Manga - ")[0].strip()),
"url" : text.unescape(extr(
'property="og:url" content="', '"')),
+ "manga" : text.unescape(extr(
+ '"headline": "', '"')),
"score" : text.parse_float(extr(
'id="averagerate">', '<')),
"author" : text.remove_html(extr(
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index b7b6ef1..5c7a1b3 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -15,15 +15,17 @@ from .. import text, util, exception
import collections
import re
+BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?"
+
class IdolcomplexExtractor(SankakuExtractor):
"""Base class for idolcomplex extractors"""
category = "idolcomplex"
+ root = "https://idol.sankakucomplex.com"
cookies_domain = "idol.sankakucomplex.com"
- cookies_names = ("login", "pass_hash")
- root = "https://" + cookies_domain
+ cookies_names = ("_idolcomplex_session",)
referer = False
- request_interval = 5.0
+ request_interval = (4.0, 6.0)
def __init__(self, match):
SankakuExtractor.__init__(self, match)
@@ -32,14 +34,16 @@ class IdolcomplexExtractor(SankakuExtractor):
self.start_post = 0
def _init(self):
- self.extags = self.config("tags", False)
+ self.find_tags = re.compile(
+ r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
+ ).findall
def items(self):
self.login()
data = self.metadata()
for post_id in util.advance(self.post_ids(), self.start_post):
- post = self._parse_post(post_id)
+ post = self._extract_post(post_id)
url = post["file_url"]
post.update(data)
text.nameext_from_url(url, post)
@@ -67,63 +71,75 @@ class IdolcomplexExtractor(SankakuExtractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- url = self.root + "/user/authenticate"
+ url = self.root + "/users/login"
+ page = self.request(url).text
+
+ headers = {
+ "Referer": url,
+ }
+ url = self.root + (text.extr(page, '<form action="', '"') or
+ "/en/user/authenticate")
data = {
+ "authenticity_token": text.unescape(text.extr(
+ page, 'name="authenticity_token" value="', '"')),
"url" : "",
"user[name]" : username,
"user[password]": password,
"commit" : "Login",
}
- response = self.request(url, method="POST", data=data)
+ response = self.request(url, method="POST", headers=headers, data=data)
- if not response.history or response.url != self.root + "/user/home":
+ if not response.history or response.url.endswith("/user/home"):
raise exception.AuthenticationError()
- cookies = response.history[0].cookies
- return {c: cookies[c] for c in self.cookies_names}
+ return {c.name: c.value for c in response.history[0].cookies}
- def _parse_post(self, post_id):
- """Extract metadata of a single post"""
- url = self.root + "/post/show/" + post_id
+ def _extract_post(self, post_id):
+ url = self.root + "/posts/" + post_id
page = self.request(url, retries=10).text
- extr = text.extract
+ extr = text.extract_from(page)
- tags , pos = extr(page, "<title>", " | ")
- vavg , pos = extr(page, "itemprop=ratingValue>", "<", pos)
- vcnt , pos = extr(page, "itemprop=reviewCount>", "<", pos)
- _ , pos = extr(page, "Posted: <", "", pos)
- created, pos = extr(page, ' title="', '"', pos)
- rating = extr(page, "<li>Rating: ", "<", pos)[0]
+ tags = extr("<title>", " | ")
+ vavg = extr('itemprop="ratingValue">', "<")
+ vcnt = extr('itemprop="reviewCount">', "<")
+ pid = extr(">Post ID:", "<")
+ created = extr(' title="', '"')
- file_url, pos = extr(page, '<li>Original: <a href="', '"', pos)
+ file_url = extr('>Original:', 'id=')
if file_url:
- width , pos = extr(page, '>', 'x', pos)
- height, pos = extr(page, '', ' ', pos)
+ file_url = extr(' href="', '"')
+ width = extr(">", "x")
+ height = extr("", " ")
else:
- width , pos = extr(page, '<object width=', ' ', pos)
- height, pos = extr(page, 'height=', '>', pos)
- file_url = extr(page, '<embed src="', '"', pos)[0]
+ width = extr('<object width=', ' ')
+ height = extr('height=', '>')
+ file_url = extr('<embed src="', '"')
+
+ rating = extr(">Rating:", "<br")
data = {
- "id": text.parse_int(post_id),
- "md5": file_url.rpartition("/")[2].partition(".")[0],
- "tags": text.unescape(tags),
+ "id" : text.parse_int(pid),
+ "md5" : file_url.rpartition("/")[2].partition(".")[0],
+ "tags" : text.unescape(tags),
"vote_average": text.parse_float(vavg),
- "vote_count": text.parse_int(vcnt),
- "created_at": created,
- "rating": (rating or "?")[0].lower(),
- "file_url": "https:" + text.unescape(file_url),
- "width": text.parse_int(width),
- "height": text.parse_int(height),
+ "vote_count" : text.parse_int(vcnt),
+ "created_at" : created,
+ "date" : text.parse_datetime(
+ created, "%Y-%m-%d %H:%M:%S.%f"),
+ "rating" : text.remove_html(rating).lower(),
+ "file_url" : "https:" + text.unescape(file_url),
+ "width" : text.parse_int(width),
+ "height" : text.parse_int(height),
}
- if self.extags:
- tags = collections.defaultdict(list)
- tags_html = text.extr(page, '<ul id=tag-sidebar>', '</ul>')
- pattern = re.compile(r'tag-type-([^>]+)><a href="/\?tags=([^"]+)')
- for tag_type, tag_name in pattern.findall(tags_html or ""):
- tags[tag_type].append(text.unquote(tag_name))
- for key, value in tags.items():
- data["tags_" + key] = " ".join(value)
+ tags = collections.defaultdict(list)
+ tags_list = []
+ tags_html = text.extr(page, '<ul id="tag-sidebar"', '</ul>')
+ for tag_type, tag_name in self.find_tags(tags_html or ""):
+ tags[tag_type].append(text.unquote(tag_name))
+ for key, value in tags.items():
+ data["tags_" + key] = " ".join(value)
+ tags_list += value
+ data["tags"] = " ".join(tags_list)
return data
@@ -178,15 +194,16 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
while True:
page = self.request(self.root, params=params, retries=10).text
- pos = page.find("<div id=more-popular-posts-link>") + 1
- yield from text.extract_iter(page, '" id=p', '>', pos)
+ pos = ((page.find('id="more-popular-posts-link"') + 1) or
+ (page.find('<span class="thumb') + 1))
+ yield from text.extract_iter(page, ' href="/posts/', '"', pos)
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
if not next_url:
return
- next_params = text.parse_query(text.unescape(
- next_url).lstrip("?/"))
+ next_params = text.parse_query(text.unescape(text.unescape(
+ next_url).lstrip("?/")))
if "next" in next_params:
# stop if the same "next" value occurs twice in a row (#265)
@@ -201,8 +218,8 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
- pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pool/show/(\d+)"
- example = "https://idol.sankakucomplex.com/pool/show/12345"
+ pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
+ example = "https://idol.sankakucomplex.com/pools/show/12345"
per_page = 24
def __init__(self, match):
@@ -219,15 +236,17 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
return {"pool": self.pool_id}
def post_ids(self):
- url = self.root + "/pool/show/" + self.pool_id
+ url = self.root + "/pools/show/" + self.pool_id
params = {"page": self.start_page}
while True:
page = self.request(url, params=params, retries=10).text
- ids = list(text.extract_iter(page, '" id=p', '>'))
+ pos = page.find('id="pool-show"') + 1
+ post_ids = list(text.extract_iter(
+ page, ' href="/posts/', '"', pos))
- yield from ids
- if len(ids) < self.per_page:
+ yield from post_ids
+ if len(post_ids) < self.per_page:
return
params["page"] += 1
@@ -236,8 +255,8 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor):
"""Extractor for single images from idol.sankakucomplex.com"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"
- example = "https://idol.sankakucomplex.com/post/show/12345"
+ pattern = BASE_PATTERN + r"/posts?/(?:show/)?([0-9a-f]+)"
+ example = "https://idol.sankakucomplex.com/posts/0123456789abcdef"
def __init__(self, match):
IdolcomplexExtractor.__init__(self, match)
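
Note: tag extraction now always runs, driven by the precompiled find_tags
regex from _init(). A self-contained check of that pattern against a
representative sidebar snippet (the markup shape is assumed from the pattern
itself):

import re

find_tags = re.compile(
    r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
).findall

html = ('<ul id="tag-sidebar">'
        '<li class="tag-type-idol"> <div class="tag"> '
        '<a href="/?tags=some_idol">some idol</a></li>'
        '</ul>')
print(find_tags(html))  # [('idol', 'some_idol')]
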
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index aca101e..3bdcfdf 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -126,14 +126,15 @@ class ImagefapImageExtractor(ImagefapExtractor):
url = "{}/photo/{}/".format(self.root, self.image_id)
page = self.request(url).text
+ url, pos = text.extract(
+ page, 'original="', '"')
info, pos = text.extract(
- page, '<script type="application/ld+json">', '</script>')
+ page, '<script type="application/ld+json">', '</script>', pos)
image_id, pos = text.extract(
page, 'id="imageid_input" value="', '"', pos)
gallery_id, pos = text.extract(
page, 'id="galleryid_input" value="', '"', pos)
info = util.json_loads(info)
- url = info["contentUrl"]
return url, text.nameext_from_url(url, {
"title": text.unescape(info["name"]),
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index b0789be..8ec6741 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -217,9 +217,10 @@ class InstagramExtractor(Extractor):
data["post_shortcode"])
continue
- if "video_versions" in item:
+ video_versions = item.get("video_versions")
+ if video_versions:
video = max(
- item["video_versions"],
+ video_versions,
key=lambda x: (x["width"], x["height"], x["type"]),
)
media = video
diff --git a/gallery_dl/extractor/mangaread.py b/gallery_dl/extractor/mangaread.py
index 8f19374..4b017dc 100644
--- a/gallery_dl/extractor/mangaread.py
+++ b/gallery_dl/extractor/mangaread.py
@@ -50,8 +50,8 @@ class MangareadChapterExtractor(MangareadBase, ChapterExtractor):
page = text.extr(
page, '<div class="reading-content">', '<div class="entry-header')
return [
- (url.strip(), None)
- for url in text.extract_iter(page, 'data-src="', '"')
+ (text.extr(img, 'src="', '"').strip(), None)
+ for img in text.extract_iter(page, '<img id="image-', '>')
]
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 3c2b03e..c5fe840 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -45,6 +45,9 @@ class MastodonExtractor(BaseExtractor):
attachments = status["media_attachments"]
del status["media_attachments"]
+ if status["reblog"]:
+ attachments.extend(status["reblog"]["media_attachments"])
+
status["instance"] = self.instance
acct = status["account"]["acct"]
status["instance_remote"] = \
@@ -113,7 +116,10 @@ class MastodonUserExtractor(MastodonExtractor):
return api.account_statuses(
api.account_id_by_username(self.item),
- only_media=not self.config("text-posts", False),
+ only_media=(
+ not self.reblogs and
+ not self.config("text-posts", False)
+ ),
exclude_replies=not self.replies,
)
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index 9f5cc9d..bc7b308 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -96,6 +96,8 @@ class NitterExtractor(BaseExtractor):
for url in text.extract_iter(
attachments, '<source src="', '"'):
+ if url[0] == "/":
+ url = self.root + url
append(text.nameext_from_url(url, {"url": url}))
else:
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 45313c5..d1f135d 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -11,7 +11,7 @@
from .common import Extractor, Message
from .. import text, oauth, util, config, exception
from ..output import stdout_write
-from ..cache import cache
+from ..cache import cache, memcache
import urllib.parse
import binascii
import hashlib
@@ -31,6 +31,9 @@ class OAuthBase(Extractor):
def _init(self):
self.cache = config.get(("extractor", self.category), "cache", True)
+ if self.cache and cache is memcache:
+ self.log.warning("cache file is not writeable")
+ self.cache = False
def oauth_config(self, key, default=None):
value = config.interpolate(("extractor", self.subcategory), key)
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
new file mode 100644
index 0000000..34b4ebf
--- /dev/null
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -0,0 +1,88 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2023 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://pixeldrain.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?pixeldrain\.com"
+
+
+class PixeldrainExtractor(Extractor):
+ """Base class for pixeldrain extractors"""
+ category = "pixeldrain"
+ root = "https://pixeldrain.com"
+ archive_fmt = "{id}"
+
+ def _init(self):
+ api_key = self.config("api-key")
+ if api_key:
+ self.session.auth = ("", api_key)
+
+ def parse_datetime(self, date_string):
+ return text.parse_datetime(
+ date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
+
+
+class PixeldrainFileExtractor(PixeldrainExtractor):
+ """Extractor for pixeldrain files"""
+ subcategory = "file"
+ filename_fmt = "{filename[:230]} ({id}).{extension}"
+ pattern = BASE_PATTERN + r"/(?:u|api/file)/(\w+)"
+ example = "https://pixeldrain.com/u/abcdefgh"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.file_id = match.group(1)
+
+ def items(self):
+ url = "{}/api/file/{}".format(self.root, self.file_id)
+ file = self.request(url + "/info").json()
+
+ file["url"] = url + "?download"
+ file["date"] = self.parse_datetime(file["date_upload"])
+
+ text.nameext_from_url(file["name"], file)
+ yield Message.Directory, file
+ yield Message.Url, file["url"], file
+
+
+class PixeldrainAlbumExtractor(PixeldrainExtractor):
+ """Extractor for pixeldrain albums"""
+ subcategory = "album"
+ directory_fmt = ("{category}",
+ "{album[date]:%Y-%m-%d} {album[title]} ({album[id]})")
+ filename_fmt = "{num:>03} {filename[:230]} ({id}).{extension}"
+ pattern = BASE_PATTERN + r"/(?:l|api/list)/(\w+)"
+ example = "https://pixeldrain.com/l/abcdefgh"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.album_id = match.group(1)
+
+ def items(self):
+ url = "{}/api/list/{}".format(self.root, self.album_id)
+ album = self.request(url).json()
+
+ files = album["files"]
+ album["count"] = album["file_count"]
+ album["date"] = self.parse_datetime(album["date_created"])
+
+ del album["files"]
+ del album["file_count"]
+
+ yield Message.Directory, {"album": album}
+ for num, file in enumerate(files, 1):
+ file["album"] = album
+ file["num"] = num
+ file["url"] = url = "{}/api/file/{}?download".format(
+ self.root, file["id"])
+ file["date"] = self.parse_datetime(file["date_upload"])
+ text.nameext_from_url(file["name"], file)
+ yield Message.Url, url, file
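
Note: both extractors work purely against the JSON API, and authentication is
HTTP basic auth with an empty username and the API key as password (matching
the session.auth tuple in _init). A sketch with requests; the IDs and key are
placeholders:

import requests

session = requests.Session()
session.auth = ("", "your-api-key")  # placeholder key; auth is optional

root = "https://pixeldrain.com"
info = session.get(root + "/api/file/abcdefgh/info").json()
print(info["name"], info["date_upload"])

album = session.get(root + "/api/list/abcdefgh").json()
for num, file in enumerate(album["files"], 1):
    print(num, file["id"], "{}/api/file/{}?download".format(root, file["id"]))
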
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index c5ce832..7ff40a3 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -143,7 +143,7 @@ class PornhubGifExtractor(PornhubExtractor):
"url" : extr('"contentUrl": "', '"'),
"date" : text.parse_datetime(
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
- "user" : extr('data-mxptext="', '"'),
+ "user" : text.remove_html(extr("Created by:", "</div>")),
}
yield Message.Directory, gif
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index bebea2a..8941258 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -117,7 +117,7 @@ class SankakuPoolExtractor(SankakuExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/(?:books|pool/show)/(\d+)"
+ pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\d+)"
example = "https://sankaku.app/books/12345"
def __init__(self, match):
@@ -143,7 +143,7 @@ class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single posts from sankaku.app"""
subcategory = "post"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/post(?:s|/show)/([0-9a-f]+)"
+ pattern = BASE_PATTERN + r"/posts?(?:/show)?/([0-9a-f]+)"
example = "https://sankaku.app/post/show/12345"
def __init__(self, match):
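
Note: the loosened patterns accept /post/show/ID, /posts/ID, and
/posts/show/ID alike. A quick check, with BASE_PATTERN simplified here to
cover sankaku.app only:

import re

BASE_PATTERN = r"(?:https?://)?sankaku\.app"  # simplified for this check
pattern = re.compile(BASE_PATTERN + r"/posts?(?:/show)?/([0-9a-f]+)")

for url in ("https://sankaku.app/post/show/12345",
            "https://sankaku.app/posts/0123456789abcdef",
            "https://sankaku.app/posts/show/12345"):
    print(pattern.match(url).group(1))
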
diff --git a/gallery_dl/extractor/tmohentai.py b/gallery_dl/extractor/tmohentai.py
new file mode 100644
index 0000000..9c29727
--- /dev/null
+++ b/gallery_dl/extractor/tmohentai.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://tmohentai.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?tmohentai\.com"
+
+
+class TmohentaiGalleryExtractor(GalleryExtractor):
+ category = "tmohentai"
+ root = "http://tmohentai.com"
+ directory_fmt = ("{category}", "{title} ({gallery_id})")
+ pattern = BASE_PATTERN + r"/(?:contents|reader)/(\w+)"
+ example = "https://tmohentai.com/contents/12345a67b89c0"
+
+ def __init__(self, match):
+ self.gallery_id = match.group(1)
+ url = "{}/contents/{}".format(self.root, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
+
+ def images(self, page):
+ fmt = "https://imgrojo.tmohentai.com/contents/{}/{{:>03}}.webp".format(
+ self.gallery_id).format
+ cnt = page.count('class="lanzador')
+ return [(fmt(i), None) for i in range(0, cnt)]
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+
+ return {
+ "gallery_id": self.gallery_id,
+ "title" : text.unescape(extr("<h3>", "<").strip()),
+ "artists" : text.split_html(extr(
+ "<label>Artists and Artists Groups</label>", "</ul>")),
+ "genres" : text.split_html(extr(
+ "<label>Genders</label>", "</ul>")),
+ "tags" : text.split_html(extr(
+ "<label>Tags</label>", "</ul>")),
+ "uploader" : text.remove_html(extr(
+ "<label>Uploaded By</label>", "</ul>")),
+ "language" : extr("&nbsp;", "\n"),
+ }
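
Note: images() never parses individual <img> tags; it counts 'lanzador'
markers and generates zero-padded page URLs. The URL scheme in miniature:

gallery_id = "12345a67b89c0"
fmt = "https://imgrojo.tmohentai.com/contents/{}/{{:>03}}.webp".format(
    gallery_id).format

cnt = 3  # stand-in for page.count('class="lanzador')
for i in range(cnt):
    print(fmt(i))
# https://imgrojo.tmohentai.com/contents/12345a67b89c0/000.webp
# https://imgrojo.tmohentai.com/contents/12345a67b89c0/001.webp
# https://imgrojo.tmohentai.com/contents/12345a67b89c0/002.webp
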
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 3dab16e..f50ddb7 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -322,12 +322,15 @@ class TumblrDayExtractor(TumblrExtractor):
def __init__(self, match):
TumblrExtractor.__init__(self, match)
year, month, day = match.group(4).split("/")
- self.date_min = (
- # 719163 == date(1970, 1, 1).toordinal()
- date(int(year), int(month), int(day)).toordinal() - 719163) * 86400
+ self.ordinal = date(int(year), int(month), int(day)).toordinal()
def _init(self):
TumblrExtractor._init(self)
+
+ self.date_min = (
+ # 719163 == date(1970, 1, 1).toordinal()
+ (self.ordinal - 719163) * 86400)
+
self.api.before = self.date_min + 86400
def posts(self):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4766ae5..ca1e906 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -43,6 +43,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
+ self.ads = self.config("ads", False)
self.cards_blacklist = self.config("cards-blacklist")
self.syndication = self.config("syndication")
@@ -1034,7 +1035,7 @@ class TwitterAPI():
"focalTweetId": tweet_id,
"referrer": "profile",
"with_rux_injections": False,
- "includePromotedContent": True,
+ "includePromotedContent": False,
"withCommunity": True,
"withQuickPromoteEligibilityTweetFields": True,
"withBirdwatchNotes": True,
@@ -1049,7 +1050,7 @@ class TwitterAPI():
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
- "includePromotedContent": True,
+ "includePromotedContent": False,
"withQuickPromoteEligibilityTweetFields": True,
"withVoice": True,
"withV2Timeline": True,
@@ -1061,7 +1062,7 @@ class TwitterAPI():
variables = {
"userId": self._user_id_by_screen_name(screen_name),
"count": 100,
- "includePromotedContent": True,
+ "includePromotedContent": False,
"withCommunity": True,
"withVoice": True,
"withV2Timeline": True,
@@ -1498,13 +1499,21 @@ class TwitterAPI():
for entry in tweets:
try:
- tweet = ((entry.get("content") or entry["item"])
- ["itemContent"]["tweet_results"]["result"])
+ item = ((entry.get("content") or entry["item"])
+ ["itemContent"])
+ if "promotedMetadata" in item and not extr.ads:
+ extr.log.debug(
+ "Skipping %s (ad)",
+ (entry.get("entryId") or "").rpartition("-")[2])
+ continue
+
+ tweet = item["tweet_results"]["result"]
if "tombstone" in tweet:
tweet = self._process_tombstone(
entry, tweet["tombstone"])
if not tweet:
continue
+
if "tweet" in tweet:
tweet = tweet["tweet"]
legacy = tweet["legacy"]
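
Note: promoted entries are now dropped by default; 'ads = false' both flips
includePromotedContent in the GraphQL variables and skips timeline entries
carrying promotedMetadata. The skip condition in isolation:

def is_ad(entry):
    # mirrors the TwitterAPI check above: promoted entries carry
    # 'promotedMetadata' inside their itemContent
    item = (entry.get("content") or entry["item"])["itemContent"]
    return "promotedMetadata" in item

entries = [
    {"entryId": "promoted-tweet-1",
     "content": {"itemContent": {"promotedMetadata": {}}}},
    {"entryId": "tweet-2",
     "content": {"itemContent": {"tweet_results": {}}}},
]
print([e["entryId"] for e in entries if not is_ad(e)])  # ['tweet-2']
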
diff --git a/gallery_dl/extractor/wallpapercave.py b/gallery_dl/extractor/wallpapercave.py
index bce1026..faf3b0d 100644
--- a/gallery_dl/extractor/wallpapercave.py
+++ b/gallery_dl/extractor/wallpapercave.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2021 David Hoppenbrouwers
+# Copyright 2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,7 +23,20 @@ class WallpapercaveImageExtractor(Extractor):
def items(self):
page = self.request(text.ensure_http_scheme(self.url)).text
+
+ path = None
for path in text.extract_iter(page, 'class="download" href="', '"'):
image = text.nameext_from_url(path)
yield Message.Directory, image
yield Message.Url, self.root + path, image
+
+ if path is None:
+ try:
+ path = text.rextract(
+ page, 'href="', '"', page.index('id="tdownload"'))[0]
+ except Exception:
+ pass
+ else:
+ image = text.nameext_from_url(path)
+ yield Message.Directory, image
+ yield Message.Url, self.root + path, image
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 8e6b842..3bb635d 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -90,4 +90,7 @@ class WarosuThreadExtractor(Extractor):
data["filename"] = text.unquote(extr(
"", "<").rstrip().rpartition(".")[0])
extr("<br>", "")
- data["image"] = self.root + extr("<a href=", ">")
+
+ data["image"] = url = extr("<a href=", ">")
+ if url[0] == "/":
+ data["image"] = self.root + url
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index dc9a4f1..3f2f410 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -146,7 +146,12 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
if page and path not in page:
return
- page = self.request(self.root + path).text
+ response = self.request(self.root + path)
+ if response.history:
+ parts = response.url.split("/")
+ self.path = "/".join(parts[3:-1])
+
+ page = response.text
data["page"] = self.page_no
for url in self.get_episode_urls(page):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index ed05e1f..7413b5a 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -41,9 +41,14 @@ class WeiboExtractor(Extractor):
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
- if response.history and "passport.weibo.com" in response.url:
- self._sina_visitor_system(response)
- response = Extractor.request(self, url, **kwargs)
+ if response.history:
+ if "login.sina.com" in response.url:
+ raise exception.StopExtraction(
+ "HTTP redirect to login page (%s)",
+ response.url.partition("?")[0])
+ if "passport.weibo.com" in response.url:
+ self._sina_visitor_system(response)
+ response = Extractor.request(self, url, **kwargs)
return response
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index a28d8f5..46e574e 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -38,13 +38,13 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
- title = extr('"title":"', '"')
user = {
"id" : text.parse_int(extr('"id_user":', ',')),
"display": extr('"display":"', '"'),
"sex" : extr('"sex":"', '"'),
"name" : self.user,
}
+ title = extr('"title":"', '"')
user["description"] = extr(
'<small class="mobile-hide">', '</small>').strip()
tags = extr('<em>Tagged:</em>', '<').strip()
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 5fe1943..1307399 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -63,14 +63,14 @@ class ZerochanExtractor(BooruExtractor):
data = {
"id" : text.parse_int(entry_id),
- "author" : extr('"author": "', '"'),
+ "author" : text.parse_unicode_escapes(extr(' "name": "', '"')),
"file_url": extr('"contentUrl": "', '"'),
"date" : text.parse_datetime(extr('"datePublished": "', '"')),
"width" : text.parse_int(extr('"width": "', ' ')),
"height" : text.parse_int(extr('"height": "', ' ')),
"size" : text.parse_bytes(extr('"contentSize": "', 'B')),
"path" : text.split_html(extr(
- 'class="breadcrumbs', '</p>'))[2:],
+ 'class="breadcrumbs', '</nav>'))[2:],
"uploader": extr('href="/user/', '"'),
"tags" : extr('<ul id="tags"', '</ul>'),
"source" : extr('<h2>Source</h2>', '</p><h2>').rpartition(
@@ -80,9 +80,9 @@ class ZerochanExtractor(BooruExtractor):
html = data["tags"]
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
- category = text.extr(tag, 'alt="', '"')
- name = text.extr(tag, ">-->", "</a>")
- tags.append(category + ":" + name.strip())
+ category = text.extr(tag, 'data-type="', '"')
+ name = text.extr(tag, 'data-tag="', '"')
+ tags.append(category.capitalize() + ":" + name)
return data
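
Note: tag categories and names are now read from data-type/data-tag attributes
rather than alt text and anchor contents. A standalone sketch with a regex
stand-in for text.extr (the markup shape is assumed):

import re

def extr(txt, begin, end):
    # minimal stand-in for text.extr(): first text between begin and end
    m = re.search(re.escape(begin) + r"(.*?)" + re.escape(end), txt)
    return m.group(1) if m else ""

html = ('<ul id="tags">'
        '<li class="character" data-type="character" '
        'data-tag="Example Tag"><a>...</a></li></ul>')

tags = []
for tag in html.split("<li class=")[1:]:
    category = extr(tag, 'data-type="', '"')
    name = extr(tag, 'data-tag="', '"')
    tags.append(category.capitalize() + ":" + name)
print(tags)  # ['Character:Example Tag']
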
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 1e80cbf..ac2ac7a 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -87,24 +87,27 @@ class Job():
extr.category = pextr.category
extr.subcategory = pextr.subcategory
- self.metadata_url = extr.config("url-metadata")
- self.metadata_http = extr.config("http-metadata")
+ self.metadata_url = extr.config2("metadata-url", "url-metadata")
+ self.metadata_http = extr.config2("metadata-http", "http-metadata")
+ metadata_path = extr.config2("metadata-path", "path-metadata")
+ metadata_version = extr.config2("metadata-version", "version-metadata")
+ metadata_extractor = extr.config2(
+ "metadata-extractor", "extractor-metadata")
- version_info = extr.config("version-metadata")
- metadata_path = extr.config("path-metadata")
-
- # user-supplied metadata
- kwdict = extr.config("keywords")
- if kwdict:
- self.kwdict.update(kwdict)
if metadata_path:
self.kwdict[metadata_path] = path_proxy
- if version_info:
- self.kwdict[version_info] = {
+ if metadata_extractor:
+ self.kwdict[metadata_extractor] = extr
+ if metadata_version:
+ self.kwdict[metadata_version] = {
"version" : version.__version__,
"is_executable" : util.EXECUTABLE,
"current_git_head": util.git_head()
}
+ # user-supplied metadata
+ kwdict = extr.config("keywords")
+ if kwdict:
+ self.kwdict.update(kwdict)
def run(self):
"""Execute or run the job"""
@@ -375,7 +378,7 @@ class DownloadJob(Job):
else:
extr._parentdir = pextr._parentdir
- pmeta = pextr.config("parent-metadata")
+ pmeta = pextr.config2("parent-metadata", "metadata-parent")
if pmeta:
if isinstance(pmeta, str):
data = self.kwdict.copy()
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 1982b71..255d9f2 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -44,21 +44,94 @@ class DeprecatedConfigConstAction(argparse.Action):
namespace.options.append(((), self.dest, self.const))
-class ParseAction(argparse.Action):
- """Parse <key>=<value> options and set them as config values"""
+class ConfigParseAction(argparse.Action):
+ """Parse KEY=VALUE config options"""
def __call__(self, parser, namespace, values, option_string=None):
key, value = _parse_option(values)
key = key.split(".") # splitting an empty string becomes [""]
namespace.options.append((key[:-1], key[-1], value))
-class OptionAction(argparse.Action):
- """Parse <key>=<value> options for """
+class PPParseAction(argparse.Action):
+ """Parse KEY=VALUE post processor options"""
def __call__(self, parser, namespace, values, option_string=None):
key, value = _parse_option(values)
namespace.options_pp[key] = value
+class InputfileAction(argparse.Action):
+ """Collect input files"""
+ def __call__(self, parser, namespace, value, option_string=None):
+ namespace.input_files.append((value, self.const))
+
+
+class MtimeAction(argparse.Action):
+ """Configure mtime post processors"""
+ def __call__(self, parser, namespace, value, option_string=None):
+ namespace.postprocessors.append({
+ "name": "mtime",
+ "value": "{" + (self.const or value) + "}",
+ })
+
+
+class UgoiraAction(argparse.Action):
+ """Configure ugoira post processors"""
+ def __call__(self, parser, namespace, value, option_string=None):
+ if self.const:
+ value = self.const
+ else:
+ value = value.strip().lower()
+
+ if value in ("webm", "vp9"):
+ pp = {
+ "extension" : "webm",
+ "ffmpeg-args" : ("-c:v", "libvpx-vp9",
+ "-crf", "12",
+ "-b:v", "0", "-an"),
+ }
+ elif value == "vp9-lossless":
+ pp = {
+ "extension" : "webm",
+ "ffmpeg-args" : ("-c:v", "libvpx-vp9",
+ "-lossless", "1",
+ "-pix_fmt", "yuv420p", "-an"),
+ }
+ elif value == "vp8":
+ pp = {
+ "extension" : "webm",
+ "ffmpeg-args" : ("-c:v", "libvpx",
+ "-crf", "4",
+ "-b:v", "5000k", "-an"),
+ }
+ elif value == "mp4":
+ pp = {
+ "extension" : "mp4",
+ "ffmpeg-args" : ("-c:v", "libx264", "-an", "-b:v", "5M"),
+ "libx264-prevent-odd": True,
+ }
+ elif value == "gif":
+ pp = {
+ "extension" : "gif",
+ "ffmpeg-args" : ("-filter_complex", "[0:v] split [a][b];"
+ "[a] palettegen [p];[b][p] paletteuse"),
+ "repeat-last-frame": False,
+ }
+ elif value in ("mkv", "copy"):
+ pp = {
+ "extension" : "mkv",
+ "ffmpeg-args" : ("-c:v", "copy"),
+ "repeat-last-frame": False,
+ }
+ else:
+ parser.error("Unsupported Ugoira format '{}'".format(value))
+
+ pp["name"] = "ugoira"
+ pp["whitelist"] = ("pixiv", "danbooru")
+
+ namespace.options.append(((), "ugoira", True))
+ namespace.postprocessors.append(pp)
+
+
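A single --ugoira flag therefore expands into both a config entry and a post-processor spec; for the 'gif' branch above:

    # gallery-dl --ugoira gif URL
    # namespace.options        -> [((), "ugoira", True)]
    # namespace.postprocessors -> [{
    #     "extension"        : "gif",
    #     "ffmpeg-args"      : ("-filter_complex", "[0:v] split [a][b];"
    #                           "[a] palettegen [p];[b][p] paletteuse"),
    #     "repeat-last-frame": False,
    #     "name"             : "ugoira",
    #     "whitelist"        : ("pixiv", "danbooru"),
    # }]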
class Formatter(argparse.HelpFormatter):
"""Custom HelpFormatter class to customize help output"""
def __init__(self, prog):
@@ -101,12 +174,6 @@ def build_parser():
help="Print program version and exit",
)
general.add_argument(
- "-i", "--input-file",
- dest="inputfiles", metavar="FILE", action="append",
- help=("Download URLs found in FILE ('-' for stdin). "
- "More than one --input-file can be specified"),
- )
- general.add_argument(
"-f", "--filename",
dest="filename", metavar="FORMAT",
help=("Filename format string for downloaded files "
@@ -149,6 +216,32 @@ def build_parser():
"(ALL to delete everything)",
)
+ input = parser.add_argument_group("Input Options")
+ input.add_argument(
+ "urls",
+ metavar="URL", nargs="*",
+ help=argparse.SUPPRESS,
+ )
+ input.add_argument(
+ "-i", "--input-file",
+ dest="input_files", metavar="FILE", action=InputfileAction, const=None,
+ default=[],
+ help=("Download URLs found in FILE ('-' for stdin). "
+ "More than one --input-file can be specified"),
+ )
+ input.add_argument(
+ "-I", "--input-file-comment",
+ dest="input_files", metavar="FILE", action=InputfileAction, const="c",
+ help=("Download URLs found in FILE. "
+ "Comment them out after they were downloaded successfully."),
+ )
+ input.add_argument(
+ "-x", "--input-file-delete",
+ dest="input_files", metavar="FILE", action=InputfileAction, const="d",
+ help=("Download URLs found in FILE. "
+ "Delete them after they were downloaded successfully."),
+ )
+
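All three flags share InputfileAction and differ only in their const, so each collected tuple records the per-file post-download behavior:

    # gallery-dl -i urls.txt -I done.txt -x queue.txt
    # namespace.input_files -> [
    #     ("urls.txt" , None),  # leave the file untouched
    #     ("done.txt" , "c"),   # comment out successfully downloaded URLs
    #     ("queue.txt", "d"),   # delete successfully downloaded URLs
    # ]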
output = parser.add_argument_group("Output Options")
output.add_argument(
"-q", "--quiet",
@@ -308,7 +401,8 @@ def build_parser():
configuration = parser.add_argument_group("Configuration Options")
configuration.add_argument(
"-o", "--option",
- dest="options", metavar="KEY=VALUE", action=ParseAction, default=[],
+ dest="options", metavar="KEY=VALUE",
+ action=ConfigParseAction, default=[],
help=("Additional options. "
"Example: -o browser=firefox") ,
)
@@ -437,43 +531,15 @@ def build_parser():
}
postprocessor = parser.add_argument_group("Post-processing Options")
postprocessor.add_argument(
- "--zip",
- dest="postprocessors",
- action="append_const", const="zip",
- help="Store downloaded files in a ZIP archive",
- )
- postprocessor.add_argument(
- "--ugoira-conv",
- dest="postprocessors", action="append_const", const={
- "name" : "ugoira",
- "ffmpeg-args" : ("-c:v", "libvpx", "-crf", "4", "-b:v", "5000k"),
- "ffmpeg-twopass": True,
- "whitelist" : ("pixiv", "danbooru"),
- },
- help="Convert Pixiv Ugoira to WebM (requires FFmpeg)",
- )
- postprocessor.add_argument(
- "--ugoira-conv-lossless",
- dest="postprocessors", action="append_const", const={
- "name" : "ugoira",
- "ffmpeg-args" : ("-c:v", "libvpx-vp9", "-lossless", "1",
- "-pix_fmt", "yuv420p"),
- "ffmpeg-twopass": False,
- "whitelist" : ("pixiv", "danbooru"),
- },
- help="Convert Pixiv Ugoira to WebM in VP9 lossless mode",
+ "-P", "--postprocessor",
+ dest="postprocessors", metavar="NAME", action="append", default=[],
+ help="Activate the specified post processor",
)
postprocessor.add_argument(
- "--ugoira-conv-copy",
- dest="postprocessors", action="append_const", const={
- "name" : "ugoira",
- "extension" : "mkv",
- "ffmpeg-args" : ("-c:v", "copy"),
- "ffmpeg-twopass" : False,
- "repeat-last-frame": False,
- "whitelist" : ("pixiv", "danbooru"),
- },
- help="Convert Pixiv Ugoira to MKV without re-encoding any frames",
+ "-O", "--postprocessor-option",
+ dest="options_pp", metavar="KEY=VALUE",
+ action=PPParseAction, default={},
+ help="Additional post processor options",
)
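Presumably (the option routing itself is outside this diff), the generic -P/-O pair can reproduce the dedicated shortcut flags; a hypothetical equivalent of --cbz:

    # assumes -O keys are passed straight through to the
    # post processor activated by -P; illustrative only
    gallery-dl -P zip -O extension=cbz URL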
postprocessor.add_argument(
"--write-metadata",
@@ -500,10 +566,54 @@ def build_parser():
help="Write image tags to separate text files",
)
postprocessor.add_argument(
- "--mtime-from-date",
+ "--zip",
dest="postprocessors",
- action="append_const", const="mtime",
- help="Set file modification times according to 'date' metadata",
+ action="append_const", const="zip",
+ help="Store downloaded files in a ZIP archive",
+ )
+ postprocessor.add_argument(
+ "--cbz",
+ dest="postprocessors",
+ action="append_const", const={
+ "name" : "zip",
+ "extension": "cbz",
+ },
+ help="Store downloaded files in a CBZ archive",
+ )
+ postprocessor.add_argument(
+ "--mtime",
+ dest="postprocessors", metavar="NAME", action=MtimeAction,
+ help=("Set file modification times according to metadata "
+ "selected by NAME. Examples: 'date' or 'status[date]'"),
+ )
+ postprocessor.add_argument(
+ "--mtime-from-date",
+ dest="postprocessors", nargs=0, action=MtimeAction,
+ const="date|status[date]",
+ help=argparse.SUPPRESS,
+ )
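The old flag is kept as a hidden alias; through its const it now behaves exactly like an explicit --mtime call:

    # equivalent invocations after this change:
    gallery-dl --mtime-from-date URL
    gallery-dl --mtime "date|status[date]" URL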
+ postprocessor.add_argument(
+ "--ugoira",
+ dest="postprocessors", metavar="FORMAT", action=UgoiraAction,
+ help=("Convert Pixiv Ugoira to FORMAT using FFmpeg. "
+ "Supported formats are 'webm', 'mp4', 'gif', "
+ "'vp8', 'vp9', 'vp9-lossless', 'copy'."),
+ )
+ postprocessor.add_argument(
+ "--ugoira-conv",
+ dest="postprocessors", nargs=0, action=UgoiraAction, const="vp8",
+ help=argparse.SUPPRESS,
+ )
+ postprocessor.add_argument(
+ "--ugoira-conv-lossless",
+ dest="postprocessors", nargs=0, action=UgoiraAction,
+ const="vp9-lossless",
+ help=argparse.SUPPRESS,
+ )
+ postprocessor.add_argument(
+ "--ugoira-conv-copy",
+ dest="postprocessors", nargs=0, action=UgoiraAction, const="copy",
+ help=argparse.SUPPRESS,
)
postprocessor.add_argument(
"--exec",
@@ -519,25 +629,9 @@ def build_parser():
dest="postprocessors", metavar="CMD",
action=AppendCommandAction, const={
"name": "exec", "event": "finalize"},
- help=("Execute CMD after all files were downloaded successfully. "
+ help=("Execute CMD after all files were downloaded. "
"Example: --exec-after \"cd {_directory} "
"&& convert * ../doc.pdf\""),
)
- postprocessor.add_argument(
- "-P", "--postprocessor",
- dest="postprocessors", metavar="NAME", action="append",
- help="Activate the specified post processor",
- )
- postprocessor.add_argument(
- "-O", "--postprocessor-option",
- dest="options_pp", metavar="OPT", action=OptionAction, default={},
- help="Additional '<key>=<value>' post processor options",
- )
-
- parser.add_argument(
- "urls",
- metavar="URL", nargs="*",
- help=argparse.SUPPRESS,
- )
return parser
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 6255d49..62aa12d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -487,82 +487,6 @@ CODES = {
}
-def parse_inputfile(file, log):
- """Filter and process strings from an input file.
-
- Lines starting with '#' and empty lines will be ignored.
- Lines starting with '-' will be interpreted as a key-value pair separated
- by an '='. where 'key' is a dot-separated option name and 'value' is a
- JSON-parsable value. These configuration options will be applied while
- processing the next URL.
- Lines starting with '-G' are the same as above, except these options will
- be applied for *all* following URLs, i.e. they are Global.
- Everything else will be used as a potential URL.
-
- Example input file:
-
- # settings global options
- -G base-directory = "/tmp/"
- -G skip = false
-
- # setting local options for the next URL
- -filename="spaces_are_optional.jpg"
- -skip = true
-
- https://example.org/
-
- # next URL uses default filename and 'skip' is false.
- https://example.com/index.htm # comment1
- https://example.com/404.htm # comment2
- """
- gconf = []
- lconf = []
- strip_comment = None
-
- for line in file:
- line = line.strip()
-
- if not line or line[0] == "#":
- # empty line or comment
- continue
-
- elif line[0] == "-":
- # config spec
- if len(line) >= 2 and line[1] == "G":
- conf = gconf
- line = line[2:]
- else:
- conf = lconf
- line = line[1:]
-
- key, sep, value = line.partition("=")
- if not sep:
- log.warning("input file: invalid <key>=<value> pair: %s", line)
- continue
-
- try:
- value = json_loads(value.strip())
- except ValueError as exc:
- log.warning("input file: unable to parse '%s': %s", value, exc)
- continue
-
- key = key.strip().split(".")
- conf.append((key[:-1], key[-1], value))
-
- else:
- # url
- if " #" in line or "\t#" in line:
- if strip_comment is None:
- strip_comment = re.compile(r"\s+#.*").sub
- line = strip_comment("", line)
- if gconf or lconf:
- yield ExtendedUrl(line, gconf, lconf)
- gconf = []
- lconf = []
- else:
- yield line
-
-
class CustomNone():
"""None-style type that supports more operations than regular None"""
__slots__ = ()
@@ -873,15 +797,6 @@ class FilterPredicate():
raise exception.FilterError(exc)
-class ExtendedUrl():
- """URL with attached config key-value pairs"""
- def __init__(self, url, gconf, lconf):
- self.value, self.gconfig, self.lconfig = url, gconf, lconf
-
- def __str__(self):
- return self.value
-
-
class DownloadArchive():
def __init__(self, path, format_string, pragma=None,
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 5050174..5034fb2 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.2"
+__version__ = "1.26.3"