author    Unit 193 <unit193@unit193.net>  2021-12-01 14:44:00 -0500
committer Unit 193 <unit193@unit193.net>  2021-12-01 14:44:00 -0500
commit    a5aecc343fd2886e7ae09bb3e2afeec38f175755 (patch)
tree      06a284b3d73700bd38116423e2480afa516255c2
parent    fc8c5e642017e2b4e5299e2093e72b316479690d (diff)

New upstream version 1.19.3 (tag: upstream/1.19.3)
-rw-r--r--  CHANGELOG.md                           |  31
-rw-r--r--  PKG-INFO                               |   6
-rw-r--r--  README.rst                             |   4
-rw-r--r--  data/man/gallery-dl.1                  |   2
-rw-r--r--  data/man/gallery-dl.conf.5             | 115
-rw-r--r--  docs/gallery-dl.conf                   |   5
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        |   2
-rw-r--r--  gallery_dl/downloader/ytdl.py          |  87
-rw-r--r--  gallery_dl/extractor/__init__.py       |   1
-rw-r--r--  gallery_dl/extractor/dynastyscans.py   |  25
-rw-r--r--  gallery_dl/extractor/exhentai.py       |   8
-rw-r--r--  gallery_dl/extractor/foolfuuka.py      |   6
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py   |  15
-rw-r--r--  gallery_dl/extractor/instagram.py      |  20
-rw-r--r--  gallery_dl/extractor/kemonoparty.py    | 125
-rw-r--r--  gallery_dl/extractor/mangadex.py       |  42
-rw-r--r--  gallery_dl/extractor/mangoxo.py        |  12
-rw-r--r--  gallery_dl/extractor/philomena.py      |  12
-rw-r--r--  gallery_dl/extractor/reactor.py        | 228
-rw-r--r--  gallery_dl/extractor/seisoparty.py     | 201
-rw-r--r--  gallery_dl/extractor/shopify.py        |   6
-rw-r--r--  gallery_dl/extractor/skeb.py           |   3
-rw-r--r--  gallery_dl/extractor/subscribestar.py  |  14
-rw-r--r--  gallery_dl/extractor/twitter.py        |  37
-rw-r--r--  gallery_dl/extractor/webtoons.py       |   5
-rw-r--r--  gallery_dl/extractor/xvideos.py        |   4
-rw-r--r--  gallery_dl/extractor/ytdl.py           |  79
-rw-r--r--  gallery_dl/formatter.py                |  12
-rw-r--r--  gallery_dl/job.py                      |  69
-rw-r--r--  gallery_dl/util.py                     |  70
-rw-r--r--  gallery_dl/version.py                  |   2
-rw-r--r--  gallery_dl/ytdl.py                     | 513
-rw-r--r--  test/test_formatter.py                 |  11
-rw-r--r--  test/test_job.py                       |  30
-rw-r--r--  test/test_results.py                   |   8
-rw-r--r--  test/test_util.py                      |  68
37 files changed, 1276 insertions(+), 608 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9fa1540..16e843f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
# Changelog
+## 1.19.3 - 2021-11-27
+### Additions
+- [dynastyscans] add `manga` extractor ([#2035](https://github.com/mikf/gallery-dl/issues/2035))
+- [instagram] include user metadata for `tagged` downloads ([#2024](https://github.com/mikf/gallery-dl/issues/2024))
+- [kemonoparty] implement `files` option ([#1991](https://github.com/mikf/gallery-dl/issues/1991))
+- [kemonoparty] add `dms` option ([#2008](https://github.com/mikf/gallery-dl/issues/2008))
+- [mangadex] always provide `artist`, `author`, and `group` metadata fields ([#2049](https://github.com/mikf/gallery-dl/issues/2049))
+- [philomena] support furbooru.org ([#1995](https://github.com/mikf/gallery-dl/issues/1995))
+- [reactor] support thatpervert.com ([#2029](https://github.com/mikf/gallery-dl/issues/2029))
+- [shopify] support loungeunderwear.com ([#2053](https://github.com/mikf/gallery-dl/issues/2053))
+- [skeb] add `thumbnails` option ([#2047](https://github.com/mikf/gallery-dl/issues/2047), [#2051](https://github.com/mikf/gallery-dl/issues/2051))
+- [subscribestar] add `num` enumeration index ([#2040](https://github.com/mikf/gallery-dl/issues/2040))
+- [subscribestar] emit metadata for posts without media ([#1569](https://github.com/mikf/gallery-dl/issues/1569))
+- [ytdl] implement `cmdline-args` and `config-file` options to allow parsing ytdl command-line options ([#1680](https://github.com/mikf/gallery-dl/issues/1680))
+- [formatter] implement `D` format specifier
+- extend `blacklist`/`whitelist` syntax ([#2025](https://github.com/mikf/gallery-dl/issues/2025))
+### Fixes
+- [dynastyscans] provide `date` as datetime object ([#2050](https://github.com/mikf/gallery-dl/issues/2050))
+- [exhentai] fix extraction for disowned galleries ([#2055](https://github.com/mikf/gallery-dl/issues/2055))
+- [gelbooru] apply workaround for pagination limits
+- [kemonoparty] skip duplicate files ([#2032](https://github.com/mikf/gallery-dl/issues/2032), [#1991](https://github.com/mikf/gallery-dl/issues/1991), [#1899](https://github.com/mikf/gallery-dl/issues/1899))
+- [kemonoparty] provide `date` metadata for gumroad ([#2007](https://github.com/mikf/gallery-dl/issues/2007))
+- [mangoxo] fix metadata extraction
+- [twitter] distinguish between fatal & nonfatal errors ([#2020](https://github.com/mikf/gallery-dl/issues/2020))
+- [twitter] fix extractor for direct image links ([#2030](https://github.com/mikf/gallery-dl/issues/2030))
+- [webtoons] use download URLs that do not require a `Referer` header ([#2005](https://github.com/mikf/gallery-dl/issues/2005))
+- [ytdl] improve error handling ([#1680](https://github.com/mikf/gallery-dl/issues/1680))
+- [downloader:ytdl] prevent crash in `_progress_hook()` ([#1680](https://github.com/mikf/gallery-dl/issues/1680))
+### Removals
+- [seisoparty] remove module
+
## 1.19.2 - 2021-11-05
### Additions
- [kemonoparty] add `comments` option ([#1980](https://github.com/mikf/gallery-dl/issues/1980))
diff --git a/PKG-INFO b/PKG-INFO
index e40d119..b758e4c 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.19.2
+Version: 1.19.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.3/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index 5586fda..72f7c82 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.3/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index ba0aa8d..e7741ef 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-11-05" "1.19.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-11-27" "1.19.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index dc097d2..09d2820 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-11-05" "1.19.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-11-27" "1.19.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1529,37 +1529,50 @@ Download video files.
Extract \f[I]comments\f[] metadata.
-.SS extractor.kemonoparty.max-posts
+.SS extractor.kemonoparty.dms
.IP "Type:" 6
-\f[I]integer\f[]
+\f[I]bool\f[]
.IP "Default:" 9
-\f[I]null\f[]
+\f[I]false\f[]
.IP "Description:" 4
-Limit the number of posts to download.
+Extract a user's direct messages as \f[I]dms\f[] metadata.
-.SS extractor.kemonoparty.metadata
+.SS extractor.kemonoparty.files
.IP "Type:" 6
-\f[I]bool\f[]
+\f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
-\f[I]false\f[]
+\f[I]["file", "attachments", "inline"]\f[]
.IP "Description:" 4
-Extract \f[I]username\f[] metadata
+Determines the type and order of files to be downloaded.
+
+Available types are \f[I]file\f[], \f[I]attachments\f[], and \f[I]inline\f[].
+
+
+.SS extractor.kemonoparty.max-posts
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Limit the number of posts to download.
-.SS extractor.kemonoparty.patreon-skip-file
+.SS extractor.kemonoparty.metadata
.IP "Type:" 6
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
.IP "Description:" 4
-Skip main files in Patreon posts to avoid duplicates.
+Extract \f[I]username\f[] metadata
.SS extractor.khinsider.format
@@ -1630,17 +1643,6 @@ and \f[I]/user/follows/manga/feed\f[])
to filter chapters by.
-.SS extractor.mangadex.metadata
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Provide \f[I]artist\f[], \f[I]author\f[], and \f[I]group\f[] metadata fields.
-
-
.SS extractor.mangadex.ratings
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -2154,6 +2156,17 @@ Download video embeds from external sites.
Download videos.
+.SS extractor.skeb.thumbnails
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download thumbnails.
+
+
.SS extractor.smugmug.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -2580,6 +2593,34 @@ All available options can be found in \f[I]youtube-dl's docstrings
<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>\f[].
+.SS extractor.ytdl.cmdline-args
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+.br
+* "--quiet --write-sub --merge-output-format mkv"
+.br
+* ["--quiet", "--write-sub", "--merge-output-format", "mkv"]
+
+.IP "Description:" 4
+Additional options specified as youtube-dl command-line arguments.
+
+
+.SS extractor.ytdl.config-file
+.IP "Type:" 6
+\f[I]Path\f[]
+
+.IP "Example:" 4
+"~/.config/youtube-dl/config"
+
+.IP "Description:" 4
+Location of a youtube-dl configuration file to load options from.
+
+
.SS extractor.[booru].tags
.IP "Type:" 6
\f[I]bool\f[]
@@ -2894,6 +2935,34 @@ All available options can be found in \f[I]youtube-dl's docstrings
<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>\f[].
+.SS downloader.ytdl.cmdline-args
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Example:" 4
+.br
+* "--quiet --write-sub --merge-output-format mkv"
+.br
+* ["--quiet", "--write-sub", "--merge-output-format", "mkv"]
+
+.IP "Description:" 4
+Additional options specified as youtube-dl command-line arguments.
+
+
+.SS downloader.ytdl.config-file
+.IP "Type:" 6
+\f[I]Path\f[]
+
+.IP "Example:" 4
+"~/.config/youtube-dl/config"
+
+.IP "Description:" 4
+Location of a youtube-dl configuration file to load options from.
+
+
.SH OUTPUT OPTIONS
.SS output.fallback
.IP "Type:" 6
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 5e2628f..0800ec7 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -156,8 +156,9 @@
"mangadex":
{
"api-server": "https://api.mangadex.org",
- "metadata": false,
- "lang": null
+ "api-parameters": null,
+ "lang": null,
+ "ratings": ["safe", "suggestive", "erotica", "pornographic"]
},
"mangoxo":
{
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 7e6d632..bf70cac 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.19.2
+Version: 1.19.3
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.3/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 409b8e7..d05066c 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -24,6 +24,7 @@ gallery_dl/path.py
gallery_dl/text.py
gallery_dl/util.py
gallery_dl/version.py
+gallery_dl/ytdl.py
gallery_dl.egg-info/PKG-INFO
gallery_dl.egg-info/SOURCES.txt
gallery_dl.egg-info/dependency_links.txt
@@ -149,7 +150,6 @@ gallery_dl/extractor/redgifs.py
gallery_dl/extractor/sankaku.py
gallery_dl/extractor/sankakucomplex.py
gallery_dl/extractor/seiga.py
-gallery_dl/extractor/seisoparty.py
gallery_dl/extractor/senmanga.py
gallery_dl/extractor/sexcom.py
gallery_dl/extractor/shopify.py
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index f4d3e05..8416ca0 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -9,7 +9,7 @@
"""Downloader module for URLs requiring youtube-dl support"""
from .common import DownloaderBase
-from .. import text
+from .. import ytdl, text
import os
@@ -17,70 +17,53 @@ class YoutubeDLDownloader(DownloaderBase):
scheme = "ytdl"
def __init__(self, job):
- module = __import__(self.config("module") or "youtube_dl")
-
DownloaderBase.__init__(self, job)
- extractor = job.extractor
+ extractor = job.extractor
retries = self.config("retries", extractor._retries)
- options = {
- "format": self.config("format") or None,
- "ratelimit": text.parse_bytes(self.config("rate"), None),
+ self.ytdl_opts = {
"retries": retries+1 if retries >= 0 else float("inf"),
"socket_timeout": self.config("timeout", extractor._timeout),
"nocheckcertificate": not self.config("verify", extractor._verify),
- "nopart": not self.part,
- "updatetime": self.config("mtime", True),
- "proxy": extractor.session.proxies.get("http"),
- "min_filesize": text.parse_bytes(
- self.config("filesize-min"), None),
- "max_filesize": text.parse_bytes(
- self.config("filesize-max"), None),
}
- raw_options = self.config("raw-options")
- if raw_options:
- options.update(raw_options)
-
- self.progress = self.config("progress", 3.0)
- if self.progress is not None:
- options["progress_hooks"] = (self._progress_hook,)
-
- if self.config("logging", True):
- options["logger"] = self.log
+ self.ytdl_instance = None
self.forward_cookies = self.config("forward-cookies", False)
-
+ self.progress = self.config("progress", 3.0)
self.outtmpl = self.config("outtmpl")
- if self.outtmpl == "default":
- self.outtmpl = module.DEFAULT_OUTTMPL
-
- self.ytdl = module.YoutubeDL(options)
def download(self, url, pathfmt):
kwdict = pathfmt.kwdict
- ytdl = kwdict.pop("_ytdl_instance", None)
- if ytdl:
- if self.progress is not None and not ytdl._progress_hooks:
- ytdl.add_progress_hook(self._progress_hook)
- else:
- ytdl = self.ytdl
+ ytdl_instance = kwdict.pop("_ytdl_instance", None)
+ if not ytdl_instance:
+ ytdl_instance = self.ytdl_instance
+ if not ytdl_instance:
+ module = __import__(self.config("module") or "youtube_dl")
+ self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
+ module, self, self.ytdl_opts)
+ if self.outtmpl == "default":
+ self.outtmpl = module.DEFAULT_OUTTMPL
if self.forward_cookies:
- set_cookie = ytdl.cookiejar.set_cookie
+ set_cookie = ytdl_instance.cookiejar.set_cookie
for cookie in self.session.cookies:
set_cookie(cookie)
+ if self.progress is not None and not ytdl_instance._progress_hooks:
+ ytdl_instance.add_progress_hook(self._progress_hook)
+
info_dict = kwdict.pop("_ytdl_info_dict", None)
if not info_dict:
try:
- info_dict = ytdl.extract_info(url[5:], download=False)
+ info_dict = ytdl_instance.extract_info(url[5:], download=False)
except Exception:
return False
if "entries" in info_dict:
index = kwdict.get("_ytdl_index")
if index is None:
- return self._download_playlist(ytdl, pathfmt, info_dict)
+ return self._download_playlist(
+ ytdl_instance, pathfmt, info_dict)
else:
info_dict = info_dict["entries"][index]
@@ -88,9 +71,9 @@ class YoutubeDLDownloader(DownloaderBase):
if extra:
info_dict.update(extra)
- return self._download_video(ytdl, pathfmt, info_dict)
+ return self._download_video(ytdl_instance, pathfmt, info_dict)
- def _download_video(self, ytdl, pathfmt, info_dict):
+ def _download_video(self, ytdl_instance, pathfmt, info_dict):
if "url" in info_dict:
text.nameext_from_url(info_dict["url"], pathfmt.kwdict)
@@ -99,8 +82,9 @@ class YoutubeDLDownloader(DownloaderBase):
info_dict["ext"] = "mkv"
if self.outtmpl:
- self._set_outtmpl(ytdl, self.outtmpl)
- pathfmt.filename = filename = ytdl.prepare_filename(info_dict)
+ self._set_outtmpl(ytdl_instance, self.outtmpl)
+ pathfmt.filename = filename = \
+ ytdl_instance.prepare_filename(info_dict)
pathfmt.extension = info_dict["ext"]
pathfmt.path = pathfmt.directory + filename
pathfmt.realpath = pathfmt.temppath = (
@@ -115,40 +99,41 @@ class YoutubeDLDownloader(DownloaderBase):
pathfmt.temppath = os.path.join(
self.partdir, pathfmt.filename)
- self._set_outtmpl(ytdl, pathfmt.temppath.replace("%", "%%"))
+ self._set_outtmpl(ytdl_instance, pathfmt.temppath.replace("%", "%%"))
self.out.start(pathfmt.path)
try:
- ytdl.process_info(info_dict)
+ ytdl_instance.process_info(info_dict)
except Exception:
self.log.debug("Traceback", exc_info=True)
return False
return True
- def _download_playlist(self, ytdl, pathfmt, info_dict):
+ def _download_playlist(self, ytdl_instance, pathfmt, info_dict):
pathfmt.set_extension("%(playlist_index)s.%(ext)s")
- self._set_outtmpl(ytdl, pathfmt.realpath)
+ self._set_outtmpl(ytdl_instance, pathfmt.realpath)
for entry in info_dict["entries"]:
- ytdl.process_info(entry)
+ ytdl_instance.process_info(entry)
return True
def _progress_hook(self, info):
if info["status"] == "downloading" and \
info["elapsed"] >= self.progress:
total = info.get("total_bytes") or info.get("total_bytes_estimate")
+ speed = info.get("speed")
self.out.progress(
None if total is None else int(total),
info["downloaded_bytes"],
- int(info["speed"]),
+ int(speed) if speed else 0,
)
@staticmethod
- def _set_outtmpl(ytdl, outtmpl):
+ def _set_outtmpl(ytdl_instance, outtmpl):
try:
- ytdl.outtmpl_dict["default"] = outtmpl
+ ytdl_instance.outtmpl_dict["default"] = outtmpl
except AttributeError:
- ytdl.params["outtmpl"] = outtmpl
+ ytdl_instance.params["outtmpl"] = outtmpl
def compatible_formats(formats):
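
The downloader above now builds its YoutubeDL object lazily through ytdl.construct_YoutubeDL(), which is what makes the new cmdline-args and config-file options apply to the downloader as well. A minimal downloader.ytdl fragment, assuming youtube_dl as the backing module; the cmdline-args and config-file values are the example values from the man page diff, and forward-cookies/progress are shown at the defaults visible in the code above:

    {
        "downloader": {
            "ytdl": {
                "module": "youtube_dl",
                "cmdline-args": "--quiet --write-sub --merge-output-format mkv",
                "config-file": "~/.config/youtube-dl/config",
                "forward-cookies": false,
                "progress": 3.0
            }
        }
    }
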
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 79fe971..dd9da01 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -111,7 +111,6 @@ modules = [
"sankaku",
"sankakucomplex",
"seiga",
- "seisoparty",
"senmanga",
"sexcom",
"simplyhentai",
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 4541d25..ab1044f 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -8,7 +8,7 @@
"""Extractors for https://dynasty-scans.com/"""
-from .common import ChapterExtractor, Extractor, Message
+from .common import ChapterExtractor, MangaExtractor, Extractor, Message
from .. import text
import json
import re
@@ -48,12 +48,12 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
(("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), {
"url": "dce64e8c504118f1ab4135c00245ea12413896cb",
- "keyword": "1564965671ac69bb7fbc340538397f6bd0aa269b",
+ "keyword": "b67599703c27316a2fe4f11c3232130a1904e032",
}),
(("http://dynasty-scans.com/chapters/"
"new_game_the_spinoff_special_13"), {
"url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538",
- "keyword": "22b35029bc65d6d95db2e2c147b0a37f2d290f29",
+ "keyword": "6b674eb3a274999153f6be044973b195008ced2f",
}),
)
@@ -76,7 +76,8 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"author" : text.remove_html(author),
"group" : (text.remove_html(group) or
text.extract(group, ' alt="', '"')[0] or ""),
- "date" : extr('"icon-calendar"></i> ', '<'),
+ "date" : text.parse_datetime(extr(
+ '"icon-calendar"></i> ', '<'), "%b %d, %Y"),
"lang" : "en",
"language": "English",
}
@@ -89,6 +90,22 @@ class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
]
+class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor):
+ chapterclass = DynastyscansChapterExtractor
+ reverse = False
+ pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
+ test = ("https://dynasty-scans.com/series/hitoribocchi_no_oo_seikatsu", {
+ "pattern": DynastyscansChapterExtractor.pattern,
+ "count": ">= 100",
+ })
+
+ def chapters(self, page):
+ return [
+ (self.root + path, {})
+ for path in text.extract_iter(page, '<dd>\n<a href="', '"')
+ ]
+
+
class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
"""Extrator for image search results on dynasty-scans.com"""
subcategory = "search"
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index aabfe6b..7ffb214 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"date": "dt:2018-03-18 20:15:00",
"eh_category": "Non-H",
"expunged": False,
- "favorites": "18",
+ "favorites": "19",
"filecount": "4",
"filesize": 1488978,
"gid": 1200119,
@@ -239,7 +239,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"title_jpn" : text.unescape(extr('<h1 id="gj">', '</h1>')),
"_" : extr('<div id="gdc"><div class="cs ct', '"'),
"eh_category" : extr('>', '<'),
- "uploader" : text.unquote(extr('/uploader/', '"')),
+ "uploader" : extr('<div id="gdn">', '</div>'),
"date" : text.parse_datetime(extr(
'>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
"parent" : extr(
@@ -255,6 +255,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"torrentcount" : extr('>Torrent Download (', ')'),
}
+ if data["uploader"].startswith("<"):
+ data["uploader"] = text.unescape(text.extract(
+ data["uploader"], ">", "<")[0])
+
f = data["favorites"][0]
if f == "N":
data["favorites"] = "0"
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index d2c5e8f..6ddd689 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -122,7 +122,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
("https://desuarchive.org/a/thread/159542679/", {
- "url": "3ae1473f6916ac831efe5cc4d4e7d3298ce79406",
+ "url": "2bddbe03b01b4630337f6916f6df36d1d443b7b8",
}),
("https://boards.fireden.net/sci/thread/11264294/", {
"url": "61cab625c95584a12a30049d054931d64f8d20aa",
@@ -131,10 +131,10 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
}),
("https://rbt.asia/g/thread/61487650/", {
- "url": "61896d9d9a2edb556b619000a308a984307b6d30",
+ "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5",
}),
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
- "url": "61896d9d9a2edb556b619000a308a984307b6d30",
+ "url": "b4692707cddb4ad1c9ba1cde77c4703025cb86e5",
}),
("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index e09e190..a42a202 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -27,8 +27,21 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start
params["limit"] = self.per_page
+ post = None
while True:
- root = self._api_request(params)
+ try:
+ root = self._api_request(params)
+ except ElementTree.ParseError:
+ if "tags" not in params or post is None:
+ raise
+ taglist = [tag for tag in params["tags"].split()
+ if not tag.startswith("id:<")]
+ taglist.append("id:<" + str(post.attrib["id"]))
+ params["tags"] = " ".join(taglist)
+ params["pid"] = 0
+ continue
+
+ post = None
for post in root:
yield post.attrib
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index bf479ab..a1dd465 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -439,15 +439,27 @@ class InstagramTaggedExtractor(InstagramExtractor):
test = ("https://www.instagram.com/instagram/tagged/", {
"range": "1-16",
"count": ">= 16",
+ "keyword": {
+ "tagged_owner_id" : "25025320",
+ "tagged_username" : "instagram",
+ "tagged_full_name": "Instagram",
+ },
})
- def posts(self):
+ def metadata(self):
url = "{}/{}/".format(self.root, self.item)
- user = self._extract_profile_page(url)
+ self.user = user = self._extract_profile_page(url)
+
+ return {
+ "tagged_owner_id" : user["id"],
+ "tagged_username" : user["username"],
+ "tagged_full_name": user["full_name"],
+ }
+ def posts(self):
query_hash = "be13233562af2d229b008d2976b998b5"
- variables = {"id": user["id"], "first": 50}
- edge = self._get_edge_data(user, None)
+ variables = {"id": self.user["id"], "first": 50}
+ edge = self._get_edge_data(self.user, None)
return self._pagination_graphql(query_hash, variables, edge)
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 2e1d0b2..6483278 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,7 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?kemono\.party"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?kemono\.party"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
@@ -30,19 +30,20 @@ class KemonopartyExtractor(Extractor):
def items(self):
self._prepare_ddosguard_cookies()
- find_inline = re.compile(
+ self._find_inline = re.compile(
r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
- skip_service = \
- "patreon" if self.config("patreon-skip-file", True) else None
+ find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
+ generators = self._build_file_generators(self.config("files"))
comments = self.config("comments")
+ username = dms = None
if self.config("metadata"):
username = text.unescape(text.extract(
self.request(self.user_url).text,
'<meta name="artist_name" content="', '"')[0])
- else:
- username = None
+ if self.config("dms"):
+ dms = True
posts = self.posts()
max_posts = self.config("max-posts")
@@ -51,31 +52,38 @@ class KemonopartyExtractor(Extractor):
for post in posts:
- files = []
- append = files.append
- file = post["file"]
-
- if file:
- file["type"] = "file"
- if post["service"] != skip_service or not post["attachments"]:
- append(file)
- for attachment in post["attachments"]:
- attachment["type"] = "attachment"
- append(attachment)
- for path in find_inline(post["content"] or ""):
- append({"path": path, "name": path, "type": "inline"})
-
post["date"] = text.parse_datetime(
- post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ post["published"] or post["added"],
+ "%a, %d %b %Y %H:%M:%S %Z")
if username:
post["username"] = username
if comments:
post["comments"] = self._extract_comments(post)
+ if dms is not None:
+ if dms is True:
+ dms = self._extract_dms(post)
+ post["dms"] = dms
yield Message.Directory, post
- for post["num"], file in enumerate(files, 1):
- post["type"] = file["type"]
+ hashes = set()
+ post["num"] = 0
+ for file in itertools.chain.from_iterable(
+ g(post) for g in generators):
url = file["path"]
+
+ match = find_hash(url)
+ if match:
+ post["hash"] = hash = match.group(1)
+ if hash in hashes:
+ self.log.debug("Skipping %s (duplicate)", url)
+ continue
+ hashes.add(hash)
+ else:
+ post["hash"] = ""
+
+ post["type"] = file["type"]
+ post["num"] += 1
+
if url[0] == "/":
url = self.root + "/data" + url
elif url.startswith("https://kemono.party"):
@@ -103,6 +111,34 @@ class KemonopartyExtractor(Extractor):
return {c.name: c.value for c in response.history[0].cookies}
+ def _file(self, post):
+ file = post["file"]
+ if not file:
+ return ()
+ file["type"] = "file"
+ return (file,)
+
+ def _attachments(self, post):
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ return post["attachments"]
+
+ def _inline(self, post):
+ for path in self._find_inline(post["content"] or ""):
+ yield {"path": path, "name": path, "type": "inline"}
+
+ def _build_file_generators(self, filetypes):
+ if filetypes is None:
+ return (self._file, self._attachments, self._inline)
+ genmap = {
+ "file" : self._file,
+ "attachments": self._attachments,
+ "inline" : self._inline,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ return [genmap[ft] for ft in filetypes]
+
def _extract_comments(self, post):
url = "{}/{}/user/{}/post/{}".format(
self.root, post["service"], post["user"], post["id"])
@@ -121,6 +157,21 @@ class KemonopartyExtractor(Extractor):
})
return comments
+ def _extract_dms(self, post):
+ url = "{}/{}/user/{}/dms".format(
+ self.root, post["service"], post["user"])
+ page = self.request(url).text
+
+ dms = []
+ for dm in text.extract_iter(page, "<article", "</article>"):
+ dms.append({
+ "body": text.unescape(text.extract(
+ dm, '<div class="dm-card__content">', '</div>',
+ )[0].strip()),
+ "date": text.extract(dm, 'datetime="', '"')[0],
+ })
+ return dms
+
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
@@ -175,6 +226,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"embed": dict,
"extension": "jpeg",
"filename": "P058kDFYus7DbqAkGlfWTlOr",
+ "hash": "210f35388e28bbcf756db18dd516e2d8"
+ "2ce758e0d32881eeee76d43e1716d382",
"id": "506575",
"num": 1,
"published": "Sun, 11 Aug 2019 02:09:04 GMT",
@@ -188,25 +241,39 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/data/inline/fanbox"
- r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
+ "pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
+ r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
+ "keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a"
+ "76336997ae8596f332e97d956a460ad2"},
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
- "pattern": r"https://kemono\.party/data/(file|attachment)s"
- r"/gumroad/trylsc/IURjT/",
+ "pattern": r"https://kemono\.party/data/("
+ r"files/gumroad/trylsc/IURjT/reward8\.jpg|"
+ r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
}),
# username (#1548, #1652)
("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
"options": (("metadata", True),),
"keyword": {"username": "Kudalyn's Creations"},
}),
- # skip patreon main file (#1667, #1689)
+ # skip patreon duplicates
("https://kemono.party/patreon/user/4158582/post/32099982", {
"count": 2,
- "keyword": {"type": "attachment"},
+ }),
+ # DMs (#2008)
+ ("https://kemono.party/patreon/user/34134344/post/38129255", {
+ "options": (("dms", True),),
+ "keyword": {"dms": [{
+ "body": r"re:Hi! Thank you very much for supporting the work I"
+ r" did in May. Here's your reward pack! I hope you fin"
+ r"d something you enjoy in it. :\)\n\nhttps://www.medi"
+ r"afire.com/file/\w+/Set13_tier_2.zip/file",
+ "date": "2021-07-31 02:47:51.327865",
+ }]},
}),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
+ ("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
)
def __init__(self, match):
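
Because every file yielded by this extractor now carries a hash field (the SHA-256 taken from the file path, or an empty string when the URL contains none) and duplicates are skipped per post, the hash can also be used in filename formats. An illustrative, non-upstream pattern built only from fields shown in the tests above:

    {
        "extractor": {
            "kemonoparty": {
                "filename": "{id}_{num:>02}_{hash[:8]}.{extension}"
            }
        }
    }
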
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index ff1d7c3..393f4e2 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -46,10 +46,10 @@ class MangadexExtractor(Extractor):
def _transform(self, chapter):
relationships = defaultdict(list)
for item in chapter["relationships"]:
- relationships[item["type"]].append(item["id"])
- manga = self.api.manga(relationships["manga"][0])
+ relationships[item["type"]].append(item)
+ manga = self.api.manga(relationships["manga"][0]["id"])
for item in manga["relationships"]:
- relationships[item["type"]].append(item["id"])
+ relationships[item["type"]].append(item)
cattributes = chapter["attributes"]
mattributes = manga["attributes"]
@@ -75,16 +75,12 @@ class MangadexExtractor(Extractor):
"count" : len(cattributes["data"]),
}
- if self.config("metadata"):
- data["artist"] = [
- self.api.author(uuid)["attributes"]["name"]
- for uuid in relationships["artist"]]
- data["author"] = [
- self.api.author(uuid)["attributes"]["name"]
- for uuid in relationships["author"]]
- data["group"] = [
- self.api.group(uuid)["attributes"]["name"]
- for uuid in relationships["scanlation_group"]]
+ data["artist"] = [artist["attributes"]["name"]
+ for artist in relationships["artist"]]
+ data["author"] = [author["attributes"]["name"]
+ for author in relationships["author"]]
+ data["group"] = [group["attributes"]["name"]
+ for group in relationships["scanlation_group"]]
return data
@@ -95,12 +91,11 @@ class MangadexChapterExtractor(MangadexExtractor):
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
test = (
("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
- "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd",
+ "keyword": "86fb262cf767dac6d965cd904ad499adba466404",
# "content": "50383a4c15124682057b197d40261641a98db514",
}),
# oneshot
("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
- "options": (("metadata", True),),
"count": 64,
"keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb",
}),
@@ -147,6 +142,8 @@ class MangadexMangaExtractor(MangadexExtractor):
"date" : "type:datetime",
"lang" : str,
"language": str,
+ "artist" : ["Arakawa Hiromu"],
+ "author" : ["Arakawa Hiromu"],
},
}),
("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
@@ -193,20 +190,14 @@ class MangadexAPI():
def athome_server(self, uuid):
return self._call("/at-home/server/" + uuid)
- @memcache(keyarg=1)
- def author(self, uuid):
- return self._call("/author/" + uuid)["data"]
-
def chapter(self, uuid):
- return self._call("/chapter/" + uuid)["data"]
-
- @memcache(keyarg=1)
- def group(self, uuid):
- return self._call("/group/" + uuid)["data"]
+ params = {"includes[]": ("scanlation_group",)}
+ return self._call("/chapter/" + uuid, params)["data"]
@memcache(keyarg=1)
def manga(self, uuid):
- return self._call("/manga/" + uuid)["data"]
+ params = {"includes[]": ("artist", "author")}
+ return self._call("/manga/" + uuid, params)["data"]
def manga_feed(self, uuid):
order = "desc" if self.extractor.config("chapter-reverse") else "asc"
@@ -275,6 +266,7 @@ class MangadexAPI():
ratings = ("safe", "suggestive", "erotica", "pornographic")
params["contentRating[]"] = ratings
+ params["includes[]"] = ("scanlation_group",)
params["translatedLanguage[]"] = config("lang")
params["offset"] = 0
diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py
index d45fbc9..1486057 100644
--- a/gallery_dl/extractor/mangoxo.py
+++ b/gallery_dl/extractor/mangoxo.py
@@ -122,18 +122,18 @@ class MangoxoAlbumExtractor(MangoxoExtractor):
def metadata(self, page):
"""Return general metadata"""
extr = text.extract_from(page)
- title = extr('<title>', '</title>')
- count = extr('id="pic-count">', '<')
- cid = extr('<img alt="', '"')
+ title = extr('<img id="cover-img" alt="', '"')
+ cid = extr('href="https://www.mangoxo.com/user/', '"')
+ cname = extr('<img alt="', '"')
cover = extr(' src="', '"')
- cname = extr('target="_blank">', '<')
- date = extr('</i>', '<')
+ count = extr('id="pic-count">', '<')
+ date = extr('class="fa fa-calendar"></i>', '<')
descr = extr('<pre>', '</pre>')
return {
"channel": {
"id": cid,
- "name": text.unescape(cname.strip()),
+ "name": text.unescape(cname),
"cover": cover,
},
"album": {
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index d3b3bb1..51a0d38 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -62,6 +62,8 @@ INSTANCES = {
"filter_id": "56027"},
"ponybooru" : {"root": "https://ponybooru.org",
"filter_id": "2"},
+ "furbooru" : {"root": "https://furbooru.org",
+ "filter_id": "2"},
}
BASE_PATTERN = PhilomenaExtractor.update(INSTANCES)
@@ -124,6 +126,9 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
("https://ponybooru.org/images/1", {
"content": "bca26f58fafd791fe07adcd2a28efd7751824605",
}),
+ ("https://furbooru.org/images/1", {
+ "content": "9eaa1e1b32fa0f16520912257dbefaff238d5fd2",
+ }),
)
def __init__(self, match):
@@ -157,6 +162,10 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
"range": "40-60",
"count": 21,
}),
+ ("https://furbooru.org/search?q=cute", {
+ "range": "40-60",
+ "count": 21,
+ }),
)
def __init__(self, match):
@@ -210,6 +219,9 @@ class PhilomenaGalleryExtractor(PhilomenaExtractor):
("https://ponybooru.org/galleries/27", {
"count": ">= 24",
}),
+ ("https://furbooru.org/galleries/27", {
+ "count": ">= 13",
+ }),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 04fe581..b3a620a 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -8,29 +8,29 @@
"""Generic extractors for *reactor sites"""
-from .common import Extractor, Message
+from .common import BaseExtractor, Message
from .. import text
import urllib.parse
import json
-BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)"
-
-class ReactorExtractor(Extractor):
+class ReactorExtractor(BaseExtractor):
"""Base class for *reactor.cc extractors"""
basecategory = "reactor"
filename_fmt = "{post_id}_{num:>02}{title[:100]:?_//}.{extension}"
archive_fmt = "{post_id}_{num}"
- instances = ()
request_interval = 5.0
def __init__(self, match):
- Extractor.__init__(self, match)
- self.root = "http://" + match.group(1)
+ BaseExtractor.__init__(self, match)
+ url = text.ensure_http_scheme(match.group(0), "http://")
+ pos = url.index("/", 10)
+
+ self.root, self.path = url[:pos], url[pos:]
self.session.headers["Referer"] = self.root
self.gif = self.config("gif", False)
- if not self.category:
+ if self.category == "reactor":
# set category based on domain name
netloc = urllib.parse.urlsplit(self.root).netloc
self.category = netloc.rpartition(".")[0]
@@ -50,7 +50,7 @@ class ReactorExtractor(Extractor):
def posts(self):
"""Return all relevant post-objects"""
- return self._pagination(self.url)
+ return self._pagination(self.root + self.path)
def _pagination(self, url):
while True:
@@ -145,91 +145,63 @@ class ReactorExtractor(Extractor):
}
+BASE_PATTERN = ReactorExtractor.update({
+ "reactor" : {
+ "root": "http://reactor.cc",
+ "pattern": r"(?:[^/.]+\.)?reactor\.cc",
+ },
+ "joyreactor" : {
+ "root": "http://joyreactor.cc",
+ "pattern": r"(?:www\.)?joyreactor\.c(?:c|om)",
+ },
+ "pornreactor": {
+ "root": "http://pornreactor.cc",
+ "pattern": r"(?:www\.)?(?:pornreactor\.cc|fapreactor.com)",
+ },
+ "thatpervert": {
+ "root": "http://thatpervert.com",
+ },
+})
+
+
class ReactorTagExtractor(ReactorExtractor):
"""Extractor for tag searches on *reactor.cc sites"""
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{search_tags}_{post_id}_{num}"
pattern = BASE_PATTERN + r"/tag/([^/?#]+)"
- test = ("http://anime.reactor.cc/tag/Anime+Art",)
+ test = (
+ ("http://reactor.cc/tag/gif"),
+ ("http://anime.reactor.cc/tag/Anime+Art"),
+ ("http://joyreactor.cc/tag/Advent+Cirno", {
+ "count": ">= 15",
+ }),
+ ("http://joyreactor.com/tag/Cirno", {
+ "url": "aa59090590b26f4654881301fe8fe748a51625a8",
+ }),
+ ("http://pornreactor.cc/tag/RiceGnat", {
+ "range": "1-25",
+ "count": ">= 25",
+ }),
+ ("http://fapreactor.com/tag/RiceGnat"),
+ )
def __init__(self, match):
ReactorExtractor.__init__(self, match)
- self.tag = match.group(2)
+ self.tag = match.group(match.lastindex)
def metadata(self):
return {"search_tags": text.unescape(self.tag).replace("+", " ")}
-class ReactorSearchExtractor(ReactorTagExtractor):
+class ReactorSearchExtractor(ReactorExtractor):
"""Extractor for search results on *reactor.cc sites"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search_tags}")
archive_fmt = "s_{search_tags}_{post_id}_{num}"
pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
- test = ("http://anime.reactor.cc/search?q=Art",)
-
-
-class ReactorUserExtractor(ReactorExtractor):
- """Extractor for all posts of a user on *reactor.cc sites"""
- subcategory = "user"
- directory_fmt = ("{category}", "user", "{user}")
- pattern = BASE_PATTERN + r"/user/([^/?#]+)"
- test = ("http://anime.reactor.cc/user/Shuster",)
-
- def __init__(self, match):
- ReactorExtractor.__init__(self, match)
- self.user = match.group(2)
-
- def metadata(self):
- return {"user": text.unescape(self.user).replace("+", " ")}
-
-
-class ReactorPostExtractor(ReactorExtractor):
- """Extractor for single posts on *reactor.cc sites"""
- subcategory = "post"
- pattern = BASE_PATTERN + r"/post/(\d+)"
- test = ("http://anime.reactor.cc/post/3576250",)
-
- def __init__(self, match):
- ReactorExtractor.__init__(self, match)
- self.post_id = match.group(2)
-
- def items(self):
- post = self.request(self.url).text
- pos = post.find('class="uhead">')
- for image in self._parse_post(post[pos:]):
- if image["num"] == 1:
- yield Message.Directory, image
- url = image["url"]
- yield Message.Url, url, text.nameext_from_url(url, image)
-
-
-# --------------------------------------------------------------------
-# JoyReactor
-
-JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
-
-
-class JoyreactorTagExtractor(ReactorTagExtractor):
- """Extractor for tag searches on joyreactor.cc"""
- category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)"
- test = (
- ("http://joyreactor.cc/tag/Advent+Cirno", {
- "count": ">= 15",
- }),
- ("http://joyreactor.com/tag/Cirno", {
- "url": "aa59090590b26f4654881301fe8fe748a51625a8",
- }),
- )
-
-
-class JoyreactorSearchExtractor(ReactorSearchExtractor):
- """Extractor for search results on joyreactor.cc"""
- category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
test = (
+ ("http://reactor.cc/search?q=Art"),
("http://joyreactor.cc/search/Nature", {
"range": "1-25",
"count": ">= 20",
@@ -238,26 +210,54 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor):
"range": "1-25",
"count": ">= 20",
}),
+ ("http://pornreactor.cc/search?q=ecchi+hentai"),
+ ("http://fapreactor.com/search/ecchi+hentai"),
)
+ def __init__(self, match):
+ ReactorExtractor.__init__(self, match)
+ self.tag = match.group(match.lastindex)
+
+ def metadata(self):
+ return {"search_tags": text.unescape(self.tag).replace("+", " ")}
+
-class JoyreactorUserExtractor(ReactorUserExtractor):
- """Extractor for all posts of a user on joyreactor.cc"""
- category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/user/([^/?#]+)"
+class ReactorUserExtractor(ReactorExtractor):
+ """Extractor for all posts of a user on *reactor.cc sites"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "user", "{user}")
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)"
test = (
+ ("http://reactor.cc/user/Dioklet"),
+ ("http://anime.reactor.cc/user/Shuster"),
("http://joyreactor.cc/user/hemantic"),
("http://joyreactor.com/user/Tacoman123", {
"url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5",
}),
+ ("http://pornreactor.cc/user/Disillusion", {
+ "range": "1-25",
+ "count": ">= 20",
+ }),
+ ("http://fapreactor.com/user/Disillusion"),
)
+ def __init__(self, match):
+ ReactorExtractor.__init__(self, match)
+ self.user = match.group(match.lastindex)
+
+ def metadata(self):
+ return {"user": text.unescape(self.user).replace("+", " ")}
+
-class JoyreactorPostExtractor(ReactorPostExtractor):
- """Extractor for single posts on joyreactor.cc"""
- category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/post/(\d+)"
+class ReactorPostExtractor(ReactorExtractor):
+ """Extractor for single posts on *reactor.cc sites"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/post/(\d+)"
test = (
+ ("http://reactor.cc/post/4999736", {
+ "url": "dfc74d150d7267384d8c229c4b82aa210755daa0",
+ }),
+ ("http://anime.reactor.cc/post/3576250"),
("http://joyreactor.com/post/3721876", { # single image
"pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
r"/cartoon-painting-monster-lake-4841316.jpeg",
@@ -281,57 +281,6 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
("http://joyreactor.cc/post/1299", { # "malformed" JSON
"url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39",
}),
- )
-
-
-# --------------------------------------------------------------------
-# PornReactor
-
-PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)"
-
-
-class PornreactorTagExtractor(ReactorTagExtractor):
- """Extractor for tag searches on pornreactor.cc"""
- category = "pornreactor"
- pattern = PR_BASE_PATTERN + r"/tag/([^/?#]+)"
- test = (
- ("http://pornreactor.cc/tag/RiceGnat", {
- "range": "1-25",
- "count": ">= 25",
- }),
- ("http://fapreactor.com/tag/RiceGnat"),
- )
-
-
-class PornreactorSearchExtractor(ReactorSearchExtractor):
- """Extractor for search results on pornreactor.cc"""
- category = "pornreactor"
- pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
- test = (
- ("http://pornreactor.cc/search?q=ecchi+hentai"),
- ("http://fapreactor.com/search/ecchi+hentai"),
- )
-
-
-class PornreactorUserExtractor(ReactorUserExtractor):
- """Extractor for all posts of a user on pornreactor.cc"""
- category = "pornreactor"
- pattern = PR_BASE_PATTERN + r"/user/([^/?#]+)"
- test = (
- ("http://pornreactor.cc/user/Disillusion", {
- "range": "1-25",
- "count": ">= 20",
- }),
- ("http://fapreactor.com/user/Disillusion"),
- )
-
-
-class PornreactorPostExtractor(ReactorPostExtractor):
- """Extractor for single posts on pornreactor.cc"""
- category = "pornreactor"
- subcategory = "post"
- pattern = PR_BASE_PATTERN + r"/post/(\d+)"
- test = (
("http://pornreactor.cc/post/863166", {
"url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3",
"content": "ec6b0568bfb1803648744077da082d14de844340",
@@ -340,3 +289,16 @@ class PornreactorPostExtractor(ReactorPostExtractor):
"url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54",
}),
)
+
+ def __init__(self, match):
+ ReactorExtractor.__init__(self, match)
+ self.post_id = match.group(match.lastindex)
+
+ def items(self):
+ post = self.request(self.root + self.path).text
+ pos = post.find('class="uhead">')
+ for image in self._parse_post(post[pos:]):
+ if image["num"] == 1:
+ yield Message.Directory, image
+ url = image["url"]
+ yield Message.Url, url, text.nameext_from_url(url, image)
diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py
deleted file mode 100644
index a2a24e0..0000000
--- a/gallery_dl/extractor/seisoparty.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2021 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://seiso.party/"""
-
-from .common import Extractor, Message
-from .. import text, exception
-from ..cache import cache
-import re
-
-
-class SeisopartyExtractor(Extractor):
- """Base class for seisoparty extractors"""
- category = "seisoparty"
- root = "https://seiso.party"
- directory_fmt = ("{category}", "{service}", "{username}")
- filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
- archive_fmt = "{service}_{user}_{id}_{num}"
- cookiedomain = ".seiso.party"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.user_name = None
- self._find_files = re.compile(
- r'href="(https://cdn(?:-\d)?\.seiso\.party/files/[^"]+)').findall
-
- def items(self):
- self._prepare_ddosguard_cookies()
-
- for post in self.posts():
- files = post.pop("files")
- yield Message.Directory, post
- for post["num"], url in enumerate(files, 1):
- yield Message.Url, url, text.nameext_from_url(url, post)
-
- def _parse_post(self, page, post_id):
- extr = text.extract_from(page)
- return {
- "service" : self.service,
- "user" : self.user_id,
- "username": self.user_name,
- "id" : post_id,
- "date" : text.parse_datetime(extr(
- '<div class="margin-bottom-15 minor-text">', '<'),
- "%Y-%m-%d %H:%M:%S %Z"),
- "title" : text.unescape(extr('class="post-title">', '<')),
- "content" : text.unescape(extr("\n<p>\n", "\n</p>\n").strip()),
- "files" : self._find_files(page),
- }
-
- def login(self):
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(self._login_impl(username, password))
-
- @cache(maxage=28*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
-
- url = self.root + "/account/login"
- data = {"username": username, "password": password}
-
- response = self.request(url, method="POST", data=data)
- if response.url.endswith("/account/login") and \
- "Username or password is incorrect" in response.text:
- raise exception.AuthenticationError()
-
- return {c.name: c.value for c in response.history[0].cookies}
-
-
-class SeisopartyUserExtractor(SeisopartyExtractor):
- """Extractor for all posts from a seiso.party user listing"""
- subcategory = "user"
- pattern = r"(?:https?://)?seiso\.party/artists/([^/?#]+)/([^/?#]+)"
- test = (
- ("https://seiso.party/artists/fanbox/21", {
- "pattern": r"https://cdn\.seiso\.party/files/fanbox/\d+/",
- "count": ">=15",
- "keyword": {
- "content": str,
- "date": "type:datetime",
- "id": r"re:\d+",
- "num": int,
- "service": "fanbox",
- "title": str,
- "user": "21",
- "username": "雨",
- },
- }),
- )
-
- def __init__(self, match):
- SeisopartyExtractor.__init__(self, match)
- self.service, self.user_id = match.groups()
-
- def posts(self):
- url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id)
- page = self.request(url).text
- self.user_name, pos = text.extract(page, '<span class="title">', '<')
-
- url = self.root + text.extract(
- page, 'href="', '"', page.index('id="content"', pos))[0]
- response = self.request(url)
- headers = {"Referer": url}
-
- while True:
- yield self._parse_post(response.text, url.rpartition("/")[2])
- response = self.request(url + "/next", headers=headers)
- if url == response.url:
- return
- url = headers["Referer"] = response.url
-
-
-class SeisopartyPostExtractor(SeisopartyExtractor):
- """Extractor for a single seiso.party post"""
- subcategory = "post"
- pattern = r"(?:https?://)?seiso\.party/post/([^/?#]+)/([^/?#]+)/([^/?#]+)"
- test = (
- ("https://seiso.party/post/fanbox/21/371", {
- "url": "75f13b92de0ce399b6163c3de18f1f36011c2366",
- "count": 2,
- "keyword": {
- "content": "この前描いためぐるちゃんのPSDファイルです。<br/>"
- "どうぞよろしくお願いします。",
- "date": "dt:2021-05-06 12:38:31",
- "extension": "re:psd|jpg",
- "filename": "re:backcourt|ffb2ccb7a3586d05f9a4620329dd131e",
- "id": "371",
- "num": int,
- "service": "fanbox",
- "title": "MEGURU.PSD",
- "user": "21",
- "username": "雨",
- },
- }),
- ("https://seiso.party/post/patreon/429/95949", {
- "pattern": r"https://cdn-2\.seiso\.party/files/patreon/95949/",
- "count": 2,
- }),
- )
-
- def __init__(self, match):
- SeisopartyExtractor.__init__(self, match)
- self.service, self.user_id, self.post_id = match.groups()
-
- def posts(self):
- url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id)
- page = self.request(url).text
- self.user_name, pos = text.extract(page, '<span class="title">', '<')
-
- url = "{}/post/{}/{}/{}".format(
- self.root, self.service, self.user_id, self.post_id)
- return (self._parse_post(self.request(url).text, self.post_id),)
-
-
-class SeisopartyFavoriteExtractor(SeisopartyExtractor):
- """Extractor for seiso.party favorites"""
- subcategory = "favorite"
- pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?"
- test = (
- ("https://seiso.party/favorites/artists", {
- "pattern": SeisopartyUserExtractor.pattern,
- "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683",
- "count": 3,
- }),
- ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", {
- "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3",
- }),
- )
-
- def __init__(self, match):
- SeisopartyExtractor.__init__(self, match)
- self.query = match.group(1)
-
- def items(self):
- self._prepare_ddosguard_cookies()
- self.login()
-
- url = self.root + "/favorites/artists"
- data = {"_extractor": SeisopartyUserExtractor}
- params = text.parse_query(self.query)
- params["page"] = text.parse_int(params.get("page"), 1)
-
- while True:
- page = self.request(url, params=params).text
-
- cnt = 0
- for card in text.extract_iter(
- page, '<div class="artist-card', '</a>'):
- path = text.extract(card, '<a href="', '"')[0]
- yield Message.Queue, self.root + path, data
- cnt += 1
-
- if cnt < 25:
- return
- params["page"] += 1
diff --git a/gallery_dl/extractor/shopify.py b/gallery_dl/extractor/shopify.py
index 6d924de..f276e84 100644
--- a/gallery_dl/extractor/shopify.py
+++ b/gallery_dl/extractor/shopify.py
@@ -53,6 +53,10 @@ BASE_PATTERN = ShopifyExtractor.update({
"windsorstore": {
"root": "https://www.windsorstore.com",
},
+ "loungeunderwear": {
+ "root": "https://loungeunderwear.com",
+ "pattern": r"(?:[a-z]+\.)?loungeunderwear\.com",
+ },
})
@@ -70,6 +74,7 @@ class ShopifyCollectionExtractor(ShopifyExtractor):
("https://www.fashionnova.com/collections/mini-dresses#1"),
("https://www.omgmiamiswimwear.com/collections/fajas"),
("https://www.windsorstore.com/collections/dresses-ball-gowns"),
+ ("https://loungeunderwear.com/collections/apparel"),
)
def metadata(self):
@@ -105,6 +110,7 @@ class ShopifyProductExtractor(ShopifyExtractor):
("https://www.fashionnova.com/collections/flats/products/name"),
("https://www.windsorstore.com/collections/accessories-belts/products"
"/rhine-buckle-dbl-o-ring-pu-strap-belt-073010158001"),
+ ("https://de.loungeunderwear.com/products/ribbed-crop-top-black"),
)
def products(self):
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index c1a8878..2c806ad 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -21,6 +21,7 @@ class SkebExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user_name = match.group(1)
+ self.thumbnails = self.config("thumbnails", False)
def items(self):
for post_num in self.posts():
@@ -94,7 +95,7 @@ class SkebExtractor(Extractor):
return resp, post
def _get_urls_from_post(self, resp, post):
- if "og_image_url" in resp:
+ if self.thumbnails and "og_image_url" in resp:
post["content_category"] = "thumb"
post["file_id"] = "thumb"
post["file_url"] = resp["og_image_url"]
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index ae8b58d..69e3854 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -38,12 +38,11 @@ class SubscribestarExtractor(Extractor):
self.login()
for post_html in self.posts():
media = self._media_from_post(post_html)
- if not media:
- continue
data = self._data_from_post(post_html)
yield Message.Directory, data
- for item in media:
+ for num, item in enumerate(media, 1):
item.update(data)
+ item["num"] = num
text.nameext_from_url(item.get("name") or item["url"], item)
yield Message.Url, item["url"], item
@@ -140,8 +139,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor):
test = (
("https://www.subscribestar.com/subscribestar", {
"count": ">= 20",
- "pattern": r"https://(star-uploads|ss-uploads-prod)\.s\d+-us-west-"
- r"\d+\.amazonaws\.com/uploads(_v2)?/users/11/",
+ "pattern": r"https://\w+\.cloudfront\.net/uploads(_v2)?/users/11/",
"keyword": {
"author_id": 11,
"author_name": "subscribestar",
@@ -149,6 +147,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor):
"content": str,
"date" : "type:datetime",
"id" : int,
+ "num" : int,
"post_id": int,
"type" : "re:image|video|attachment",
"url" : str,
@@ -190,7 +189,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
pattern = BASE_PATTERN + r"/posts/(\d+)"
test = (
("https://www.subscribestar.com/posts/102468", {
- "url": "612da5a98af056dd78dc846fbcfa705e721f6675",
+ "count": 1,
"keyword": {
"author_id": 11,
"author_name": "subscribestar",
@@ -202,6 +201,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"group": "imgs_and_videos",
"height": 291,
"id": 203885,
+ "num": 1,
"pinned": False,
"post_id": 102468,
"type": "image",
@@ -209,7 +209,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
},
}),
("https://subscribestar.adult/posts/22950", {
- "url": "440d745a368e6b3e218415f593a5045f384afa0d",
+ "count": 1,
"keyword": {"date": "dt:2019-04-28 07:32:00"},
}),
)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 00f3b04..f1c392d 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -41,7 +41,9 @@ class TwitterExtractor(Extractor):
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self._user_cache = {}
+ self._init_sizes()
+ def _init_sizes(self):
size = self.config("size")
if size is None:
self._size_image = "orig"
@@ -580,13 +582,17 @@ class TwitterImageExtractor(Extractor):
subcategory = "image"
pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)"
test = (
- ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg%name=orig"),
+ ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", {
+ "options": (("size", "4096x4096,orig"),),
+ "url": "cb3042a6f6826923da98f0d2b66c427e9385114c",
+ }),
("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"),
)
def __init__(self, match):
Extractor.__init__(self, match)
self.id, self.fmt = match.groups()
+ TwitterExtractor._init_sizes(self)
def items(self):
base = "https://pbs.twimg.com/media/{}?format={}&name=".format(
@@ -595,11 +601,11 @@ class TwitterImageExtractor(Extractor):
data = {
"filename": self.id,
"extension": self.fmt,
- "_fallback": TwitterExtractor._image_fallback(base),
+ "_fallback": TwitterExtractor._image_fallback(self, base),
}
yield Message.Directory, data
- yield Message.Url, base + "orig", data
+ yield Message.Url, base + self._size_image, data
class TwitterAPI():
@@ -793,16 +799,21 @@ class TwitterAPI():
data = response.json()
if "errors" in data:
try:
- msg = ", ".join(
- '"' + error["message"] + '"'
- for error in data["errors"]
- )
+ errors, warnings = [], []
+ for error in data["errors"]:
+ if error.get("kind") == "NonFatal":
+ warnings.append(error["message"])
+ else:
+ errors.append(error["message"])
+ errors = ", ".join(errors)
except Exception:
- msg = data["errors"]
- if msg and response.status_code < 400:
- raise exception.StopExtraction(msg)
+ errors = data["errors"]
+ if warnings:
+ self.extractor.log.warning(", ".join(warnings))
+ if errors and response.status_code < 400:
+ raise exception.StopExtraction(errors)
else:
- msg = ""
+ errors = ""
if response.status_code < 400:
# success
@@ -816,7 +827,7 @@ class TwitterAPI():
continue
if response.status_code == 401 and \
- "have been blocked from viewing" in msg:
+ "have been blocked from viewing" in errors:
# account blocked
extr = self.extractor
if self.headers["x-twitter-auth-type"] and \
@@ -833,7 +844,7 @@ class TwitterAPI():
# error
raise exception.StopExtraction(
- "%s %s (%s)", response.status_code, response.reason, msg)
+ "%s %s (%s)", response.status_code, response.reason, errors)
def _pagination(self, endpoint, params=None):
if params is None:
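The twitter API change splits errors by their "kind" field: NonFatal entries are only logged as warnings, while everything else still stops extraction. The partitioning logic on its own, fed with made-up error payloads:

    def split_errors(error_list):
        errors, warnings = [], []
        for error in error_list:
            if error.get("kind") == "NonFatal":
                warnings.append(error["message"])
            else:
                errors.append(error["message"])
        return ", ".join(errors), warnings

    errors, warnings = split_errors([
        {"kind": "NonFatal", "message": "cursor expired"},
        {"message": "user suspended"},
    ])
    assert warnings == ["cursor expired"]
    assert errors == "user suspended"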
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index e2474c9..cf5b192 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -48,7 +48,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
test = (
(("https://www.webtoons.com/en/comedy/safely-endangered"
"/ep-572-earth/viewer?title_no=352&episode_no=572"), {
- "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef",
+ "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38",
"content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7",
"42055e44659f6ffc410b3fb6557346dfbb993df3",
"49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"),
@@ -62,7 +62,6 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
url = "{}/{}/viewer?{}".format(self.root, self.path, query)
GalleryExtractor.__init__(self, match, url)
self.setup_agegate_cookies()
- self.session.headers["Referer"] = url
query = text.parse_query(query)
self.title_no = query.get("title_no")
@@ -88,7 +87,7 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
@staticmethod
def images(page):
return [
- (url, None)
+ (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None)
for url in text.extract_iter(
page, 'class="_images" data-url="', '"')
]
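The webtoons hunks drop the forced Referer header and rewrite every extracted image URL from the webtoon-phinf host to swebtoon-phinf. The rewrite is a plain string replacement; the example URL here is invented:

    url = "https://webtoon-phinf.example.net/ep-572/image_01.jpg"
    fixed = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
    assert fixed == "https://swebtoon-phinf.example.net/ep-572/image_01.jpg"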
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 0922c7c..0a55532 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -32,8 +32,8 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
test = (
("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
"count": 8,
- "pattern": r"https://profile-pics-l3\.xvideos-cdn\.com"
- r"/[0-9a-f]{40}-\d+/videos/profiles/galleries/84/ca/37"
+ "pattern": r"https://profile-pics-cdn\d+\.xvideos-cdn\.com"
+ r"/[^/]+\,\d+/videos/profiles/galleries/84/ca/37"
r"/pervertedcouple/gal751031/pic_\d+_big\.jpg",
"keyword": {
"gallery": {
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
index d380dab..8eb0c83 100644
--- a/gallery_dl/extractor/ytdl.py
+++ b/gallery_dl/extractor/ytdl.py
@@ -9,7 +9,7 @@
"""Extractors for sites supported by youtube-dl"""
from .common import Extractor, Message
-from .. import text, config, exception
+from .. import ytdl, config, exception
class YoutubeDLExtractor(Extractor):
@@ -54,52 +54,45 @@ class YoutubeDLExtractor(Extractor):
self.log.debug("Using %s", ytdl_module)
# construct YoutubeDL object
- options = {
- "format" : self.config("format"),
+ extr_opts = {
+ "extract_flat" : "in_playlist",
+ "force_generic_extractor": self.force_generic_extractor,
+ }
+ user_opts = {
"retries" : self._retries,
"socket_timeout" : self._timeout,
"nocheckcertificate" : not self._verify,
- "proxy" : self.session.proxies.get("http"),
- "force_generic_extractor": self.force_generic_extractor,
- "nopart" : not self.config("part", True),
- "updatetime" : self.config("mtime", True),
- "ratelimit" : text.parse_bytes(
- self.config("rate"), None),
- "min_filesize" : text.parse_bytes(
- self.config("filesize-min"), None),
- "max_filesize" : text.parse_bytes(
- self.config("filesize-max"), None),
}
- raw_options = self.config("raw-options")
- if raw_options:
- options.update(raw_options)
- if self.config("logging", True):
- options["logger"] = self.log
- options["extract_flat"] = "in_playlist"
-
username, password = self._get_auth_info()
if username:
- options["username"], options["password"] = username, password
+ user_opts["username"], user_opts["password"] = username, password
del username, password
- ytdl = ytdl_module.YoutubeDL(options)
+ ytdl_instance = ytdl.construct_YoutubeDL(
+ ytdl_module, self, user_opts, extr_opts)
# transfer cookies to ytdl
cookies = self.session.cookies
if cookies:
- set_cookie = self.ytdl.cookiejar.set_cookie
- for cookie in self.session.cookies:
+ set_cookie = ytdl_instance.cookiejar.set_cookie
+ for cookie in cookies:
set_cookie(cookie)
# extract youtube_dl info_dict
- info_dict = ytdl._YoutubeDL__extract_info(
- self.ytdl_url,
- ytdl.get_info_extractor(self.ytdl_ie_key),
- False, {}, True)
-
- if "entries" in info_dict:
- results = self._process_entries(ytdl, info_dict["entries"])
+ try:
+ info_dict = ytdl_instance._YoutubeDL__extract_info(
+ self.ytdl_url,
+ ytdl_instance.get_info_extractor(self.ytdl_ie_key),
+ False, {}, True)
+ except ytdl_module.utils.YoutubeDLError:
+ raise exception.StopExtraction("Failed to extract video data")
+
+ if not info_dict:
+ return
+ elif "entries" in info_dict:
+ results = self._process_entries(
+ ytdl_module, ytdl_instance, info_dict["entries"])
else:
results = (info_dict,)
@@ -107,7 +100,7 @@ class YoutubeDLExtractor(Extractor):
for info_dict in results:
info_dict["extension"] = None
info_dict["_ytdl_info_dict"] = info_dict
- info_dict["_ytdl_instance"] = ytdl
+ info_dict["_ytdl_instance"] = ytdl_instance
url = "ytdl:" + (info_dict.get("url") or
info_dict.get("webpage_url") or
@@ -116,15 +109,23 @@ class YoutubeDLExtractor(Extractor):
yield Message.Directory, info_dict
yield Message.Url, url, info_dict
- def _process_entries(self, ytdl, entries):
+ def _process_entries(self, ytdl_module, ytdl_instance, entries):
for entry in entries:
- if entry.get("_type") in ("url", "url_transparent"):
- info_dict = ytdl.extract_info(
- entry["url"], False,
- ie_key=entry.get("ie_key"))
- if "entries" in info_dict:
+ if not entry:
+ continue
+ elif entry.get("_type") in ("url", "url_transparent"):
+ try:
+ info_dict = ytdl_instance.extract_info(
+ entry["url"], False,
+ ie_key=entry.get("ie_key"))
+ except ytdl_module.utils.YoutubeDLError:
+ continue
+
+ if not info_dict:
+ continue
+ elif "entries" in info_dict:
yield from self._process_entries(
- ytdl, info_dict["entries"])
+ ytdl_module, ytdl_instance, info_dict["entries"])
else:
yield info_dict
else:
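The extractor/ytdl.py rewrite splits options into user_opts (overridable defaults such as retries and timeouts) and extr_opts (options the extractor must force, like extract_flat), and delegates merging to the new gallery_dl.ytdl.construct_YoutubeDL. A rough approximation of the resulting precedence, with plain dicts:

    # Approximate layering (see construct_YoutubeDL in the new module
    # below): user options first, then raw-options from the config,
    # then the forced system options win last.
    user_opts = {"retries": 4, "format": None}
    raw_opts  = {"format": "best"}              # "raw-options" config value
    extr_opts = {"extract_flat": "in_playlist"}

    opts = dict(user_opts)
    opts.update(raw_opts)
    opts.update(extr_opts)
    assert opts["format"] == "best"
    assert opts["extract_flat"] == "in_playlist"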
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index f5d961a..c2b4d99 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -274,6 +274,8 @@ def build_format_func(format_spec):
return _parse_join(format_spec)
if fmt == "R":
return _parse_replace(format_spec)
+ if fmt == "D":
+ return _parse_datetime(format_spec)
return _default_format(format_spec)
return format
@@ -319,6 +321,16 @@ def _parse_replace(format_spec):
return replace
+def _parse_datetime(format_spec):
+ dt_format, _, format_spec = format_spec.partition("/")
+ dt_format = dt_format[1:]
+ fmt = build_format_func(format_spec)
+
+ def dt(obj):
+ return fmt(text.parse_datetime(obj, dt_format))
+ return dt
+
+
def _default_format(format_spec):
def wrap(obj):
return format(obj, format_spec)
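The new D format specifier takes a strptime-style pattern up to the first "/", parses the field value into a datetime, and hands the result to the remaining format spec. Usage matching the tests further down, assuming gallery-dl's formatter.parse() entry point (the one its test suite uses) is available:

    from gallery_dl import formatter

    fmt = formatter.parse("{ds:D%Y-%m-%dT%H:%M:%S%z/%Y%m%d}")
    print(fmt.format_map({"ds": "2010-01-01T01:00:00+0100"}))  # 20100101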
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 4e185d0..97a8d3f 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -11,7 +11,6 @@ import json
import time
import errno
import logging
-import operator
import functools
import collections
from . import extractor, downloader, postprocessor
@@ -201,7 +200,6 @@ class DownloadJob(Job):
def __init__(self, url, parent=None):
Job.__init__(self, url, parent)
self.log = self.get_logger("download")
- self.blacklist = None
self.fallback = None
self.archive = None
self.sleep = None
@@ -209,6 +207,7 @@ class DownloadJob(Job):
self.downloaders = {}
self.out = output.select()
self.visited = parent.visited if parent else set()
+ self._extractor_filter = None
self._skipcnt = 0
def handle_url(self, url, kwdict):
@@ -297,9 +296,9 @@ class DownloadJob(Job):
else:
extr = extractor.find(url)
if extr:
- if self.blacklist is None:
- self.blacklist = self._build_blacklist()
- if extr.category in self.blacklist:
+ if self._extractor_filter is None:
+ self._extractor_filter = self._build_extractor_filter()
+ if not self._extractor_filter(extr):
extr = None
if extr:
@@ -444,22 +443,20 @@ class DownloadJob(Job):
self.hooks = collections.defaultdict(list)
pp_log = self.get_logger("postprocessor")
pp_list = []
- category = self.extractor.category
- basecategory = self.extractor.basecategory
pp_conf = config.get((), "postprocessor") or {}
for pp_dict in postprocessors:
if isinstance(pp_dict, str):
pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict}
- whitelist = pp_dict.get("whitelist")
- if whitelist and category not in whitelist and \
- basecategory not in whitelist:
- continue
-
- blacklist = pp_dict.get("blacklist")
- if blacklist and (
- category in blacklist or basecategory in blacklist):
+ clist = pp_dict.get("whitelist")
+ if clist is not None:
+ negate = False
+ else:
+ clist = pp_dict.get("blacklist")
+ negate = True
+ if clist and not util.build_extractor_filter(
+ clist, negate)(self.extractor):
continue
name = pp_dict.get("name")
@@ -500,38 +497,18 @@ class DownloadJob(Job):
if condition(pathfmt.kwdict):
callback(pathfmt)
- def _build_blacklist(self):
- wlist = self.extractor.config("whitelist")
- if wlist is not None:
- if isinstance(wlist, str):
- wlist = wlist.split(",")
-
- # build a set of all categories
- blist = set()
- add = blist.add
- update = blist.update
- get = operator.itemgetter(0)
-
- for extr in extractor._list_classes():
- category = extr.category
- if category:
- add(category)
- else:
- update(map(get, extr.instances))
-
- # remove whitelisted categories
- blist.difference_update(wlist)
- return blist
-
- blist = self.extractor.config("blacklist")
- if blist is not None:
- if isinstance(blist, str):
- blist = blist.split(",")
- blist = set(blist)
+ def _build_extractor_filter(self):
+ clist = self.extractor.config("whitelist")
+ if clist is not None:
+ negate = False
else:
- blist = {self.extractor.category}
- blist |= util.SPECIAL_EXTRACTORS
- return blist
+ clist = self.extractor.config("blacklist")
+ negate = True
+ if clist is None:
+ clist = (self.extractor.category,)
+
+ return util.build_extractor_filter(
+ clist, negate, util.SPECIAL_EXTRACTORS)
class SimulationJob(DownloadJob):
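In job.py the blacklist/whitelist handling collapses into one rule: a whitelist wins if set, otherwise the blacklist applies, otherwise the extractor's own category is blacklisted by default (plus gallery-dl's special extractors). The selection order restated as a small function over a plain config dict:

    def resolve_filter(config, category):
        # Same precedence as _build_extractor_filter above.
        clist = config.get("whitelist")
        negate = False
        if clist is None:
            clist = config.get("blacklist")
            negate = True
            if clist is None:
                clist = (category,)  # default: don't recurse into yourself
        return clist, negate

    assert resolve_filter({}, "twitter") == (("twitter",), True)
    assert resolve_filter({"whitelist": "imgur"}, "x") == ("imgur", False)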
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 4a7fdbf..d25194e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -81,6 +81,16 @@ def identity(x):
return x
+def true(_):
+ """Always returns True"""
+ return True
+
+
+def false(_):
+ """Always returns False"""
+ return False
+
+
def noop():
"""Does nothing"""
@@ -432,6 +442,66 @@ def build_duration_func(duration, min=0.0):
return functools.partial(identity, duration if duration > min else min)
+def build_extractor_filter(categories, negate=True, special=None):
+ """Build a function that takes an Extractor class as argument
+ and returns True if that class is allowed by 'categories'
+ """
+ if isinstance(categories, str):
+ categories = categories.split(",")
+
+ catset = set() # set of categories / basecategories
+ subset = set() # set of subcategories
+ catsub = [] # list of category-subcategory pairs
+
+ for item in categories:
+ category, _, subcategory = item.partition(":")
+ if category and category != "*":
+ if subcategory and subcategory != "*":
+ catsub.append((category, subcategory))
+ else:
+ catset.add(category)
+ elif subcategory and subcategory != "*":
+ subset.add(subcategory)
+
+ if special:
+ catset |= special
+ elif not catset and not subset and not catsub:
+ return true if negate else false
+
+ tests = []
+
+ if negate:
+ if catset:
+ tests.append(lambda extr:
+ extr.category not in catset and
+ extr.basecategory not in catset)
+ if subset:
+ tests.append(lambda extr: extr.subcategory not in subset)
+ else:
+ if catset:
+ tests.append(lambda extr:
+ extr.category in catset or
+ extr.basecategory in catset)
+ if subset:
+ tests.append(lambda extr: extr.subcategory in subset)
+
+ if catsub:
+ def test(extr):
+ for category, subcategory in catsub:
+ if category in (extr.category, extr.basecategory) and \
+ subcategory == extr.subcategory:
+ return not negate
+ return negate
+ tests.append(test)
+
+ if len(tests) == 1:
+ return tests[0]
+ if negate:
+ return lambda extr: all(t(extr) for t in tests)
+ else:
+ return lambda extr: any(t(extr) for t in tests)
+
+
def build_predicate(predicates):
if not predicates:
return lambda url, kwdict: True
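build_extractor_filter is what enables the extended blacklist/whitelist syntax from the changelog: plain category names, "*:subcategory" wildcards, and "category:subcategory" pairs. A stand-in extractor class run through two blacklist-style filters (negate defaults to True):

    from gallery_dl import util

    class FakeExtractor:
        category = "twitter"
        subcategory = "media"
        basecategory = ""

    deny = util.build_extractor_filter("twitter:media")
    assert deny(FakeExtractor) is False   # matched, so filtered out

    deny = util.build_extractor_filter("*:user")
    assert deny(FakeExtractor) is True    # subcategory differs, kept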
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 48817be..a363a97 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.19.2"
+__version__ = "1.19.3"
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
new file mode 100644
index 0000000..4266f48
--- /dev/null
+++ b/gallery_dl/ytdl.py
@@ -0,0 +1,513 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Helpers for interacting with youtube-dl"""
+
+import re
+import shlex
+import itertools
+from . import text, util, exception
+
+
+def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
+ opts = argv = None
+ config = obj.config
+
+ cfg = config("config-file")
+ if cfg:
+ with open(util.expand_path(cfg)) as fp:
+ contents = fp.read()
+ argv = shlex.split(contents, comments=True)
+
+ cmd = config("cmdline-args")
+ if cmd:
+ if isinstance(cmd, str):
+ cmd = shlex.split(cmd)
+ argv = (argv + cmd) if argv else cmd
+
+ try:
+ opts = parse_command_line(module, argv) if argv else user_opts
+ except SystemExit:
+ raise exception.StopExtraction("Invalid command-line option")
+
+ if opts.get("format") is None:
+ opts["format"] = config("format")
+ if opts.get("proxy") is None:
+ opts["proxy"] = obj.session.proxies.get("http")
+ if opts.get("nopart") is None:
+ opts["nopart"] = not config("part", True)
+ if opts.get("updatetime") is None:
+ opts["updatetime"] = config("mtime", True)
+ if opts.get("ratelimit") is None:
+ opts["ratelimit"] = text.parse_bytes(config("rate"), None)
+ if opts.get("min_filesize") is None:
+ opts["min_filesize"] = text.parse_bytes(config("filesize-min"), None)
+ if opts.get("max_filesize") is None:
+ opts["max_filesize"] = text.parse_bytes(config("filesize-max"), None)
+
+ raw_opts = config("raw-options")
+ if raw_opts:
+ opts.update(raw_opts)
+ if config("logging", True):
+ opts["logger"] = obj.log
+ if system_opts:
+ opts.update(system_opts)
+
+ return module.YoutubeDL(opts)
+
+
+def parse_command_line(module, argv):
+ parser, opts, args = module.parseOpts(argv)
+
+ ytdlp = (module.__name__ == "yt_dlp")
+ std_headers = module.std_headers
+ parse_bytes = module.FileDownloader.parse_bytes
+
+ # HTTP headers
+ if opts.user_agent is not None:
+ std_headers["User-Agent"] = opts.user_agent
+ if opts.referer is not None:
+ std_headers["Referer"] = opts.referer
+ if opts.headers:
+ if isinstance(opts.headers, dict):
+ std_headers.update(opts.headers)
+ else:
+ for h in opts.headers:
+ key, _, value = h.partition(":")
+ std_headers[key] = value
+
+ if opts.ratelimit is not None:
+ opts.ratelimit = parse_bytes(opts.ratelimit)
+ if getattr(opts, "throttledratelimit", None) is not None:
+ opts.throttledratelimit = parse_bytes(opts.throttledratelimit)
+ if opts.min_filesize is not None:
+ opts.min_filesize = parse_bytes(opts.min_filesize)
+ if opts.max_filesize is not None:
+ opts.max_filesize = parse_bytes(opts.max_filesize)
+ if opts.max_sleep_interval is None:
+ opts.max_sleep_interval = opts.sleep_interval
+ if getattr(opts, "overwrites", None):
+ opts.continue_dl = False
+ if opts.retries is not None:
+ opts.retries = parse_retries(opts.retries)
+ if opts.fragment_retries is not None:
+ opts.fragment_retries = parse_retries(opts.fragment_retries)
+ if getattr(opts, "extractor_retries", None) is not None:
+ opts.extractor_retries = parse_retries(opts.extractor_retries)
+ if opts.buffersize is not None:
+ opts.buffersize = parse_bytes(opts.buffersize)
+ if opts.http_chunk_size is not None:
+ opts.http_chunk_size = parse_bytes(opts.http_chunk_size)
+ if opts.extractaudio:
+ opts.audioformat = opts.audioformat.lower()
+ if opts.audioquality:
+ opts.audioquality = opts.audioquality.strip("kK")
+ if opts.recodevideo is not None:
+ opts.recodevideo = opts.recodevideo.replace(" ", "")
+ if getattr(opts, "remuxvideo", None) is not None:
+ opts.remuxvideo = opts.remuxvideo.replace(" ", "")
+
+ if opts.date is not None:
+ date = module.DateRange.day(opts.date)
+ else:
+ date = module.DateRange(opts.dateafter, opts.datebefore)
+
+ compat_opts = getattr(opts, "compat_opts", ())
+
+ def _unused_compat_opt(name):
+ if name not in compat_opts:
+ return False
+ compat_opts.discard(name)
+ compat_opts.update(["*%s" % name])
+ return True
+
+ def set_default_compat(
+ compat_name, opt_name, default=True, remove_compat=True):
+ attr = getattr(opts, opt_name, None)
+ if compat_name in compat_opts:
+ if attr is None:
+ setattr(opts, opt_name, not default)
+ return True
+ else:
+ if remove_compat:
+ _unused_compat_opt(compat_name)
+ return False
+ elif attr is None:
+ setattr(opts, opt_name, default)
+ return None
+
+ set_default_compat("abort-on-error", "ignoreerrors", "only_download")
+ set_default_compat("no-playlist-metafiles", "allow_playlist_files")
+ set_default_compat("no-clean-infojson", "clean_infojson")
+ if "format-sort" in compat_opts:
+ opts.format_sort.extend(module.InfoExtractor.FormatSort.ytdl_default)
+ _video_multistreams_set = set_default_compat(
+ "multistreams", "allow_multiple_video_streams",
+ False, remove_compat=False)
+ _audio_multistreams_set = set_default_compat(
+ "multistreams", "allow_multiple_audio_streams",
+ False, remove_compat=False)
+ if _video_multistreams_set is False and _audio_multistreams_set is False:
+ _unused_compat_opt("multistreams")
+
+ if isinstance(opts.outtmpl, dict):
+ outtmpl = opts.outtmpl
+ outtmpl_default = outtmpl.get("default")
+ else:
+ opts.outtmpl = outtmpl = outtmpl_default = ""
+
+ if "filename" in compat_opts:
+ if outtmpl_default is None:
+ outtmpl_default = outtmpl["default"] = "%(title)s-%(id)s.%(ext)s"
+ else:
+ _unused_compat_opt("filename")
+
+ if opts.extractaudio and not opts.keepvideo and opts.format is None:
+ opts.format = "bestaudio/best"
+
+ if ytdlp:
+ def metadataparser_actions(f):
+ if isinstance(f, str):
+ yield module.MetadataFromFieldPP.to_action(f)
+ else:
+ REPLACE = module.MetadataParserPP.Actions.REPLACE
+ args = f[1:]
+ for x in f[0].split(","):
+ action = [REPLACE, x]
+ action += args
+ yield action
+
+ if getattr(opts, "parse_metadata", None) is None:
+ opts.parse_metadata = []
+ if opts.metafromtitle is not None:
+ opts.parse_metadata.append("title:%s" % opts.metafromtitle)
+ opts.metafromtitle = None
+ opts.parse_metadata = list(itertools.chain.from_iterable(map(
+ metadataparser_actions, opts.parse_metadata)))
+ else:
+ opts.parse_metadata = ()
+
+ download_archive_fn = module.expand_path(opts.download_archive) \
+ if opts.download_archive is not None else opts.download_archive
+
+ if getattr(opts, "getcomments", None):
+ opts.writeinfojson = True
+
+ if getattr(opts, "no_sponsorblock", None):
+ opts.sponsorblock_mark = set()
+ opts.sponsorblock_remove = set()
+ else:
+ opts.sponsorblock_mark = \
+ getattr(opts, "sponsorblock_mark", None) or set()
+ opts.sponsorblock_remove = \
+ getattr(opts, "sponsorblock_remove", None) or set()
+ sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
+
+ addchapters = getattr(opts, "addchapters", None)
+ if (opts.addmetadata or opts.sponsorblock_mark) and addchapters is None:
+ addchapters = True
+ opts.remove_chapters = getattr(opts, "remove_chapters", None) or ()
+
+ # PostProcessors
+ postprocessors = []
+ if opts.metafromtitle:
+ postprocessors.append({
+ "key": "MetadataFromTitle",
+ "titleformat": opts.metafromtitle,
+ })
+ if getattr(opts, "add_postprocessors", None):
+ postprocessors += list(opts.add_postprocessors)
+ if sponsorblock_query:
+ postprocessors.append({
+ "key": "SponsorBlock",
+ "categories": sponsorblock_query,
+ "api": opts.sponsorblock_api,
+ "when": "pre_process",
+ })
+ if opts.parse_metadata:
+ postprocessors.append({
+ "key": "MetadataParser",
+ "actions": opts.parse_metadata,
+ "when": "pre_process",
+ })
+ if opts.convertsubtitles:
+ pp = {"key": "FFmpegSubtitlesConvertor",
+ "format": opts.convertsubtitles}
+ if ytdlp:
+ pp["when"] = "before_dl"
+ postprocessors.append(pp)
+ if getattr(opts, "convertthumbnails", None):
+ postprocessors.append({
+ "key": "FFmpegThumbnailsConvertor",
+ "format": opts.convertthumbnails,
+ "when": "before_dl",
+ })
+ if getattr(opts, "exec_before_dl_cmd", None):
+ postprocessors.append({
+ "key": "Exec",
+ "exec_cmd": opts.exec_before_dl_cmd,
+ "when": "before_dl",
+ })
+ if opts.extractaudio:
+ postprocessors.append({
+ "key": "FFmpegExtractAudio",
+ "preferredcodec": opts.audioformat,
+ "preferredquality": opts.audioquality,
+ "nopostoverwrites": opts.nopostoverwrites,
+ })
+ if getattr(opts, "remuxvideo", None):
+ postprocessors.append({
+ "key": "FFmpegVideoRemuxer",
+ "preferedformat": opts.remuxvideo,
+ })
+ if opts.recodevideo:
+ postprocessors.append({
+ "key": "FFmpegVideoConvertor",
+ "preferedformat": opts.recodevideo,
+ })
+ if opts.embedsubtitles:
+ pp = {"key": "FFmpegEmbedSubtitle"}
+ if ytdlp:
+ pp["already_have_subtitle"] = (
+ opts.writesubtitles and "no-keep-subs" not in compat_opts)
+ postprocessors.append(pp)
+ if not opts.writeautomaticsub and "no-keep-subs" not in compat_opts:
+ opts.writesubtitles = True
+ if opts.allsubtitles and not opts.writeautomaticsub:
+ opts.writesubtitles = True
+ remove_chapters_patterns, remove_ranges = [], []
+ for regex in opts.remove_chapters:
+ if regex.startswith("*"):
+ dur = list(map(module.parse_duration, regex[1:].split("-")))
+ if len(dur) == 2 and all(t is not None for t in dur):
+ remove_ranges.append(tuple(dur))
+ continue
+ remove_chapters_patterns.append(re.compile(regex))
+ if opts.remove_chapters or sponsorblock_query:
+ postprocessors.append({
+ "key": "ModifyChapters",
+ "remove_chapters_patterns": remove_chapters_patterns,
+ "remove_sponsor_segments": opts.sponsorblock_remove,
+ "remove_ranges": remove_ranges,
+ "sponsorblock_chapter_title": opts.sponsorblock_chapter_title,
+ "force_keyframes": opts.force_keyframes_at_cuts,
+ })
+ if opts.addmetadata or addchapters:
+ pp = {"key": "FFmpegMetadata"}
+ if ytdlp:
+ pp["add_chapters"] = addchapters
+ pp["add_metadata"] = opts.addmetadata
+ postprocessors.append(pp)
+ if getattr(opts, "sponskrub", False) is not False:
+ postprocessors.append({
+ "key": "SponSkrub",
+ "path": opts.sponskrub_path,
+ "args": opts.sponskrub_args,
+ "cut": opts.sponskrub_cut,
+ "force": opts.sponskrub_force,
+ "ignoreerror": opts.sponskrub is None,
+ })
+ if opts.embedthumbnail:
+ already_have_thumbnail = (opts.writethumbnail or
+ opts.write_all_thumbnails)
+ postprocessors.append({
+ "key": "EmbedThumbnail",
+ "already_have_thumbnail": already_have_thumbnail,
+ })
+ if not already_have_thumbnail:
+ opts.writethumbnail = True
+ if isinstance(opts.outtmpl, dict):
+ opts.outtmpl["pl_thumbnail"] = ""
+ if getattr(opts, "split_chapters", None):
+ postprocessors.append({
+ "key": "FFmpegSplitChapters",
+ "force_keyframes": opts.force_keyframes_at_cuts,
+ })
+ if opts.xattrs:
+ postprocessors.append({"key": "XAttrMetadata"})
+ if opts.exec_cmd:
+ postprocessors.append({
+ "key": "Exec",
+ "exec_cmd": opts.exec_cmd,
+ "when": "after_move",
+ })
+
+ match_filter = (
+ None if opts.match_filter is None
+ else module.match_filter_func(opts.match_filter))
+
+ return {
+ "usenetrc": opts.usenetrc,
+ "netrc_location": getattr(opts, "netrc_location", None),
+ "username": opts.username,
+ "password": opts.password,
+ "twofactor": opts.twofactor,
+ "videopassword": opts.videopassword,
+ "ap_mso": opts.ap_mso,
+ "ap_username": opts.ap_username,
+ "ap_password": opts.ap_password,
+ "quiet": opts.quiet,
+ "no_warnings": opts.no_warnings,
+ "forceurl": opts.geturl,
+ "forcetitle": opts.gettitle,
+ "forceid": opts.getid,
+ "forcethumbnail": opts.getthumbnail,
+ "forcedescription": opts.getdescription,
+ "forceduration": opts.getduration,
+ "forcefilename": opts.getfilename,
+ "forceformat": opts.getformat,
+ "forceprint": getattr(opts, "forceprint", None) or (),
+ "force_write_download_archive": getattr(
+ opts, "force_write_download_archive", None),
+ "simulate": opts.simulate,
+ "skip_download": opts.skip_download,
+ "format": opts.format,
+ "allow_unplayable_formats": getattr(
+ opts, "allow_unplayable_formats", None),
+ "ignore_no_formats_error": getattr(
+ opts, "ignore_no_formats_error", None),
+ "format_sort": getattr(
+ opts, "format_sort", None),
+ "format_sort_force": getattr(
+ opts, "format_sort_force", None),
+ "allow_multiple_video_streams": opts.allow_multiple_video_streams,
+ "allow_multiple_audio_streams": opts.allow_multiple_audio_streams,
+ "check_formats": getattr(
+ opts, "check_formats", None),
+ "listformats": opts.listformats,
+ "listformats_table": getattr(
+ opts, "listformats_table", None),
+ "outtmpl": opts.outtmpl,
+ "outtmpl_na_placeholder": opts.outtmpl_na_placeholder,
+ "paths": getattr(opts, "paths", None),
+ "autonumber_size": opts.autonumber_size,
+ "autonumber_start": opts.autonumber_start,
+ "restrictfilenames": opts.restrictfilenames,
+ "windowsfilenames": getattr(opts, "windowsfilenames", None),
+ "ignoreerrors": opts.ignoreerrors,
+ "force_generic_extractor": opts.force_generic_extractor,
+ "ratelimit": opts.ratelimit,
+ "throttledratelimit": getattr(opts, "throttledratelimit", None),
+ "overwrites": getattr(opts, "overwrites", None),
+ "retries": opts.retries,
+ "fragment_retries": opts.fragment_retries,
+ "extractor_retries": getattr(opts, "extractor_retries", None),
+ "skip_unavailable_fragments": opts.skip_unavailable_fragments,
+ "keep_fragments": opts.keep_fragments,
+ "concurrent_fragment_downloads": getattr(
+ opts, "concurrent_fragment_downloads", None),
+ "buffersize": opts.buffersize,
+ "noresizebuffer": opts.noresizebuffer,
+ "http_chunk_size": opts.http_chunk_size,
+ "continuedl": opts.continue_dl,
+ "noprogress": True if opts.noprogress is None else opts.noprogress,
+ "playliststart": opts.playliststart,
+ "playlistend": opts.playlistend,
+ "playlistreverse": opts.playlist_reverse,
+ "playlistrandom": opts.playlist_random,
+ "noplaylist": opts.noplaylist,
+ "logtostderr": outtmpl_default == "-",
+ "consoletitle": opts.consoletitle,
+ "nopart": opts.nopart,
+ "updatetime": opts.updatetime,
+ "writedescription": opts.writedescription,
+ "writeannotations": opts.writeannotations,
+ "writeinfojson": opts.writeinfojson,
+ "allow_playlist_files": opts.allow_playlist_files,
+ "clean_infojson": opts.clean_infojson,
+ "getcomments": getattr(opts, "getcomments", None),
+ "writethumbnail": opts.writethumbnail,
+ "write_all_thumbnails": opts.write_all_thumbnails,
+ "writelink": getattr(opts, "writelink", None),
+ "writeurllink": getattr(opts, "writeurllink", None),
+ "writewebloclink": getattr(opts, "writewebloclink", None),
+ "writedesktoplink": getattr(opts, "writedesktoplink", None),
+ "writesubtitles": opts.writesubtitles,
+ "writeautomaticsub": opts.writeautomaticsub,
+ "allsubtitles": opts.allsubtitles,
+ "listsubtitles": opts.listsubtitles,
+ "subtitlesformat": opts.subtitlesformat,
+ "subtitleslangs": opts.subtitleslangs,
+ "matchtitle": module.decodeOption(opts.matchtitle),
+ "rejecttitle": module.decodeOption(opts.rejecttitle),
+ "max_downloads": opts.max_downloads,
+ "prefer_free_formats": opts.prefer_free_formats,
+ "trim_file_name": getattr(opts, "trim_file_name", None),
+ "verbose": opts.verbose,
+ "dump_intermediate_pages": opts.dump_intermediate_pages,
+ "write_pages": opts.write_pages,
+ "test": opts.test,
+ "keepvideo": opts.keepvideo,
+ "min_filesize": opts.min_filesize,
+ "max_filesize": opts.max_filesize,
+ "min_views": opts.min_views,
+ "max_views": opts.max_views,
+ "daterange": date,
+ "cachedir": opts.cachedir,
+ "youtube_print_sig_code": opts.youtube_print_sig_code,
+ "age_limit": opts.age_limit,
+ "download_archive": download_archive_fn,
+ "break_on_existing": getattr(opts, "break_on_existing", None),
+ "break_on_reject": getattr(opts, "break_on_reject", None),
+ "skip_playlist_after_errors": getattr(
+ opts, "skip_playlist_after_errors", None),
+ "cookiefile": opts.cookiefile,
+ "cookiesfrombrowser": getattr(opts, "cookiesfrombrowser", None),
+ "nocheckcertificate": opts.no_check_certificate,
+ "prefer_insecure": opts.prefer_insecure,
+ "proxy": opts.proxy,
+ "socket_timeout": opts.socket_timeout,
+ "bidi_workaround": opts.bidi_workaround,
+ "debug_printtraffic": opts.debug_printtraffic,
+ "prefer_ffmpeg": opts.prefer_ffmpeg,
+ "include_ads": opts.include_ads,
+ "default_search": opts.default_search,
+ "dynamic_mpd": getattr(opts, "dynamic_mpd", None),
+ "extractor_args": getattr(opts, "extractor_args", None),
+ "youtube_include_dash_manifest": getattr(
+ opts, "youtube_include_dash_manifest", None),
+ "youtube_include_hls_manifest": getattr(
+ opts, "youtube_include_hls_manifest", None),
+ "encoding": opts.encoding,
+ "extract_flat": opts.extract_flat,
+ "mark_watched": opts.mark_watched,
+ "merge_output_format": opts.merge_output_format,
+ "postprocessors": postprocessors,
+ "fixup": opts.fixup,
+ "source_address": opts.source_address,
+ "sleep_interval_requests": getattr(
+ opts, "sleep_interval_requests", None),
+ "sleep_interval": opts.sleep_interval,
+ "max_sleep_interval": opts.max_sleep_interval,
+ "sleep_interval_subtitles": getattr(
+ opts, "sleep_interval_subtitles", None),
+ "external_downloader": opts.external_downloader,
+ "playlist_items": opts.playlist_items,
+ "xattr_set_filesize": opts.xattr_set_filesize,
+ "match_filter": match_filter,
+ "no_color": opts.no_color,
+ "ffmpeg_location": opts.ffmpeg_location,
+ "hls_prefer_native": opts.hls_prefer_native,
+ "hls_use_mpegts": opts.hls_use_mpegts,
+ "hls_split_discontinuity": getattr(
+ opts, "hls_split_discontinuity", None),
+ "external_downloader_args": opts.external_downloader_args,
+ "postprocessor_args": opts.postprocessor_args,
+ "cn_verification_proxy": opts.cn_verification_proxy,
+ "geo_verification_proxy": opts.geo_verification_proxy,
+ "geo_bypass": opts.geo_bypass,
+ "geo_bypass_country": opts.geo_bypass_country,
+ "geo_bypass_ip_block": opts.geo_bypass_ip_block,
+ "compat_opts": compat_opts,
+ }
+
+
+def parse_retries(retries, name=""):
+ if retries in ("inf", "infinite"):
+ return float("inf")
+ return int(retries)
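This new module backs the cmdline-args and config-file options from the changelog: a config file is read and shlex-split (with comments allowed), a cmdline-args string is split the same way, and the combined argv goes through the ytdl module's own parseOpts via parse_command_line. How a string value would be tokenized (the option values here are examples, not defaults):

    import shlex

    cmd = "--embed-thumbnail -f bestvideo+bestaudio --no-mtime"
    argv = shlex.split(cmd)
    assert argv == ["--embed-thumbnail", "-f",
                    "bestvideo+bestaudio", "--no-mtime"]
    # construct_YoutubeDL() would hand this argv to
    # parse_command_line(module, argv) and fall back to the plain
    # user_opts dict when neither option is configured.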
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 088b45b..8464b1b 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -29,6 +29,7 @@ class TestFormatter(unittest.TestCase):
"u": "&#x27;&lt; / &gt;&#x27;",
"t": 1262304000,
"dt": datetime.datetime(2010, 1, 1),
+ "ds": "2010-01-01T01:00:00+0100",
"name": "Name",
"title1": "Title",
"title2": "",
@@ -162,6 +163,11 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a!l:Rl//}" , "heo word")
self._run_test("{name:Rame/othing/}", "Nothing")
+ def test_datetime(self):
+ self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z}", "2010-01-01 00:00:00")
+ self._run_test("{ds:D%Y}", "2010-01-01T01:00:00+0100")
+ self._run_test("{l:D%Y}", "None")
+
def test_chain_special(self):
# multiple replacements
self._run_test("{a:Rh/C/RE/e/RL/l/}", "Cello wOrld")
@@ -174,6 +180,9 @@ class TestFormatter(unittest.TestCase):
self._run_test("{d[a]:?</>/L1/too long/}", "<too long>")
self._run_test("{d[c]:?</>/L5/too long/}", "")
+ # parse and format datetime
+ self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z/%Y%m%d}", "20100101")
+
def test_globals_env(self):
os.environ["FORMATTER_TEST"] = value = self.kwdict["a"]
@@ -259,7 +268,7 @@ def noarg():
sys.path.pop(0)
self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name")
- self.assertEqual(fmt2.format_map(self.kwdict), "65")
+ self.assertEqual(fmt2.format_map(self.kwdict), "89")
with self.assertRaises(TypeError):
self.assertEqual(fmt3.format_map(self.kwdict), "")
diff --git a/test/test_job.py b/test/test_job.py
index 1aeec1c..0276555 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -37,6 +37,31 @@ class TestJob(unittest.TestCase):
return buffer.getvalue()
+class TestDownloadJob(TestJob):
+ jobclass = job.DownloadJob
+
+ def test_extractor_filter(self):
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr)
+
+ func = tjob._build_extractor_filter()
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), False)
+ self.assertEqual(func(TestExtractorAlt) , True)
+
+ config.set((), "blacklist", ":test_subcategory")
+ func = tjob._build_extractor_filter()
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), True)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
+ config.set((), "whitelist", "test_category:test_subcategory")
+ func = tjob._build_extractor_filter()
+ self.assertEqual(func(TestExtractor) , True)
+ self.assertEqual(func(TestExtractorParent), False)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
+
class TestKeywordJob(TestJob):
jobclass = job.KeywordJob
@@ -334,5 +359,10 @@ class TestExtractorException(Extractor):
return 1/0
+class TestExtractorAlt(Extractor):
+ category = "test_category_alt"
+ subcategory = "test_subcategory"
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_results.py b/test/test_results.py
index ba1e0b1..944f14d 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -148,6 +148,14 @@ class TestExtractorResults(unittest.TestCase):
self._test_kwdict(value, test)
elif isinstance(test, type):
self.assertIsInstance(value, test, msg=key)
+ elif isinstance(test, list):
+ subtest = False
+ for idx, item in enumerate(test):
+ if isinstance(item, dict):
+ subtest = True
+ self._test_kwdict(value[idx], item)
+ if not subtest:
+ self.assertEqual(value, test, msg=key)
elif isinstance(test, str):
if test.startswith("re:"):
self.assertRegex(value, test[3:], msg=key)
diff --git a/test/test_util.py b/test/test_util.py
index 0fbbbce..32e9784 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -357,6 +357,58 @@ class TestOther(unittest.TestCase):
with self.assertRaises(exception.StopExtraction):
expr()
+ def test_extractor_filter(self):
+ # empty
+ func = util.build_extractor_filter("")
+ self.assertEqual(func(TestExtractor) , True)
+ self.assertEqual(func(TestExtractorParent), True)
+ self.assertEqual(func(TestExtractorAlt) , True)
+
+ # category
+ func = util.build_extractor_filter("test_category")
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), False)
+ self.assertEqual(func(TestExtractorAlt) , True)
+
+ # subcategory
+ func = util.build_extractor_filter("*:test_subcategory")
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), True)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
+ # basecategory
+ func = util.build_extractor_filter("test_basecategory")
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), False)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
+ # category-subcategory pair
+ func = util.build_extractor_filter("test_category:test_subcategory")
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), True)
+ self.assertEqual(func(TestExtractorAlt) , True)
+
+ # combination
+ func = util.build_extractor_filter(
+ ["test_category", "*:test_subcategory"])
+ self.assertEqual(func(TestExtractor) , False)
+ self.assertEqual(func(TestExtractorParent), False)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
+ # whitelist
+ func = util.build_extractor_filter(
+ "test_category:test_subcategory", negate=False)
+ self.assertEqual(func(TestExtractor) , True)
+ self.assertEqual(func(TestExtractorParent), False)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
+ func = util.build_extractor_filter(
+ ["test_category:test_subcategory", "*:test_subcategory_parent"],
+ negate=False)
+ self.assertEqual(func(TestExtractor) , True)
+ self.assertEqual(func(TestExtractorParent), True)
+ self.assertEqual(func(TestExtractorAlt) , False)
+
def test_generate_token(self):
tokens = set()
for _ in range(100):
@@ -469,5 +521,21 @@ class TestOther(unittest.TestCase):
self.assertIs(obj["key"], obj)
+class TestExtractor():
+ category = "test_category"
+ subcategory = "test_subcategory"
+ basecategory = "test_basecategory"
+
+
+class TestExtractorParent(TestExtractor):
+ category = "test_category"
+ subcategory = "test_subcategory_parent"
+
+
+class TestExtractorAlt(TestExtractor):
+ category = "test_category_alt"
+ subcategory = "test_subcategory"
+
+
if __name__ == '__main__':
unittest.main()