diff options

-rw-r--r--  CHANGELOG.md                           14
-rw-r--r--  PKG-INFO                                6
-rw-r--r--  README.rst                              4
-rw-r--r--  data/completion/_gallery-dl             4
-rw-r--r--  data/completion/gallery-dl              4
-rw-r--r--  data/man/gallery-dl.1                  11
-rw-r--r--  data/man/gallery-dl.conf.5             16
-rw-r--r--  gallery_dl.egg-info/PKG-INFO            6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt         4
-rw-r--r--  gallery_dl/extractor/common.py          3
-rw-r--r--  gallery_dl/extractor/gelbooru.py       43
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py   18
-rw-r--r--  gallery_dl/extractor/hitomi.py         35
-rw-r--r--  gallery_dl/extractor/mangadex.py       17
-rw-r--r--  gallery_dl/extractor/newgrounds.py     64
-rw-r--r--  gallery_dl/extractor/patreon.py         1
-rw-r--r--  gallery_dl/extractor/wordpress.py      41
-rw-r--r--  gallery_dl/job.py                       5
-rw-r--r--  gallery_dl/option.py                   20
-rw-r--r--  gallery_dl/path.py                      3
-rw-r--r--  gallery_dl/version.py                   2
-rw-r--r--  test/test_ytdl.py                     545

22 files changed, 220 insertions, 646 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1dc4a21..c505c4b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 
+## 1.20.1 - 2022-01-08
+### Additions
+- [newgrounds] add `search` extractor ([#2161](https://github.com/mikf/gallery-dl/issues/2161))
+### Changes
+- restore `-d/--dest` functionality from before 1.20.0 ([#2148](https://github.com/mikf/gallery-dl/issues/2148))
+- change short option for `--directory` to `-D`
+### Fixes
+- [gelbooru] handle changed API response format ([#2157](https://github.com/mikf/gallery-dl/issues/2157))
+- [hitomi] fix image URLs ([#2153](https://github.com/mikf/gallery-dl/issues/2153))
+- [mangadex] fix extraction ([#2177](https://github.com/mikf/gallery-dl/issues/2177))
+- [rule34] use `https://api.rule34.xxx` for API requests
+- fix cookie checks for patreon, fanbox, fantia
+- improve UNC path handling ([#2126](https://github.com/mikf/gallery-dl/issues/2126))
+
 ## 1.20.0 - 2021-12-29
 ### Additions
 - [500px] add `favorite` extractor ([#1927](https://github.com/mikf/gallery-dl/issues/1927))
diff --git a/PKG-INFO b/PKG-INFO
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery_dl
-Version: 1.20.0
+Version: 1.20.1
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -98,8 +98,8 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.1/gallery-dl.bin>`__
 
 | Executables built from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.1/gallery-dl.bin>`__
 
 | Executables built from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 2ac93f7..6b3ac6d 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -7,10 +7,10 @@ local rc=1
 _arguments -C -S \
 {-h,--help}'[Print this help message and exit]' \
 --version'[Print program version and exit]' \
---dest'[==SUPPRESS==]':'<dest>':_files \
 {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'<file>':_files \
+{-d,--destination}'[Target location for file downloads]':'<path>' \
+{-D,--directory}'[Exact location for file downloads]':'<path>' \
 {-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'<format>' \
-{-d,--directory}'[Target location for file downloads]':'<path>' \
 --cookies'[File to load additional cookies from]':'<file>':_files \
 --proxy'[Use the specified proxy]':'<url>' \
 --clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 4085bb9..ac64645 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -7,10 +7,10 @@ _gallery_dl()
 
     if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--download-archive)$ ]]; then
         COMPREPLY=( $(compgen -f -- "${cur}") )
-    elif [[ "${prev}" =~ ^(--dest)$ ]]; then
+    elif [[ "${prev}" =~ ^()$ ]]; then
         COMPREPLY=( $(compgen -d -- "${cur}") )
     else
-        COMPREPLY=( $(compgen -W "--help --version --dest --input-file --filename --directory --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
+        COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
     fi
 }
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index a7f51a7..992d0a6 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-12-29" "1.20.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-01-08" "1.20.1" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 
@@ -26,12 +26,15 @@ Print program version and exit
 .B "\-i, \-\-input\-file" \f[I]FILE\f[]
 Download URLs found in FILE ('-' for stdin). More than one --input-file can be specified
 .TP
+.B "\-d, \-\-destination" \f[I]PATH\f[]
+Target location for file downloads
+.TP
+.B "\-D, \-\-directory" \f[I]PATH\f[]
+Exact location for file downloads
+.TP
 .B "\-f, \-\-filename" \f[I]FORMAT\f[]
 Filename format string for downloaded files ('/O' for "original" filenames)
 .TP
-.B "\-d, \-\-directory" \f[I]PATH\f[]
-Target location for file downloads
-.TP
 .B "\-\-cookies" \f[I]FILE\f[]
 File to load additional cookies from
 .TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index a574625..a62575b 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-12-29" "1.20.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-01-08" "1.20.1" "gallery-dl Manual"
 .\" disable hyphenation
 .nh
 .\" disable justification (adjust text to left margin only)
@@ -649,11 +649,23 @@ config options.
 .IP "Default:" 9
 \f[I]["oauth", "recursive", "test"]\f[] + current extractor category
 
+.IP "Example:" 4
+["imgur", "gfycat:user", "*:image"]
+
 .IP "Description:" 4
-A list of extractor categories to ignore (or allow)
+A list of extractor identifiers to ignore (or allow)
 when spawning child extractors for unknown URLs,
 e.g. from \f[I]reddit\f[] or \f[I]plurk\f[].
 
+Each identifier can be
+
+.br
+* A category or basecategory name (\f[I]"imgur"\f[], \f[I]"mastodon"\f[])
+.br
+* | A (base)category-subcategory pair, where both names are separated by a colon (\f[I]"gfycat:user"\f[]).
+Both names can be a * or left empty, matching all possible names (\f[I]"*:image"\f[], \f[I]":user"\f[]).
+.br
+
 Note: Any \f[I]blacklist\f[] setting
 will automatically include \f[I]"oauth"\f[],
 \f[I]"recursive"\f[], and \f[I]"test"\f[].
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 8b87746..7e1e284 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: gallery-dl
-Version: 1.20.0
+Version: 1.20.1
 Summary: Command-line program to download image galleries and collections from several image hosting sites
 Home-page: https://github.com/mikf/gallery-dl
 Author: Mike Fährmann
@@ -98,8 +98,8 @@ Standalone Executable
 Prebuilt executable files with a Python interpreter and
 required Python packages included are available for
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.20.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.20.1/gallery-dl.bin>`__
 
 | Executables built from the latest commit can be found at
 | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 127354e..8ddae52 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -180,7 +180,6 @@ gallery_dl/extractor/webtoons.py
 gallery_dl/extractor/weibo.py
 gallery_dl/extractor/wikiart.py
 gallery_dl/extractor/wikieat.py
-gallery_dl/extractor/wordpress.py
 gallery_dl/extractor/xhamster.py
 gallery_dl/extractor/xvideos.py
 gallery_dl/extractor/ytdl.py
@@ -205,5 +204,4 @@ test/test_output.py
 test/test_postprocessor.py
 test/test_results.py
 test/test_text.py
-test/test_util.py
-test/test_ytdl.py
\ No newline at end of file
+test/test_util.py
\ No newline at end of file
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index c440aee..afe4a16 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -336,7 +336,8 @@ class Extractor():
         now = time.time()
 
         for cookie in self._cookiejar:
-            if cookie.name in names and cookie.domain == domain:
+            if cookie.name in names and (
+                    not domain or cookie.domain == domain):
                 if cookie.expires and cookie.expires < now:
                     self.log.warning("Cookie '%s' has expired", cookie.name)
                 else:
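The relaxed condition above is what fixes the cookie checks for patreon, fanbox, and fantia: an empty or unset `domain` now matches a cookie from any domain. A minimal standalone sketch of that logic using only the standard library — `make_cookie` and `has_cookies` are illustrative helpers, not gallery-dl's API:

```python
import time
from http.cookiejar import Cookie, CookieJar

def make_cookie(name, domain, expires=None):
    # http.cookiejar.Cookie takes 16 positional fields
    return Cookie(
        0, name, "value", None, False, domain, True, False, "/", True,
        False, expires, False, None, None, {})

def has_cookies(jar, names, domain=""):
    """True if every name in `names` has an unexpired cookie in `jar`."""
    now = time.time()
    found = set()
    for cookie in jar:
        # empty `domain` matches any cookie domain (the fix above)
        if cookie.name in names and (not domain or cookie.domain == domain):
            if cookie.expires and cookie.expires < now:
                print("Cookie '%s' has expired" % cookie.name)
            else:
                found.add(cookie.name)
    return found >= set(names)

jar = CookieJar()
jar.set_cookie(make_cookie("session_id", ".patreon.com"))
print(has_cookies(jar, {"session_id"}))                  # True (any domain)
print(has_cookies(jar, {"session_id"}, ".patreon.com"))  # True (exact match)
print(has_cookies(jar, {"session_id"}, ".fanbox.cc"))    # False
```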
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index df45d0d..a6bda52 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -10,7 +10,7 @@
 
 from .common import Extractor, Message
 from . import gelbooru_v02
-from .. import text, exception
+from .. import text, util, exception
 import binascii
 
 
@@ -20,6 +20,42 @@ class GelbooruBase():
     basecategory = "booru"
     root = "https://gelbooru.com"
 
+    def _api_request(self, params):
+        url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
+        data = self.request(url, params=params).json()
+        if "post" not in data:
+            return ()
+        posts = data["post"]
+        if not isinstance(posts, list):
+            return (posts,)
+        return posts
+
+    def _pagination(self, params):
+        params["pid"] = self.page_start
+        params["limit"] = self.per_page
+
+        post = None
+        while True:
+            try:
+                posts = self._api_request(params)
+            except ValueError:
+                if "tags" not in params or post is None:
+                    raise
+                taglist = [tag for tag in params["tags"].split()
+                           if not tag.startswith("id:<")]
+                taglist.append("id:<" + str(post["id"]))
+                params["tags"] = " ".join(taglist)
+                params["pid"] = 0
+                continue
+
+            post = None
+            for post in posts:
+                yield post
+
+            if len(posts) < self.per_page:
+                return
+            params["pid"] += 1
+
     @staticmethod
     def _file_url(post):
         url = post["file_url"]
@@ -82,6 +118,11 @@
             "pool_name": text.unescape(name),
         }
 
+    def posts(self):
+        params = {}
+        for params["id"] in util.advance(self.post_ids, self.page_start):
+            yield from self._api_request(params)
+
 
 class GelbooruPostExtractor(GelbooruBase,
                             gelbooru_v02.GelbooruV02PostExtractor):
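For context: Gelbooru's `dapi` endpoint only accepts page offsets (`pid`) up to a certain depth, and the `id:<...` fallback above works around that by restarting at page 0 with the tag filter constrained to IDs below the last post seen (results come back in descending-ID order). A hedged, standalone sketch of the idea; the 200-page cap here is an assumption for illustration, where the real code instead reacts to the API's error response:

```python
import requests

API = "https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1"

def posts(tags, per_page=100, max_pid=200):
    """Yield posts for `tags`, paginating past the pid offset limit."""
    params = {"tags": tags, "pid": 0, "limit": per_page}
    while True:
        data = requests.get(API, params=params).json()
        page = data.get("post") or []
        if isinstance(page, dict):   # a single result is not list-wrapped
            page = [page]
        yield from page

        if len(page) < per_page:     # short page -> no more results
            return
        last_id = page[-1]["id"]
        params["pid"] += 1
        if params["pid"] > max_pid:  # assumed offset limit; reset & filter
            base = [t for t in params["tags"].split()
                    if not t.startswith("id:<")]
            params["tags"] = " ".join(base + ["id:<%d" % last_id])
            params["pid"] = 0
```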
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index a42a202..8da0bde 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -19,8 +19,15 @@
 class GelbooruV02Extractor(booru.BooruExtractor):
     basecategory = "gelbooru_v02"
 
+    def __init__(self, match):
+        booru.BooruExtractor.__init__(self, match)
+        try:
+            self.api_root = INSTANCES[self.category]["api_root"]
+        except KeyError:
+            self.api_root = self.root
+
     def _api_request(self, params):
-        url = self.root + "/index.php?page=dapi&s=post&q=index"
+        url = self.api_root + "/index.php?page=dapi&s=post&q=index"
         return ElementTree.fromstring(self.request(url, params=params).text)
 
     def _pagination(self, params):
@@ -97,12 +104,15 @@
             post["notes"] = notes
 
 
-BASE_PATTERN = GelbooruV02Extractor.update({
+INSTANCES = {
     "realbooru": {"root": "https://realbooru.com"},
-    "rule34"   : {"root": "https://rule34.xxx"},
+    "rule34"   : {"root": "https://rule34.xxx",
+                  "api_root": "https://api.rule34.xxx"},
     "safebooru": {"root": "https://safebooru.org"},
     "tbib"     : {"root": "https://tbib.org"},
-})
+}
+
+BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
 
 
 class GelbooruV02TagExtractor(GelbooruV02Extractor):
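The point of the `INSTANCES` split above is that rule34 now serves its API from a separate host, so API requests must fall back to `root` only when no explicit `api_root` is configured. A minimal sketch of that lookup:

```python
INSTANCES = {
    "realbooru": {"root": "https://realbooru.com"},
    "rule34":    {"root": "https://rule34.xxx",
                  "api_root": "https://api.rule34.xxx"},
    "safebooru": {"root": "https://safebooru.org"},
    "tbib":      {"root": "https://tbib.org"},
}

def api_url(category):
    # prefer a dedicated API host, otherwise use the site root
    inst = INSTANCES[category]
    return inst.get("api_root", inst["root"]) + \
        "/index.php?page=dapi&s=post&q=index"

print(api_url("rule34"))     # https://api.rule34.xxx/index.php?page=dapi...
print(api_url("safebooru"))  # https://safebooru.org/index.php?page=dapi...
```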
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 88cf98c..ce6c7ce 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -26,7 +26,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
                r"/(?:[^/?#]+-)?(\d+)")
     test = (
         ("https://hitomi.la/galleries/867789.html", {
-            "pattern": r"https://[a-c]b.hitomi.la/images/1639745412/\d+"
+            "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+"
                        r"/[0-9a-f]{64}\.jpg",
             "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae",
             "options": (("metadata", True),),
@@ -39,12 +39,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
         }),
         # Game CG with scenes (#321)
         ("https://hitomi.la/galleries/733697.html", {
-            "url": "479d16fe92117a6a2ce81b4e702e6347922c81e3",
+            "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99",
             "count": 210,
         }),
         # fallback for galleries only available through /reader/ URLs
         ("https://hitomi.la/galleries/1045954.html", {
-            "url": "ebc1415c5d7f634166ef7e2635b77735de1ea7a2",
+            "url": "eea99c3745719a7a392150335e6ae3f73faa0b85",
             "count": 1413,
         }),
         # gallery with "broken" redirect
@@ -138,7 +138,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
 
     def images(self, _):
         # see https://ltn.hitomi.la/gg.js
-        gg_m, gg_b = _parse_gg(self)
+        gg_m, gg_b, gg_default = _parse_gg(self)
 
         result = []
         for image in self.info["files"]:
@@ -148,7 +148,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
             # see https://ltn.hitomi.la/common.js
             inum = int(ihash[-1] + ihash[-3:-1], 16)
             url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
-                chr(97 + gg_m.get(inum, 0)),
+                chr(97 + gg_m.get(inum, gg_default)),
                 gg_b, inum, ihash, idata["extension"],
             )
             result.append((url, idata))
@@ -195,10 +195,25 @@ class HitomiTagExtractor(Extractor):
 
 
 def _parse_gg(extr):
     page = extr.request("https://ltn.hitomi.la/gg.js").text
-    m = {
-        int(match.group(1)): int(match.group(2))
-        for match in re.finditer(r"case (\d+): o = (\d+); break;", page)
-    }
+
+    m = {}
+    keys = []
+    for match in re.finditer(
+            r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page):
+        key, value = match.groups()
+        keys.append(int(key))
+
+        if value:
+            value = int(value)
+            for key in keys:
+                m[key] = value
+            keys.clear()
+
+    for match in re.finditer(
+            r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)", page):
+        m[int(match.group(1))] = int(match.group(2))
+
+    d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
     b = re.search(r"b:\s*[\"'](.+)[\"']", page)
-    return m, b.group(1).strip("/")
+    return m, b.group(1).strip("/"), int(d.group(1)) if d else 1
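The rewritten `_parse_gg()` above copes with fall-through `case` groups, `if (g === N)` assignments, and an explicit default. Here is that parsing logic run against a made-up `gg.js` fragment — the real file lives at https://ltn.hitomi.la/gg.js and changes regularly, so the sample below is purely illustrative:

```python
import re

SAMPLE = """
var gg = {
    m: function(g) {
        var o = 0;
        switch (g) {
        case 1243: case 1244:
            o = 1; break;
        case 2599:
            o = 2; break;
        }
        return o;
    },
    b: '1641140516/'
};
"""

def parse_gg(page):
    m = {}
    keys = []
    # collect fall-through "case N:" labels until a value is assigned
    for match in re.finditer(r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page):
        key, value = match.groups()
        keys.append(int(key))
        if value:
            value = int(value)
            for key in keys:
                m[key] = value
            keys.clear()
    # the default value ("var o = D" or "default: o = D")
    d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page)
    b = re.search(r"b:\s*[\"'](.+)[\"']", page)
    return m, b.group(1).strip("/"), int(d.group(1)) if d else 1

gg_m, gg_b, gg_default = parse_gg(SAMPLE)
print(gg_m)        # {1243: 1, 1244: 1, 2599: 2}
print(gg_b)        # 1641140516
print(gg_default)  # 0
```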
" "More than one --input-file can be specified"), ) general.add_argument( + "-d", "--destination", + dest="base-directory", metavar="PATH", action=ConfigAction, + help="Target location for file downloads", + ) + general.add_argument( + "-D", "--directory", + dest="directory", metavar="PATH", + help="Exact location for file downloads", + ) + general.add_argument( "-f", "--filename", dest="filename", metavar="FORMAT", help=("Filename format string for downloaded files " "('/O' for \"original\" filenames)"), ) general.add_argument( - "-d", "--directory", - dest="directory", metavar="PATH", - help="Target location for file downloads", - ) - general.add_argument( "--cookies", dest="cookies", metavar="FILE", action=ConfigAction, help="File to load additional cookies from", diff --git a/gallery_dl/path.py b/gallery_dl/path.py index 9e9e983..15db67f 100644 --- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -178,10 +178,11 @@ class PathFormat(): if WINDOWS: # Enable longer-than-260-character paths + directory = os.path.abspath(directory) if directory.startswith("\\\\"): directory = "\\\\?\\UNC\\" + directory[2:] else: - directory = "\\\\?\\" + os.path.abspath(directory) + directory = "\\\\?\\" + directory # abspath() in Python 3.7+ removes trailing path separators (#402) if directory[-1] != sep: diff --git a/gallery_dl/version.py b/gallery_dl/version.py index b5114e8..8fa7c22 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.20.0" +__version__ = "1.20.1" diff --git a/test/test_ytdl.py b/test/test_ytdl.py deleted file mode 100644 index 97431e3..0000000 --- a/test/test_ytdl.py +++ /dev/null @@ -1,545 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright 2021 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 4351b3e..8bcbc20 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -38,6 +38,7 @@
 
     def items(self):
         self.login()
+        metadata = self.metadata()
 
         for post_url in self.posts():
             try:
@@ -48,6 +49,8 @@
                 url = None
 
             if url:
+                if metadata:
+                    post.update(metadata)
                 yield Message.Directory, post
                 yield Message.Url, url, text.nameext_from_url(url, post)
 
@@ -62,9 +65,12 @@
                     "Unable to get download URL for '%s'", post_url)
 
     def posts(self):
-        """Return urls of all relevant image pages"""
+        """Return URLs of all relevant post pages"""
         return self._pagination(self._path)
 
+    def metadata(self):
+        """Return general metadata"""
+
     def login(self):
         username, password = self._get_auth_info()
         if username:
@@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
             text.ensure_http_scheme(user.rpartition('"')[2])
             for user in text.extract_iter(page, 'class="item-user', '"><img')
         ]
+
+
+class NewgroundsSearchExtractor(NewgroundsExtractor):
+    """Extractor for newgrounds.com search results"""
+    subcategory = "search"
+    directory_fmt = ("{category}", "search", "{search_tags}")
+    pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
+               r"/search/conduct/([^/?#]+)/?\?([^#]+)")
+    test = (
+        ("https://www.newgrounds.com/search/conduct/art?terms=tree", {
+            "pattern": NewgroundsImageExtractor.pattern,
+            "keyword": {"search_tags": "tree"},
+            "range": "1-10",
+            "count": 10,
+        }),
+        ("https://www.newgrounds.com/search/conduct/movies?terms=tree", {
+            "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+",
+            "range": "1-10",
+            "count": 10,
+        }),
+        ("https://www.newgrounds.com/search/conduct/audio?advanced=1"
+         "&terms=tree+green+nature&match=tdtu&genre=5&suitabilities=e%2Cm"),
+    )
+
+    def __init__(self, match):
+        NewgroundsExtractor.__init__(self, match)
+        self._path, query = match.groups()
+        self.query = text.parse_query(query)
+
+    def posts(self):
+        return self._pagination("/search/conduct/" + self._path, self.query)
+
+    def metadata(self):
+        return {"search_tags": self.query.get("terms", "")}
+
+    def _pagination(self, path, params):
+        url = self.root + path
+        headers = {
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "X-Requested-With": "XMLHttpRequest",
+            "Referer": self.root,
+        }
+        params["inner"] = "1"
+        params["page"] = 1
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+
+            post_url = None
+            for post_url in text.extract_iter(data["content"], 'href="', '"'):
+                if not post_url.startswith("/search/"):
+                    yield post_url
+
+            if post_url is None:
+                return
+            params["page"] += 1
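The new search extractor drives Newgrounds' AJAX endpoint directly: requesting a `/search/conduct/` page with `inner=1` and XHR headers returns JSON whose `content` field is an HTML fragment full of result links. A standalone sketch of that pagination loop, with a slightly simplified stop condition compared to the extractor above:

```python
import re
import requests

def search(path, terms):
    """Yield result URLs from Newgrounds' search, e.g. search('art', 'tree')."""
    url = "https://www.newgrounds.com/search/conduct/" + path
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",   # makes the endpoint emit JSON
        "Referer": "https://www.newgrounds.com",
    }
    params = {"terms": terms, "inner": "1", "page": 1}
    while True:
        data = requests.get(url, params=params, headers=headers).json()
        # pull hrefs out of the embedded HTML fragment, skip pager links
        urls = [u for u in re.findall(r'href="([^"]+)"', data["content"])
                if not u.startswith("/search/")]
        if not urls:          # empty page -> no more results
            return
        yield from urls
        params["page"] += 1
```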
{"key": "EmbedThumbnail", "already_have_thumbnail": False} - if self.module_name == "yt_dlp": - subs["already_have_subtitle"] = False - - opts = self._(["--embed-subs", "--embed-thumbnail"]) - self.assertEqual(opts["postprocessors"], [subs, thumb]) - - thumb["already_have_thumbnail"] = True - if self.module_name == "yt_dlp": - subs["already_have_subtitle"] = True - - opts = self._([ - "--embed-thumbnail", - "--embed-subs", - "--write-sub", - "--write-all-thumbnails", - ]) - self.assertEqual(opts["postprocessors"], [subs, thumb]) - - def test_metadata(self): - opts = self._("--add-metadata") - self.assertEqual(opts["postprocessors"][0], {"key": "FFmpegMetadata"}) - - def test_metadata_from_title(self): - opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"]) - self.assertEqual(opts["postprocessors"][0], { - "key": "MetadataFromTitle", - "titleformat": "%(artist)s - %(title)s", - }) - - def test_xattr(self): - self._("--xattr-set-filesize", "xattr_set_filesize", True) - - opts = self._("--xattrs") - self.assertEqual(opts["postprocessors"][0], {"key": "XAttrMetadata"}) - - def test_noop(self): - result = self._([ - "--update", - "--dump-user-agent", - "--list-extractors", - "--extractor-descriptions", - "--ignore-config", - "--config-location", - "--dump-json", - "--dump-single-json", - "--list-thumbnails", - ]) - - result["daterange"] = self.default["daterange"] - self.assertEqual(result, self.default) - - def _(self, cmdline, option=util.SENTINEL, expected=None): - if isinstance(cmdline, str): - cmdline = [cmdline] - result = ytdl.parse_command_line(self.module, cmdline) - if option is not util.SENTINEL: - self.assertEqual(result[option], expected, option) - return result - - -class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments): - module_name = "yt_dlp" - - def test_retries_extractor(self): - inf = float("inf") - - self._(["--extractor-retries", "5"], "extractor_retries", 5) - self._(["--extractor-retries", "inf"], "extractor_retries", inf) - self._(["--extractor-retries", "infinite"], "extractor_retries", inf) - - def test_remuxs_video(self): - opts = self._(["--remux-video", " mkv "]) - self.assertEqual(opts["postprocessors"][0], { - "key": "FFmpegVideoRemuxer", - "preferedformat": "mkv", - }) - - def test_metadata(self): - opts = self._(["--embed-metadata", - "--no-embed-chapters", - "--embed-info-json"]) - self.assertEqual(opts["postprocessors"][0], { - "key": "FFmpegMetadata", - "add_chapters": False, - "add_metadata": True, - "add_infojson": True, - }) - - def test_metadata_from_title(self): - opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"]) - self.assertEqual(opts["postprocessors"][0], { - "key": "MetadataParser", - "when": "pre_process", - "actions": [self.module.MetadataFromFieldPP.to_action( - "title:%(artist)s - %(title)s")], - }) - - -if __name__ == "__main__": - unittest.main(warnings="ignore") - -''' -Usage: __main__.py [OPTIONS] URL [URL...] - -Options: - General Options: - -h, --help Print this help text and exit - --version Print program version and exit - --force-generic-extractor Force extraction to use the generic - extractor - --flat-playlist Do not extract the videos of a - playlist, only list them. 
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 97a8d3f..3e72e9c 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -501,14 +501,15 @@
         clist = self.extractor.config("whitelist")
         if clist is not None:
             negate = False
+            special = None
         else:
             clist = self.extractor.config("blacklist")
             negate = True
+            special = util.SPECIAL_EXTRACTORS
             if clist is None:
                 clist = (self.extractor.category,)
 
-        return util.build_extractor_filter(
-            clist, negate, util.SPECIAL_EXTRACTORS)
+        return util.build_extractor_filter(clist, negate, special)
 
 
 class SimulationJob(DownloadJob):
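The change above means the special categories (`oauth`, `recursive`, `test`) are appended only when a blacklist is in effect, never for an explicit whitelist. Combined with the identifier syntax documented in the gallery-dl.conf.5 hunk earlier, the matching behaves roughly like this simplified stand-in for gallery-dl's `util.build_extractor_filter()` (basecategory handling omitted):

```python
SPECIAL = {"oauth", "recursive", "test"}  # always blacklisted

def parse(identifier):
    # "imgur" -> ("imgur", "*"); "gfycat:user" -> ("gfycat", "user");
    # empty names or "*" match everything, e.g. "*:image", ":user"
    category, _, subcategory = identifier.partition(":")
    return (category or "*", subcategory or "*")

def build_filter(identifiers, negate=True):
    pairs = [parse(i) for i in identifiers]
    if negate:                         # blacklist mode: add specials
        pairs += [(c, "*") for c in SPECIAL]

    def allowed(category, subcategory):
        match = any(c in ("*", category) and s in ("*", subcategory)
                    for c, s in pairs)
        return not match if negate else match
    return allowed

deny = build_filter(["imgur", "gfycat:user", "*:image"])
print(deny("imgur", "gallery"))  # False (whole category blacklisted)
print(deny("gfycat", "image"))   # False (matches "*:image")
print(deny("gfycat", "gif"))     # True
print(deny("oauth", ""))         # False (special category)
```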
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 1967bf7..cdfe9a1 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -92,28 +92,28 @@
         help="Print program version and exit",
     )
     general.add_argument(
-        "--dest",
-        dest="base-directory", metavar="DEST", action=ConfigAction,
-        help=argparse.SUPPRESS,
-    )
-    general.add_argument(
         "-i", "--input-file",
         dest="inputfiles", metavar="FILE", action="append",
         help=("Download URLs found in FILE ('-' for stdin). "
               "More than one --input-file can be specified"),
     )
     general.add_argument(
+        "-d", "--destination",
+        dest="base-directory", metavar="PATH", action=ConfigAction,
+        help="Target location for file downloads",
+    )
+    general.add_argument(
+        "-D", "--directory",
+        dest="directory", metavar="PATH",
+        help="Exact location for file downloads",
+    )
+    general.add_argument(
         "-f", "--filename",
         dest="filename", metavar="FORMAT",
         help=("Filename format string for downloaded files "
               "('/O' for \"original\" filenames)"),
     )
     general.add_argument(
-        "-d", "--directory",
-        dest="directory", metavar="PATH",
-        help="Target location for file downloads",
-    )
-    general.add_argument(
         "--cookies",
         dest="cookies", metavar="FILE", action=ConfigAction,
         help="File to load additional cookies from",
     )
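Net effect of the option shuffle above: `-d` is once again shorthand for the base directory (its pre-1.20.0 meaning, replacing the suppressed `--dest`), while the exact-directory option moves to `-D`. A minimal argparse sketch of just these two flags — gallery-dl's real parser additionally wires `-d` through its `ConfigAction` to the `base-directory` setting:

```python
import argparse

parser = argparse.ArgumentParser(prog="gallery-dl")
parser.add_argument("-d", "--destination", metavar="PATH",
                    help="Target location for file downloads")
parser.add_argument("-D", "--directory", metavar="PATH",
                    help="Exact location for file downloads")

args = parser.parse_args(["-d", "~/downloads"])
print(args.destination)  # ~/downloads (joined with format strings)
print(args.directory)    # None (would bypass format strings entirely)
```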
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 9e9e983..15db67f 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -178,10 +178,11 @@ class PathFormat():
 
         if WINDOWS:
             # Enable longer-than-260-character paths
+            directory = os.path.abspath(directory)
             if directory.startswith("\\\\"):
                 directory = "\\\\?\\UNC\\" + directory[2:]
             else:
-                directory = "\\\\?\\" + os.path.abspath(directory)
+                directory = "\\\\?\\" + directory
 
             # abspath() in Python 3.7+ removes trailing path separators (#402)
             if directory[-1] != sep:
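Calling `os.path.abspath()` before the prefix check is what fixes UNC handling (#2126): a `\\server\share` path must get the dedicated `\\?\UNC\` long-path prefix, while an ordinary absolute path just gets `\\?\`. A sketch of that decision, meaningful on Windows only:

```python
import os

def enable_long_paths(directory):
    """Prefix a directory for longer-than-260-character Windows paths."""
    directory = os.path.abspath(directory)   # normalize *before* checking
    if directory.startswith("\\\\"):         # UNC share: \\server\share\...
        return "\\\\?\\UNC\\" + directory[2:]
    return "\\\\?\\" + directory             # plain drive path: C:\...

# enable_long_paths(r"\\server\share\dir") -> r"\\?\UNC\server\share\dir"
# enable_long_paths(r"C:\very\long\dir")   -> r"\\?\C:\very\long\dir"
```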
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index b5114e8..8fa7c22 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.20.0"
+__version__ = "1.20.1"
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
deleted file mode 100644
index 97431e3..0000000
--- a/test/test_ytdl.py
+++ /dev/null
@@ -1,545 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-# Copyright 2021 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-import os
-import sys
-import unittest
-
-import re
-import shlex
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import ytdl, util, config
-
-
-class Test_CommandlineArguments(unittest.TestCase):
-    module_name = "youtube_dl"
-
-    @classmethod
-    def setUpClass(cls):
-        try:
-            cls.module = __import__(cls.module_name)
-        except ImportError:
-            raise unittest.SkipTest("cannot import module '{}'".format(
-                cls.module_name))
-        cls.default = ytdl.parse_command_line(cls.module, [])
-
-    def test_ignore_errors(self):
-        self._("--ignore-errors" , "ignoreerrors", True)
-        self._("--abort-on-error", "ignoreerrors", False)
-
-    def test_default_search(self):
-        self._(["--default-search", "foo"] , "default_search", "foo")
-
-    def test_mark_watched(self):
-        self._("--mark-watched"   , "mark_watched", True)
-        self._("--no-mark-watched", "mark_watched", False)
-
-    def test_proxy(self):
-        self._(["--proxy", "socks5://127.0.0.1:1080/"],
-               "proxy", "socks5://127.0.0.1:1080/")
-        self._(["--cn-verification-proxy", "https://127.0.0.1"],
-               "cn_verification_proxy", "https://127.0.0.1")
-        self._(["--geo-verification-proxy", "127.0.0.1"],
-               "geo_verification_proxy", "127.0.0.1")
-
-    def test_retries(self):
-        inf = float("inf")
-
-        self._(["--retries", "5"], "retries", 5)
-        self._(["--retries", "inf"], "retries", inf)
-        self._(["--retries", "infinite"], "retries", inf)
-        self._(["--fragment-retries", "8"], "fragment_retries", 8)
-        self._(["--fragment-retries", "inf"], "fragment_retries", inf)
-        self._(["--fragment-retries", "infinite"], "fragment_retries", inf)
-
-    def test_geo_bypass(self):
-        self._("--geo-bypass", "geo_bypass", True)
-        self._("--no-geo-bypass", "geo_bypass", False)
-        self._(["--geo-bypass-country", "EN"], "geo_bypass_country", "EN")
-        self._(["--geo-bypass-ip-block", "198.51.100.14/24"],
-               "geo_bypass_ip_block", "198.51.100.14/24")
-
-    def test_headers(self):
-        headers = self.module.std_headers
-
-        self.assertNotEqual(headers["User-Agent"], "Foo/1.0")
-        self._(["--user-agent", "Foo/1.0"])
-        self.assertEqual(headers["User-Agent"], "Foo/1.0")
-
-        self.assertNotIn("Referer", headers)
-        self._(["--referer", "http://example.org/"])
-        self.assertEqual(headers["Referer"], "http://example.org/")
-
-        self.assertNotEqual(headers["Accept"], "*/*")
-        self.assertNotIn("DNT", headers)
-        self._([
-            "--add-header", "accept:*/*",
-            "--add-header", "dnt:1",
-        ])
-        self.assertEqual(headers["accept"], "*/*")
-        self.assertEqual(headers["dnt"], "1")
-
-    def test_extract_audio(self):
-        opts = self._(["--extract-audio"])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "FFmpegExtractAudio",
-            "preferredcodec": "best",
-            "preferredquality": "5",
-            "nopostoverwrites": False,
-        })
-
-        opts = self._([
-            "--extract-audio",
-            "--audio-format", "opus",
-            "--audio-quality", "9",
-            "--no-post-overwrites",
-        ])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "FFmpegExtractAudio",
-            "preferredcodec": "opus",
-            "preferredquality": "9",
-            "nopostoverwrites": True,
-        })
-
-    def test_recode_video(self):
-        opts = self._(["--recode-video", " mkv "])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "FFmpegVideoConvertor",
-            "preferedformat": "mkv",
-        })
-
-    def test_subs(self):
-        opts = self._(["--convert-subs", "srt"])
-        conv = {"key": "FFmpegSubtitlesConvertor", "format": "srt"}
-        if self.module_name == "yt_dlp":
-            conv["when"] = "before_dl"
-        self.assertEqual(opts["postprocessors"][0], conv)
-
-    def test_embed(self):
-        subs = {"key": "FFmpegEmbedSubtitle"}
-        thumb = {"key": "EmbedThumbnail", "already_have_thumbnail": False}
-        if self.module_name == "yt_dlp":
-            subs["already_have_subtitle"] = False
-
-        opts = self._(["--embed-subs", "--embed-thumbnail"])
-        self.assertEqual(opts["postprocessors"], [subs, thumb])
-
-        thumb["already_have_thumbnail"] = True
-        if self.module_name == "yt_dlp":
-            subs["already_have_subtitle"] = True
-
-        opts = self._([
-            "--embed-thumbnail",
-            "--embed-subs",
-            "--write-sub",
-            "--write-all-thumbnails",
-        ])
-        self.assertEqual(opts["postprocessors"], [subs, thumb])
-
-    def test_metadata(self):
-        opts = self._("--add-metadata")
-        self.assertEqual(opts["postprocessors"][0], {"key": "FFmpegMetadata"})
-
-    def test_metadata_from_title(self):
-        opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "MetadataFromTitle",
-            "titleformat": "%(artist)s - %(title)s",
-        })
-
-    def test_xattr(self):
-        self._("--xattr-set-filesize", "xattr_set_filesize", True)
-
-        opts = self._("--xattrs")
-        self.assertEqual(opts["postprocessors"][0], {"key": "XAttrMetadata"})
-
-    def test_noop(self):
-        result = self._([
-            "--update",
-            "--dump-user-agent",
-            "--list-extractors",
-            "--extractor-descriptions",
-            "--ignore-config",
-            "--config-location",
-            "--dump-json",
-            "--dump-single-json",
-            "--list-thumbnails",
-        ])
-
-        result["daterange"] = self.default["daterange"]
-        self.assertEqual(result, self.default)
-
-    def _(self, cmdline, option=util.SENTINEL, expected=None):
-        if isinstance(cmdline, str):
-            cmdline = [cmdline]
-        result = ytdl.parse_command_line(self.module, cmdline)
-        if option is not util.SENTINEL:
-            self.assertEqual(result[option], expected, option)
-        return result
-
-
-class Test_CommandlineArguments_YtDlp(Test_CommandlineArguments):
-    module_name = "yt_dlp"
-
-    def test_retries_extractor(self):
-        inf = float("inf")
-
-        self._(["--extractor-retries", "5"], "extractor_retries", 5)
-        self._(["--extractor-retries", "inf"], "extractor_retries", inf)
-        self._(["--extractor-retries", "infinite"], "extractor_retries", inf)
-
-    def test_remuxs_video(self):
-        opts = self._(["--remux-video", " mkv "])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "FFmpegVideoRemuxer",
-            "preferedformat": "mkv",
-        })
-
-    def test_metadata(self):
-        opts = self._(["--embed-metadata",
-                       "--no-embed-chapters",
-                       "--embed-info-json"])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "FFmpegMetadata",
-            "add_chapters": False,
-            "add_metadata": True,
-            "add_infojson": True,
-        })
-
-    def test_metadata_from_title(self):
-        opts = self._(["--metadata-from-title", "%(artist)s - %(title)s"])
-        self.assertEqual(opts["postprocessors"][0], {
-            "key": "MetadataParser",
-            "when": "pre_process",
-            "actions": [self.module.MetadataFromFieldPP.to_action(
-                "title:%(artist)s - %(title)s")],
-        })
-
-
-if __name__ == "__main__":
-    unittest.main(warnings="ignore")
-
-'''
-Usage: __main__.py [OPTIONS] URL [URL...]
-
-Options:
-  General Options:
-    -h, --help                       Print this help text and exit
-    --version                        Print program version and exit
-    --force-generic-extractor       Force extraction to use the generic extractor
-    --flat-playlist                 Do not extract the videos of a playlist, only list them.
-    --no-color                      Do not emit color codes in output
-
-  Network Options:
-    --socket-timeout SECONDS        Time to wait before giving up, in seconds
-    --source-address IP             Client-side IP address to bind to
-    -4, --force-ipv4                Make all connections via IPv4
-    -6, --force-ipv6                Make all connections via IPv6
-
-  Video Selection:
-    --playlist-start NUMBER         Playlist video to start at (default is 1)
-    --playlist-end NUMBER           Playlist video to end at (default is last)
-    --playlist-items ITEM_SPEC      Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.
-    --match-title REGEX             Download only matching titles (regex or caseless sub-string)
-    --reject-title REGEX            Skip download for matching titles (regex or caseless sub-string)
-    --max-downloads NUMBER          Abort after downloading NUMBER files
-    --min-filesize SIZE             Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)
-    --max-filesize SIZE             Do not download any videos larger than SIZE (e.g. 50k or 44.6m)
-    --date DATE                     Download only videos uploaded in this date
-    --datebefore DATE               Download only videos uploaded on or before this date (i.e. inclusive)
-    --dateafter DATE                Download only videos uploaded on or after this date (i.e. inclusive)
-    --min-views COUNT               Do not download any videos with less than COUNT views
-    --max-views COUNT               Do not download any videos with more than COUNT views
-    --match-filter FILTER           Generic video filter. Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = 'LITERAL' (like "uploader = 'Mike Smith'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .
-    --no-playlist                   Download only the video, if the URL refers to a video and a playlist.
-    --yes-playlist                  Download the playlist, if the URL refers to a video and a playlist.
-    --age-limit YEARS               Download only videos suitable for the given age
-    --download-archive FILE         Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
-    --include-ads                   Download advertisements as well (experimental)
-
-  Download Options:
-    -r, --limit-rate RATE           Maximum download rate in bytes per second (e.g. 50K or 4.2M)
-    --skip-unavailable-fragments    Skip unavailable fragments (DASH, hlsnative and ISM)
-    --abort-on-unavailable-fragment Abort downloading when some fragment is not available
-    --keep-fragments                Keep downloaded fragments on disk after downloading is finished; fragments are erased by default
-    --buffer-size SIZE              Size of download buffer (e.g. 1024 or 16K) (default is 1024)
-    --no-resize-buffer              Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.
-    --http-chunk-size SIZE          Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)
-    --playlist-reverse              Download playlist videos in reverse order
-    --playlist-random               Download playlist videos in random order
-    --xattr-set-filesize            Set file xattribute ytdl.filesize with expected file size
-    --hls-prefer-native             Use the native HLS downloader instead of ffmpeg
-    --hls-prefer-ffmpeg             Use ffmpeg instead of the native HLS downloader
-    --hls-use-mpegts                Use the mpegts container for HLS videos, allowing to play the video while downloading (some players may not be able to play it)
-    --external-downloader COMMAND   Use the specified external downloader. Currently supports aria2c,avconv,axel,curl,ffmpeg,httpie,wget
-    --external-downloader-args ARGS Give these arguments to the external downloader
-
-  Filesystem Options:
-    -a, --batch-file FILE           File containing URLs to download ('-' for stdin), one URL per line. Lines starting with '#', ';' or ']' are considered as comments and ignored.
-    --id                            Use only video ID in file name
-    -o, --output TEMPLATE           Output filename template, see the "OUTPUT TEMPLATE" for all the info
-    --output-na-placeholder PLACEHOLDER  Placeholder value for unavailable meta fields in output filename template (default is "NA")
-    --autonumber-start NUMBER       Specify the start value for %(autonumber)s (default is 1)
-    --restrict-filenames            Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
-    -w, --no-overwrites             Do not overwrite files
-    -c, --continue                  Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.
-    --no-continue                   Do not resume partially downloaded files (restart from beginning)
-    --no-part                       Do not use .part files - write directly into output file
-    --no-mtime                      Do not use the Last-modified header to set the file modification time
-    --write-description             Write video description to a .description file
-    --write-info-json               Write video metadata to a .info.json file
-    --write-annotations             Write video annotations to a .annotations.xml file
-    --load-info-json FILE           JSON file containing the video information (created with the "--write-info-json" option)
-    --cookies FILE                  File to read cookies from and dump cookie jar in
-    --cache-dir DIR                 Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.
-    --no-cache-dir                  Disable filesystem caching
-    --rm-cache-dir                  Delete all filesystem cache files
-
-  Thumbnail Options:
-    --write-thumbnail               Write thumbnail image to disk
-    --write-all-thumbnails          Write all thumbnail image formats to disk
-
-  Verbosity / Simulation Options:
-    -q, --quiet                     Activate quiet mode
-    --no-warnings                   Ignore warnings
-    -s, --simulate                  Do not download the video and do not write anything to disk
-    --skip-download                 Do not download the video
-    -g, --get-url                   Simulate, quiet but print URL
-    -e, --get-title                 Simulate, quiet but print title
-    --get-id                        Simulate, quiet but print id
-    --get-thumbnail                 Simulate, quiet but print thumbnail URL
-    --get-description               Simulate, quiet but print video description
-    --get-duration                  Simulate, quiet but print video length
-    --get-filename                  Simulate, quiet but print output filename
-    --get-format                    Simulate, quiet but print output format
-    -j, --dump-json                 Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.
-    -J, --dump-single-json          Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.
-    --print-json                    Be quiet and print the video information as JSON (video is still being downloaded).
-    --newline                       Output progress bar as new lines
-    --no-progress                   Do not print progress bar
-    --console-title                 Display progress in console titlebar
-    -v, --verbose                   Print various debugging information
-    --dump-pages                    Print downloaded pages encoded using base64 to debug problems (very verbose)
-    --write-pages                   Write downloaded intermediary pages to files in the current directory to debug problems
-    --print-traffic                 Display sent and read HTTP traffic
-    -C, --call-home                 Contact the youtube-dl server for debugging
-    --no-call-home                  Do NOT contact the youtube-dl server for debugging
-
-  Workarounds:
-    --encoding ENCODING             Force the specified encoding (experimental)
-    --no-check-certificate          Suppress HTTPS certificate validation
-    --prefer-insecure               Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)
-    --bidi-workaround               Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
-    --sleep-interval SECONDS        Number of seconds to sleep before each download when used alone or a lower bound of a range for randomized sleep before each download (minimum possible number of seconds to sleep) when used along with --max-sleep-interval.
-    --max-sleep-interval SECONDS    Upper bound of a range for randomized sleep before each download (maximum possible number of seconds to sleep). Must only be used along with --min-sleep-interval.
-
-  Video Format Options:
-    -f, --format FORMAT             Video format code, see the "FORMAT SELECTION" for all the info
-    --all-formats                   Download all available video formats
-    --prefer-free-formats           Prefer free video formats unless a specific one is requested
-    -F, --list-formats              List all available formats of requested videos
-    --youtube-skip-dash-manifest    Do not download the DASH manifests and related data on YouTube videos
-    --merge-output-format FORMAT    If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv. Ignored if no merge is required
-
-  Subtitle Options:
-    --write-sub                     Write subtitle file
-    --write-auto-sub                Write automatically generated subtitle file (YouTube only)
-    --all-subs                      Download all the available subtitles of the video
-    --list-subs                     List all available subtitles for the video
-    --sub-format FORMAT             Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"
-    --sub-lang LANGS                Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags
-
-  Authentication Options:
-    -u, --username USERNAME         Login with this account ID
-    -p, --password PASSWORD         Account password. If this option is left out, youtube-dl will ask interactively.
-    -2, --twofactor TWOFACTOR       Two-factor authentication code
-    -n, --netrc                     Use .netrc authentication data
-    --video-password PASSWORD       Video password (vimeo, youku)
-
-  Adobe Pass Options:
-    --ap-mso MSO                    Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs
-    --ap-username USERNAME          Multiple-system operator account login
-    --ap-password PASSWORD          Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.
-    --ap-list-mso                   List all supported multiple-system operators
-
-  Post-processing Options:
-    --postprocessor-args ARGS       Give these arguments to the postprocessor
-    -k, --keep-video                Keep the video file on disk after the post-processing; the video is erased by default
-    --prefer-avconv                 Prefer avconv over ffmpeg for running the postprocessors
-    --prefer-ffmpeg                 Prefer ffmpeg over avconv for running the postprocessors (default)
-    --ffmpeg-location PATH          Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.
-    --exec CMD                      Execute a command on the file after downloading and post-processing, similar to find's -exec syntax. Example: --exec 'adb push {} /sdcard/Music/ && rm {}'
-    --convert-subs FORMAT           Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)
-'''
