From 8de58070ee3e55f29966a787fd618632dbf4309b Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sat, 8 Jan 2022 20:39:28 -0500 Subject: New upstream version 1.20.1. --- CHANGELOG.md | 14 + PKG-INFO | 6 +- README.rst | 4 +- data/completion/_gallery-dl | 4 +- data/completion/gallery-dl | 4 +- data/man/gallery-dl.1 | 11 +- data/man/gallery-dl.conf.5 | 16 +- gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl.egg-info/SOURCES.txt | 4 +- gallery_dl/extractor/common.py | 3 +- gallery_dl/extractor/gelbooru.py | 43 ++- gallery_dl/extractor/gelbooru_v02.py | 18 +- gallery_dl/extractor/hitomi.py | 35 ++- gallery_dl/extractor/mangadex.py | 17 +- gallery_dl/extractor/newgrounds.py | 64 +++- gallery_dl/extractor/patreon.py | 1 + gallery_dl/extractor/wordpress.py | 41 --- gallery_dl/job.py | 5 +- gallery_dl/option.py | 20 +- gallery_dl/path.py | 3 +- gallery_dl/version.py | 2 +- test/test_ytdl.py | 545 ----------------------------------- 22 files changed, 220 insertions(+), 646 deletions(-) delete mode 100644 gallery_dl/extractor/wordpress.py delete mode 100644 test/test_ytdl.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dc4a21..c505c4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ # Changelog +## 1.20.1 - 2022-01-08 +### Additions +- [newgrounds] add `search` extractor ([#2161](https://github.com/mikf/gallery-dl/issues/2161)) +### Changes +- restore `-d/--dest` functionality from before 1.20.0 ([#2148](https://github.com/mikf/gallery-dl/issues/2148)) +- change short option for `--directory` to `-D` +### Fixes +- [gelbooru] handle changed API response format ([#2157](https://github.com/mikf/gallery-dl/issues/2157)) +- [hitomi] fix image URLs ([#2153](https://github.com/mikf/gallery-dl/issues/2153)) +- [mangadex] fix extraction ([#2177](https://github.com/mikf/gallery-dl/issues/2177)) +- [rule34] use `https://api.rule34.xxx` for API requests +- fix cookie checks for patreon, fanbox, fantia +- improve UNC path handling ([#2126](https://github.com/mikf/gallery-dl/issues/2126)) + ## 1.20.0 - 2021-12-29 ### Additions - [500px] add `favorite` extractor ([#1927](https://github.com/mikf/gallery-dl/issues/1927)) diff --git a/PKG-INFO b/PKG-INFO index 08b652d..63932cc 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.20.0 +Version: 1.20.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -98,8 +98,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/README.rst b/README.rst index c8b7afd..ecc0b4d 100644 --- a/README.rst +++ b/README.rst @@ -64,8 +64,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 2ac93f7..6b3ac6d 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -7,10 +7,10 @@ local rc=1 _arguments -C -S \ {-h,--help}'[Print this help message and exit]' \ --version'[Print program version and exit]' \ ---dest'[==SUPPRESS==]':'':_files \ {-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'':_files \ +{-d,--destination}'[Target location for file downloads]':'' \ +{-D,--directory}'[Exact location for file downloads]':'' \ {-f,--filename}'[Filename format string for downloaded files ("/O" for "original" filenames)]':'' \ -{-d,--directory}'[Target location for file downloads]':'' \ --cookies'[File to load additional cookies from]':'':_files \ --proxy'[Use the specified proxy]':'' \ --clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 4085bb9..ac64645 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -7,10 +7,10 @@ _gallery_dl() if [[ "${prev}" =~ ^(-i|--input-file|--cookies|--write-log|--write-unsupported|-c|--config|--config-yaml|--download-archive)$ ]]; then COMPREPLY=( $(compgen -f -- "${cur}") ) - elif [[ "${prev}" =~ ^(--dest)$ ]]; then + elif [[ "${prev}" =~ ^()$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --dest --input-file --filename --directory --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) fi } diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index a7f51a7..992d0a6 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-12-29" "1.20.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2022-01-08" "1.20.1" "gallery-dl Manual" .\" disable hyphenation .nh @@ -26,12 +26,15 @@ Print program version and exit .B "\-i, \-\-input\-file" \f[I]FILE\f[] Download URLs found in FILE ('-' for stdin). More than one --input-file can be specified .TP +.B "\-d, \-\-destination" \f[I]PATH\f[] +Target location for file downloads +.TP +.B "\-D, \-\-directory" \f[I]PATH\f[] +Exact location for file downloads +.TP .B "\-f, \-\-filename" \f[I]FORMAT\f[] Filename format string for downloaded files ('/O' for "original" filenames) .TP -.B "\-d, \-\-directory" \f[I]PATH\f[] -Target location for file downloads -.TP .B "\-\-cookies" \f[I]FILE\f[] File to load additional cookies from .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index a574625..a62575b 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-12-29" "1.20.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2022-01-08" "1.20.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -649,11 +649,23 @@ config options. .IP "Default:" 9 \f[I]["oauth", "recursive", "test"]\f[] + current extractor category +.IP "Example:" 4 +["imgur", "gfycat:user", "*:image"] + .IP "Description:" 4 -A list of extractor categories to ignore (or allow) +A list of extractor identifiers to ignore (or allow) when spawning child extractors for unknown URLs, e.g. from \f[I]reddit\f[] or \f[I]plurk\f[]. +Each identifier can be + +.br +* A category or basecategory name (\f[I]"imgur"\f[], \f[I]"mastodon"\f[]) +.br +* | A (base)category-subcategory pair, where both names are separated by a colon (\f[I]"gfycat:user"\f[]). +Both names can be a * or left empty, matching all possible names (\f[I]"*:image"\f[], \f[I]":user"\f[]). +.br + Note: Any \f[I]blacklist\f[] setting will automatically include \f[I]"oauth"\f[], \f[I]"recursive"\f[], and \f[I]"test"\f[]. diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 8b87746..7e1e284 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.20.0 +Version: 1.20.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -98,8 +98,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 127354e..8ddae52 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -180,7 +180,6 @@ gallery_dl/extractor/webtoons.py gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py gallery_dl/extractor/wikieat.py -gallery_dl/extractor/wordpress.py gallery_dl/extractor/xhamster.py gallery_dl/extractor/xvideos.py gallery_dl/extractor/ytdl.py @@ -205,5 +204,4 @@ test/test_output.py test/test_postprocessor.py test/test_results.py test/test_text.py -test/test_util.py -test/test_ytdl.py \ No newline at end of file +test/test_util.py \ No newline at end of file diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index c440aee..afe4a16 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -336,7 +336,8 @@ class Extractor(): now = time.time() for cookie in self._cookiejar: - if cookie.name in names and cookie.domain == domain: + if cookie.name in names and ( + not domain or cookie.domain == domain): if cookie.expires and cookie.expires < now: self.log.warning("Cookie '%s' has expired", cookie.name) else: diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index df45d0d..a6bda52 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -10,7 +10,7 @@ from .common import Extractor, Message from . import gelbooru_v02 -from .. import text, exception +from .. import text, util, exception import binascii @@ -20,6 +20,42 @@ class GelbooruBase(): basecategory = "booru" root = "https://gelbooru.com" + def _api_request(self, params): + url = self.root + "/index.php?page=dapi&s=post&q=index&json=1" + data = self.request(url, params=params).json() + if "post" not in data: + return () + posts = data["post"] + if not isinstance(posts, list): + return (posts,) + return posts + + def _pagination(self, params): + params["pid"] = self.page_start + params["limit"] = self.per_page + + post = None + while True: + try: + posts = self._api_request(params) + except ValueError: + if "tags" not in params or post is None: + raise + taglist = [tag for tag in params["tags"].split() + if not tag.startswith("id:<")] + taglist.append("id:<" + str(post.attrib["id"])) + params["tags"] = " ".join(taglist) + params["pid"] = 0 + continue + + post = None + for post in posts: + yield post + + if len(posts) < self.per_page: + return + params["pid"] += 1 + @staticmethod def _file_url(post): url = post["file_url"] @@ -82,6 +118,11 @@ class GelbooruPoolExtractor(GelbooruBase, "pool_name": text.unescape(name), } + def posts(self): + params = {} + for params["id"] in util.advance(self.post_ids, self.page_start): + yield from self._api_request(params) + class GelbooruPostExtractor(GelbooruBase, gelbooru_v02.GelbooruV02PostExtractor): diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index a42a202..8da0bde 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -19,8 +19,15 @@ import re class GelbooruV02Extractor(booru.BooruExtractor): basecategory = "gelbooru_v02" + def __init__(self, match): + booru.BooruExtractor.__init__(self, match) + try: + self.api_root = INSTANCES[self.category]["api_root"] + except KeyError: + self.api_root = self.root + def _api_request(self, params): - url = self.root + "/index.php?page=dapi&s=post&q=index" + url = self.api_root + "/index.php?page=dapi&s=post&q=index" return ElementTree.fromstring(self.request(url, params=params).text) def _pagination(self, params): @@ -97,12 +104,15 @@ class GelbooruV02Extractor(booru.BooruExtractor): post["notes"] = notes -BASE_PATTERN = GelbooruV02Extractor.update({ +INSTANCES = { "realbooru": {"root": "https://realbooru.com"}, - "rule34" : {"root": "https://rule34.xxx"}, + "rule34" : {"root": "https://rule34.xxx", + "api_root": " https://api.rule34.xxx"}, "safebooru": {"root": "https://safebooru.org"}, "tbib" : {"root": "https://tbib.org"}, -}) +} + +BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES) class GelbooruV02TagExtractor(GelbooruV02Extractor): diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 88cf98c..ce6c7ce 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -26,7 +26,7 @@ class HitomiGalleryExtractor(GalleryExtractor): r"/(?:[^/?#]+-)?(\d+)") test = ( ("https://hitomi.la/galleries/867789.html", { - "pattern": r"https://[a-c]b.hitomi.la/images/1639745412/\d+" + "pattern": r"https://[a-c]b.hitomi.la/images/1641140516/\d+" r"/[0-9a-f]{64}\.jpg", "keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae", "options": (("metadata", True),), @@ -39,12 +39,12 @@ class HitomiGalleryExtractor(GalleryExtractor): }), # Game CG with scenes (#321) ("https://hitomi.la/galleries/733697.html", { - "url": "479d16fe92117a6a2ce81b4e702e6347922c81e3", + "url": "d4854175da2b5fa4ae62749266c7be0bf237dc99", "count": 210, }), # fallback for galleries only available through /reader/ URLs ("https://hitomi.la/galleries/1045954.html", { - "url": "ebc1415c5d7f634166ef7e2635b77735de1ea7a2", + "url": "eea99c3745719a7a392150335e6ae3f73faa0b85", "count": 1413, }), # gallery with "broken" redirect @@ -138,7 +138,7 @@ class HitomiGalleryExtractor(GalleryExtractor): def images(self, _): # see https://ltn.hitomi.la/gg.js - gg_m, gg_b = _parse_gg(self) + gg_m, gg_b, gg_default = _parse_gg(self) result = [] for image in self.info["files"]: @@ -148,7 +148,7 @@ class HitomiGalleryExtractor(GalleryExtractor): # see https://ltn.hitomi.la/common.js inum = int(ihash[-1] + ihash[-3:-1], 16) url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format( - chr(97 + gg_m.get(inum, 0)), + chr(97 + gg_m.get(inum, gg_default)), gg_b, inum, ihash, idata["extension"], ) result.append((url, idata)) @@ -195,10 +195,25 @@ class HitomiTagExtractor(Extractor): def _parse_gg(extr): page = extr.request("https://ltn.hitomi.la/gg.js").text - m = { - int(match.group(1)): int(match.group(2)) - for match in re.finditer(r"case (\d+): o = (\d+); break;", page) - } + m = {} + + keys = [] + for match in re.finditer( + r"case\s+(\d+):(?:\s*o\s*=\s*(\d+))?", page): + key, value = match.groups() + keys.append(int(key)) + + if value: + value = int(value) + for key in keys: + m[key] = value + keys.clear() + + for match in re.finditer( + r"if\s+\(g\s*===?\s*(\d+)\)[\s{]*o\s*=\s*(\d+)", page): + m[int(match.group(1))] = int(match.group(2)) + + d = re.search(r"(?:var\s|default:)\s*o\s*=\s*(\d+)", page) b = re.search(r"b:\s*[\"'](.+)[\"']", page) - return m, b.group(1).strip("/") + return m, b.group(1).strip("/"), int(d.group(1)) if d else 1 diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 393f4e2..ea5d4a8 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -40,7 +40,7 @@ class MangadexExtractor(Extractor): uuid = chapter["id"] data = self._transform(chapter) data["_extractor"] = MangadexChapterExtractor - self._cache[uuid] = (chapter, data) + self._cache[uuid] = data yield Message.Queue, self.root + "/chapter/" + uuid, data def _transform(self, chapter): @@ -72,7 +72,7 @@ class MangadexExtractor(Extractor): "date" : text.parse_datetime(cattributes["publishAt"]), "lang" : lang, "language": util.code_to_language(lang), - "count" : len(cattributes["data"]), + "count" : cattributes["pages"], } data["artist"] = [artist["attributes"]["name"] @@ -107,20 +107,21 @@ class MangadexChapterExtractor(MangadexExtractor): def items(self): try: - chapter, data = self._cache.pop(self.uuid) + data = self._cache.pop(self.uuid) except KeyError: chapter = self.api.chapter(self.uuid) data = self._transform(chapter) - yield Message.Directory, data - cattributes = chapter["attributes"] + yield Message.Directory, data data["_http_headers"] = self._headers - base = "{}/data/{}/".format( - self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"]) + + server = self.api.athome_server(self.uuid) + chapter = server["chapter"] + base = "{}/data/{}/".format(server["baseUrl"], chapter["hash"]) enum = util.enumerate_reversed if self.config( "page-reverse") else enumerate - for data["page"], page in enum(cattributes["data"], 1): + for data["page"], page in enum(chapter["data"], 1): text.nameext_from_url(page, data) yield Message.Url, base + page, data diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 4351b3e..8bcbc20 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -38,6 +38,7 @@ class NewgroundsExtractor(Extractor): def items(self): self.login() + metadata = self.metadata() for post_url in self.posts(): try: @@ -48,6 +49,8 @@ class NewgroundsExtractor(Extractor): url = None if url: + if metadata: + post.update(metadata) yield Message.Directory, post yield Message.Url, url, text.nameext_from_url(url, post) @@ -62,9 +65,12 @@ class NewgroundsExtractor(Extractor): "Unable to get download URL for '%s'", post_url) def posts(self): - """Return urls of all relevant image pages""" + """Return URLs of all relevant post pages""" return self._pagination(self._path) + def metadata(self): + """Return general metadata""" + def login(self): username, password = self._get_auth_info() if username: @@ -493,3 +499,59 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): text.ensure_http_scheme(user.rpartition('"')[2]) for user in text.extract_iter(page, 'class="item-user', '"> NUMBER (like - "comment_count > 12", also works with - >=, <, <=, !=, =) to compare against a - number, key = 'LITERAL' (like "uploader - = 'Mike Smith'", also works with !=) to - match against a string literal and & to - require multiple matches. Values which - are not known are excluded unless you - put a question mark (?) after the - operator. For example, to only match - videos that have been liked more than - 100 times and disliked less than 50 - times (or the dislike functionality is - not available at the given service), - but who also have a description, use - --match-filter "like_count > 100 & - dislike_count