From b830dc03b3b7c9dd119648e1be9c1145d56e096c Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Tue, 15 Apr 2025 05:25:37 -0400 Subject: New upstream version 1.29.4. --- CHANGELOG.md | 63 +++++++-------- PKG-INFO | 6 +- README.rst | 4 +- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 56 +++++++++++++- docs/gallery-dl.conf | 12 ++- gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl/downloader/ytdl.py | 1 + gallery_dl/extractor/chevereto.py | 20 +++-- gallery_dl/extractor/danbooru.py | 5 +- gallery_dl/extractor/deviantart.py | 4 +- gallery_dl/extractor/discord.py | 16 ++-- gallery_dl/extractor/everia.py | 2 +- gallery_dl/extractor/gelbooru.py | 3 +- gallery_dl/extractor/hentai2read.py | 24 +++--- gallery_dl/extractor/instagram.py | 1 + gallery_dl/extractor/issuu.py | 15 ++-- gallery_dl/extractor/kemonoparty.py | 3 + gallery_dl/extractor/pixiv.py | 4 +- gallery_dl/extractor/readcomiconline.py | 4 +- gallery_dl/extractor/rule34xyz.py | 82 +++++++++++--------- gallery_dl/extractor/tumblr.py | 4 +- gallery_dl/extractor/webtoons.py | 133 +++++++++++++++++++------------- gallery_dl/extractor/zerochan.py | 6 +- gallery_dl/extractor/zzup.py | 2 +- gallery_dl/formatter.py | 2 + gallery_dl/path.py | 2 +- gallery_dl/postprocessor/metadata.py | 9 ++- gallery_dl/postprocessor/ugoira.py | 7 +- gallery_dl/util.py | 3 + gallery_dl/version.py | 2 +- test/test_formatter.py | 9 ++- test/test_postprocessor.py | 21 +++++ 33 files changed, 340 insertions(+), 193 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 257f47b..d779ffa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,43 +1,32 @@ -## 1.29.3 - 2025-03-29 +## 1.29.4 - 2025-04-13 ### Extractors #### Additions -- [danbooru] add `favgroup` extractor -- [imhentai] support `hentaienvy.com` and `hentaizap.com` ([#7192](https://github.com/mikf/gallery-dl/issues/7192) [#7218](https://github.com/mikf/gallery-dl/issues/7218)) +- [chevereto] support `imagepond.net` ([#7278](https://github.com/mikf/gallery-dl/issues/7278)) +- [webtoons] add `artist` extractor ([#7274](https://github.com/mikf/gallery-dl/issues/7274)) #### Fixes -- [bunkr] fix `filename` extraction ([#7237](https://github.com/mikf/gallery-dl/issues/7237)) -- [deviantart:stash] fix legacy `sta.sh` links ([#7181](https://github.com/mikf/gallery-dl/issues/7181)) -- [hitomi] fix extractors ([#7230](https://github.com/mikf/gallery-dl/issues/7230)) -- [mangapark] fix extractors ([#4999](https://github.com/mikf/gallery-dl/issues/4999) [#5883](https://github.com/mikf/gallery-dl/issues/5883) [#6507](https://github.com/mikf/gallery-dl/issues/6507) [#6908](https://github.com/mikf/gallery-dl/issues/6908) [#7232](https://github.com/mikf/gallery-dl/issues/7232)) -- [nozomi] fix extractors ([#7242](https://github.com/mikf/gallery-dl/issues/7242)) -- [patreon] include subdomains in `session_id` cookie check ([#7188](https://github.com/mikf/gallery-dl/issues/7188)) -- [patreon] do not match `/messages` URLs as creator ([#7187](https://github.com/mikf/gallery-dl/issues/7187)) -- [pinterest] handle `story_pin_static_sticker_block` blocks ([#7251](https://github.com/mikf/gallery-dl/issues/7251)) -- [sexcom] fix `gif` pin extraction ([#7239](https://github.com/mikf/gallery-dl/issues/7239)) -- [skeb] make exceptions when extracting posts non-fatal ([#7250](https://github.com/mikf/gallery-dl/issues/7250)) -- [zerochan] parse `JSON-LD` data ([#7178](https://github.com/mikf/gallery-dl/issues/7178)) +- [deviantart] fix `KeyError: 'has_subfolders'` ([#7272](https://github.com/mikf/gallery-dl/issues/7272) [#7337](https://github.com/mikf/gallery-dl/issues/7337)) +- [discord] fix `parent` keyword inconsistency ([#7341](https://github.com/mikf/gallery-dl/issues/7341) [#7353](https://github.com/mikf/gallery-dl/issues/7353)) +- [E621:pool] fix `AttributeError` ([#7265](https://github.com/mikf/gallery-dl/issues/7265) [#7344](https://github.com/mikf/gallery-dl/issues/7344)) +- [everia] fix/improve image extraction ([#7270](https://github.com/mikf/gallery-dl/issues/7270)) +- [gelbooru] fix video URLs ([#7345](https://github.com/mikf/gallery-dl/issues/7345)) +- [hentai2read] fix `AttributeError` exception for chapters without artist ([#7355](https://github.com/mikf/gallery-dl/issues/7355)) +- [issuu] fix extractors ([#7317](https://github.com/mikf/gallery-dl/issues/7317)) +- [kemonoparty] fix file paths with backslashes ([#7321](https://github.com/mikf/gallery-dl/issues/7321)) +- [readcomiconline] fix `issue` extractor ([#7269](https://github.com/mikf/gallery-dl/issues/7269) [#7330](https://github.com/mikf/gallery-dl/issues/7330)) +- [rule34xyz] update to API v2 ([#7289](https://github.com/mikf/gallery-dl/issues/7289)) +- [zerochan] fix `KeyError: 'author'` ([#7282](https://github.com/mikf/gallery-dl/issues/7282)) #### Improvements -- [arcalive] extend `gifs` option -- [deviantart] support multiple images for single posts ([#6653](https://github.com/mikf/gallery-dl/issues/6653) [#7261](https://github.com/mikf/gallery-dl/issues/7261)) -- [deviantart] add subfolder support ([#4988](https://github.com/mikf/gallery-dl/issues/4988) [#7185](https://github.com/mikf/gallery-dl/issues/7185) [#7220](https://github.com/mikf/gallery-dl/issues/7220)) -- [deviantart] match `/gallery/recommended-for-you` URLs ([#7168](https://github.com/mikf/gallery-dl/issues/7168) [#7243](https://github.com/mikf/gallery-dl/issues/7243)) -- [instagram] extract videos from `video_dash_manifest` data ([#6379](https://github.com/mikf/gallery-dl/issues/6379) [#7006](https://github.com/mikf/gallery-dl/issues/7006)) -- [mangapark] support mirror domains -- [mangapark] support v3 URLs ([#2072](https://github.com/mikf/gallery-dl/issues/2072)) -- [mastodon] support `/statuses` URLs ([#7255](https://github.com/mikf/gallery-dl/issues/7255)) -- [sexcom] support new-style `/gifs` and `/videos` URLs ([#7239](https://github.com/mikf/gallery-dl/issues/7239)) -- [subscribestar] detect redirects to `/age_confirmation_warning` pages -- [tiktok] add retry mechanism to rehydration data extraction ([#7191](https://github.com/mikf/gallery-dl/issues/7191)) -#### Metadata -- [bbc] extract more metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) -- [kemonoparty] extract `archives` metadata ([#7195](https://github.com/mikf/gallery-dl/issues/7195)) -- [kemonoparty] enable `username`/`user_profile` metadata by default -- [kemonoparty:discord] always provide `channel_name` metadata ([#7245](https://github.com/mikf/gallery-dl/issues/7245)) -- [sexcom] extract `date_url` metadata ([#7239](https://github.com/mikf/gallery-dl/issues/7239)) -- [subscribestar] extract `title` metadata ([#7219](https://github.com/mikf/gallery-dl/issues/7219)) +- [instagram] use Chrome `User-Agent` by default ([#6379](https://github.com/mikf/gallery-dl/issues/6379)) +- [pixiv] support `phixiv.net` URLs ([#7352](https://github.com/mikf/gallery-dl/issues/7352)) +- [tumblr] support URLs without subdomain ([#7358](https://github.com/mikf/gallery-dl/issues/7358)) +- [webtoons] download JPEG files in higher quality +- [webtoons] use a default 0.5-1.5s delay between requests ([#7329](https://github.com/mikf/gallery-dl/issues/7329)) +- [zzup] support `w.zzup.com` URLs ([#7327](https://github.com/mikf/gallery-dl/issues/7327)) ### Downloaders -- [ytdl] support processing inline HLS/DASH manifest data ([#6379](https://github.com/mikf/gallery-dl/issues/6379) [#7006](https://github.com/mikf/gallery-dl/issues/7006)) +- [ytdl] fix `KeyError: 'extractor'` exception when `ytdl` reports an error ([#7301](https://github.com/mikf/gallery-dl/issues/7301)) +### Post Processors +- [metadata] add `metadata-path` option ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) +- [metadata] fix handling of empty directory paths ([#7296](https://github.com/mikf/gallery-dl/issues/7296)) +- [ugoira] preserve `extension` when using `"mode": "archive"` ([#7304](https://github.com/mikf/gallery-dl/issues/7304)) ### Miscellaneous -- [aes] simplify `block_count` calculation -- [common] add `subdomains` argument to `cookies_check()` ([#7188](https://github.com/mikf/gallery-dl/issues/7188)) -- [config] fix using the same key multiple times with `apply` ([#7127](https://github.com/mikf/gallery-dl/issues/7127)) -- [tests] implement expected failures +- [formatter] add `i` and `f` conversions ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) diff --git a/PKG-INFO b/PKG-INFO index 4481e14..3d113ec 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.29.3 +Version: 1.29.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -133,9 +133,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index 43f18a9..1d8a195 100644 --- a/README.rst +++ b/README.rst @@ -77,9 +77,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 5b0e7e7..7eb34af 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2025-03-29" "1.29.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2025-04-13" "1.29.4" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index d032f25..dc11605 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2025-03-29" "1.29.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2025-04-13" "1.29.4" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -472,6 +472,7 @@ response before \f[I]retrying\f[] the request. \f[I]soundgasm\f[], \f[I]urlgalleries\f[], \f[I]vk\f[], +\f[I]webtoons\f[], \f[I]weebcentral\f[], \f[I]xfolio\f[], \f[I]zerochan\f[] @@ -815,6 +816,8 @@ or a \f[I]list\f[] with IP and explicit port number as elements. .br * \f[I]"Patreon/72.2.28 (Android; Android 14; Scale/2.10)"\f[]: \f[I]patreon\f[] .br +* \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36"\f[]: \f[I]instagram\f[] +.br * \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise .IP "Description:" 4 @@ -6212,6 +6215,45 @@ Fetch extra submission metadata during gallery downloads. Note: This requires 1 additional HTTP request per submission. +.SS extractor.webtoons.quality +.IP "Type:" 6 +.br +* \f[I]integer\f[] +.br +* \f[I]string\f[] +.br +* \f[I]object\f[] (ext -> type) + + +.IP "Default:" 9 +\f[I]"original"\f[] + +.IP "Example:" 4 +.br +* 90 +.br +* "q50" +.br +* {"jpg": "q80", "jpeg": "q80", "png": false} + +.IP "Description:" 4 +Controls the quality of downloaded files by modifying URLs' \f[I]type\f[] parameter. + +\f[I]"original"\f[] +Download minimally compressed versions of JPG files +any \f[I]integer\f[] +Use \f[I]"q"\f[] as \f[I]type\f[] parameter for JPEG files +any \f[I]string\f[] +Use this value as \f[I]type\f[] parameter for JPEG files +any \f[I]object\f[] +Use the given values as \f[I]type\f[] parameter for URLs with the specified extensions +.br +- Set a value to \f[I]false\f[] to completely remove these extension's \f[I]type\f[] parameter +.br +- Omit an extension to leave its URLs unchanged +.br + + .SS extractor.weibo.gifs .IP "Type:" 6 .br @@ -7731,6 +7773,18 @@ files with, which will replace the original filename extensions. Note: \f[I]metadata.extension\f[] is ignored if this option is set. +.SS metadata.metadata-path +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Example:" 4 +"_meta_path" + +.IP "Description:" 4 +Insert the path of generated files +into metadata dictionaries as the given name. + + .SS metadata.event .IP "Type:" 6 .br diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 8ede568..b85a3e7 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -732,11 +732,13 @@ "api-key" : null, "metadata": false }, - "weebcentral": + "webtoons": { - "sleep-request": "0.5-1.5" + "sleep-request": "0.5-1.5", + + "quality": "original" }, - "xfolio": + "weebcentral": { "sleep-request": "0.5-1.5" }, @@ -751,6 +753,10 @@ "retweets" : false, "videos" : true }, + "xfolio": + { + "sleep-request": "0.5-1.5" + }, "ytdl": { "cmdline-args": null, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 4481e14..3d113ec 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.29.3 +Version: 1.29.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -133,9 +133,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 9d653b3..7a20dc2 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -200,6 +200,7 @@ class YoutubeDLDownloader(DownloaderBase): return None info_dict = { + "extractor": "", "id" : video_id, "title" : video_id, "formats" : fmts, diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py index c9ccb7d..600d231 100644 --- a/gallery_dl/extractor/chevereto.py +++ b/gallery_dl/extractor/chevereto.py @@ -18,19 +18,23 @@ class CheveretoExtractor(BaseExtractor): directory_fmt = ("{category}", "{user}", "{album}",) archive_fmt = "{id}" - def __init__(self, match): - BaseExtractor.__init__(self, match) - self.path = match.group(match.lastindex) + def _init(self): + self.path = self.groups[-1] def _pagination(self, url): - while url: + while True: page = self.request(url).text for item in text.extract_iter( page, '
<') + url = text.extr(page, 'data-pagination="next" href="', '"') + if not url: + return + if url[0] == "/": + url = self.root + url BASE_PATTERN = CheveretoExtractor.update({ @@ -42,6 +46,10 @@ BASE_PATTERN = CheveretoExtractor.update({ "root": "https://img.kiwi", "pattern": r"img\.kiwi", }, + "imagepond": { + "root": "https://imagepond.net", + "pattern": r"imagepond\.net", + }, }) diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 741800c..06c31b9 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -282,10 +282,11 @@ class DanbooruPoolExtractor(DanbooruExtractor): example = "https://danbooru.donmai.us/pools/12345" def metadata(self): - return self._collection_metadata(self.groups[-1], "pool") + self.pool_id = self.groups[-1] + return self._collection_metadata(self.pool_id, "pool") def posts(self): - return self._collection_posts(self.groups[-1], "pool") + return self._collection_posts(self.pool_id, "pool") class DanbooruFavgroupExtractor(DanbooruExtractor): diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 3a862c1..378c7ec 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -687,7 +687,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ for folder in folders: if match(folder["name"]): return folder - elif folder["has_subfolders"]: + elif folder.get("has_subfolders"): for subfolder in folder["subfolders"]: if match(subfolder["name"]): return subfolder @@ -695,7 +695,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ for folder in folders: if folder["folderid"] == uuid: return folder - elif folder["has_subfolders"]: + elif folder.get("has_subfolders"): for subfolder in folder["subfolders"]: if subfolder["folderid"] == uuid: return subfolder diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py index 6a5fcc9..ac21fec 100644 --- a/gallery_dl/extractor/discord.py +++ b/gallery_dl/extractor/discord.py @@ -49,7 +49,10 @@ class DiscordExtractor(Extractor): text_content.append(field.get("name", "")) text_content.append(field.get("value", "")) - text_content.append(embed.get("footer", {}).get("text", "")) + try: + text_content.append(embed["footer"]["text"]) + except Exception: + pass if message.get("poll"): text_content.append(message["poll"]["question"]["text"]) @@ -224,10 +227,12 @@ class DiscordExtractor(Extractor): return self.server_metadata def build_server_and_channels(self, server_id): - server = self.api.get_server(server_id) - self.parse_server(server) + self.parse_server(self.api.get_server(server_id)) - for channel in self.api.get_server_channels(server_id): + for channel in sorted( + self.api.get_server_channels(server_id), + key=lambda ch: ch["type"] != 4 + ): self.parse_channel(channel) @@ -353,7 +358,8 @@ class DiscordAPI(): "limit": MESSAGES_BATCH, "before": before }) - before = messages[-1]["id"] + if messages: + before = messages[-1]["id"] return messages return self._pagination(_method, MESSAGES_BATCH) diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py index 94444ff..e41f6f6 100644 --- a/gallery_dl/extractor/everia.py +++ b/gallery_dl/extractor/everia.py @@ -52,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor): def items(self): url = self.root + self.groups[0] page = self.request(url).text - content = text.extr(page, 'itemprop="text">', "
") + content = text.extr(page, 'itemprop="text">', "", "") manga_id, pos = text.extract(page, 'data-mid="', '"', pos) chapter_id, pos = text.extract(page, 'data-cid="', '"', pos) - chapter, sep, minor = self.chapter.partition(".") - match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - " + chapter, sep, minor = self.groups[1].partition(".") + + match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - " r"([^:]+): (.+) . Page 1 ", title) + if match: + manga, type, author, _, title = match.groups() + else: + self.log.warning("Failed to extract 'manga', 'type', 'author', " + "and 'title' metadata") + manga = type = author = title = "" + return { - "manga": match.group(1), + "manga": manga, "manga_id": text.parse_int(manga_id), "chapter": text.parse_int(chapter), "chapter_minor": sep + minor, "chapter_id": text.parse_int(chapter_id), - "type": match.group(2), - "author": match.group(3), - "title": match.group(5), + "type": type, + "author": author, + "title": title, "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index aa26408..432a7ad 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -29,6 +29,7 @@ class InstagramExtractor(Extractor): root = "https://www.instagram.com" cookies_domain = ".instagram.com" cookies_names = ("sessionid",) + useragent = util.USERAGENT_CHROME request_interval = (6.0, 12.0) def __init__(self, match): diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 65717b4..abbdfd5 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -29,9 +29,11 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): example = "https://issuu.com/issuu/docs/TITLE/" def metadata(self, page): - pos = page.rindex('id="initial-data"') - data = util.json_loads(text.unescape(text.rextract( - page, '