From 63b6119a44afe2be9563acffd72aa974bb9d7f17 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 6 Jan 2025 01:56:28 -0500 Subject: New upstream version 1.28.3. --- CHANGELOG.md | 47 ++++----- PKG-INFO | 6 +- README.rst | 4 +- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 188 ++++++++++++++++++++-------------- gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl/extractor/8muses.py | 7 +- gallery_dl/extractor/batoto.py | 60 +++++++++-- gallery_dl/extractor/boosty.py | 3 + gallery_dl/extractor/civitai.py | 17 ++- gallery_dl/extractor/deviantart.py | 107 ++++++++++++++++--- gallery_dl/extractor/directlink.py | 3 +- gallery_dl/extractor/hitomi.py | 27 ++--- gallery_dl/extractor/instagram.py | 18 +++- gallery_dl/extractor/kemonoparty.py | 12 ++- gallery_dl/extractor/piczel.py | 49 ++++----- gallery_dl/extractor/poipiku.py | 10 +- gallery_dl/extractor/sankaku.py | 2 +- gallery_dl/extractor/subscribestar.py | 28 +++-- gallery_dl/extractor/szurubooru.py | 4 + gallery_dl/extractor/tapas.py | 4 +- gallery_dl/version.py | 2 +- 22 files changed, 401 insertions(+), 205 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2df827d..7fc97ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,31 +1,26 @@ -## 1.28.2 - 2024-12-20 +## 1.28.3 - 2025-01-04 ### Extractors #### Additions -- [cyberdrop] add extractor for media URLs ([#2496](https://github.com/mikf/gallery-dl/issues/2496)) -- [itaku] add `search` extractor ([#6613](https://github.com/mikf/gallery-dl/issues/6613)) -- [lofter] add initial support ([#650](https://github.com/mikf/gallery-dl/issues/650), [#2294](https://github.com/mikf/gallery-dl/issues/2294), [#4095](https://github.com/mikf/gallery-dl/issues/4095), [#4728](https://github.com/mikf/gallery-dl/issues/4728), [#5656](https://github.com/mikf/gallery-dl/issues/5656), [#6607](https://github.com/mikf/gallery-dl/issues/6607)) -- [yiffverse] add support ([#6611](https://github.com/mikf/gallery-dl/issues/6611)) +- [civitai] add `user-videos` extractor ([#6644](https://github.com/mikf/gallery-dl/issues/6644)) +- [szurubooru] support `visuabusters.com/booru` ([#6729](https://github.com/mikf/gallery-dl/issues/6729)) #### Fixes -- [facebook] decode Unicode surrogate pairs in metadata values ([#6599](https://github.com/mikf/gallery-dl/issues/6599)) -- [zerochan] parse API responses manually when receiving invalid JSON ([#6632](https://github.com/mikf/gallery-dl/issues/6632)) -- [zerochan] fix `source` metadata extraction when not logged in +- [8muses] skip albums without valid `permalink` ([#6717](https://github.com/mikf/gallery-dl/issues/6717)) +- [batoto] update domains ([#6714](https://github.com/mikf/gallery-dl/issues/6714)) +- [deviantart:tiptap] fix deviation embeds without `token` +- [hitomi] fix searches ([#6713](https://github.com/mikf/gallery-dl/issues/6713)) +- [instagram:reels] fix `pinned` values ([#6719](https://github.com/mikf/gallery-dl/issues/6719)) +- [kemonoparty] handle `discord` favorites ([#6706](https://github.com/mikf/gallery-dl/issues/6706)) +- [piczel] fix extraction ([#6735](https://github.com/mikf/gallery-dl/issues/6735)) +- [poipiku] fix downloads when post has a warning ([#6736](https://github.com/mikf/gallery-dl/issues/6736)) +- [sankaku] support alphanumeric book/pool IDs ([#6757](https://github.com/mikf/gallery-dl/issues/6757)) +- [subscribestar] fix attachment downloads ([#6721](https://github.com/mikf/gallery-dl/issues/6721), [#6724](https://github.com/mikf/gallery-dl/issues/6724), [#6758](https://github.com/mikf/gallery-dl/issues/6758)) +- [subscribestar] improve `content` metadata extraction ([#6761](https://github.com/mikf/gallery-dl/issues/6761)) +- [tapas] fix `TypeError` for locked episodes ([#6700](https://github.com/mikf/gallery-dl/issues/6700)) #### Improvements -- [bilibili] extract files from `module_top` entries ([#6687](https://github.com/mikf/gallery-dl/issues/6687)) -- [bilibili] support `/upload/opus` URLs ([#6687](https://github.com/mikf/gallery-dl/issues/6687)) -- [bluesky] default to `posts` timeline when `reposts` or `quoted` is enabled ([#6583](https://github.com/mikf/gallery-dl/issues/6583)) -- [common] simplify HTTP error messages -- [common] detect `DDoS-Guard` challenge pages -- [deviantart] improve `tiptap` markup to HTML conversion ([#6686](https://github.com/mikf/gallery-dl/issues/6686)) - - fix `KeyError: 'attrs'` for links without `href` - - support `heading` content blocks - - support `strike` text markers -- [instagram] extract `date` metadata for stories ([#6677](https://github.com/mikf/gallery-dl/issues/6677)) -- [kemonoparty:favorite] support new URL format ([#6676](https://github.com/mikf/gallery-dl/issues/6676)) -- [saint] support `saint2.cr` URLs ([#6692](https://github.com/mikf/gallery-dl/issues/6692)) -- [tapas] improve extractor hierarchy ([#6680](https://github.com/mikf/gallery-dl/issues/6680)) -#### Options -- [cohost] add `avatar` and `background` options ([#6656](https://github.com/mikf/gallery-dl/issues/6656)) +- [boosty] support `file` post attachments ([#6760](https://github.com/mikf/gallery-dl/issues/6760)) +- [deviantart:tiptap] support more content block types ([#6686](https://github.com/mikf/gallery-dl/issues/6686)) +- [directlink] use domain as `subcategory` ([#6703](https://github.com/mikf/gallery-dl/issues/6703)) +- [hitomi] provide `search_tags` metadata for `tag` and `search` results ([#6756](https://github.com/mikf/gallery-dl/issues/6756)) +- [subscribestar] support `audio` files ([#6758](https://github.com/mikf/gallery-dl/issues/6758)) ### Miscellaneous -- support `*` wildcards for `parent>child` categories, for example `reddit>*` ([#6673](https://github.com/mikf/gallery-dl/issues/6673)) -- use latest Firefox UA as default `user-agent` -- use random unused port for `"user-agent": "browser"` requests +- [workflows:executables] build with Python 3.13 diff --git a/PKG-INFO b/PKG-INFO index d5fce98..ecc3fc2 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.28.2 +Version: 1.28.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -117,9 +117,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index 240dfe5..6ed729b 100644 --- a/README.rst +++ b/README.rst @@ -76,9 +76,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 3d84f58..f4791df 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2024-12-20" "1.28.2" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2025-01-04" "1.28.3" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index c27f632..7028b7a 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2024-12-20" "1.28.2" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2025-01-04" "1.28.3" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -454,18 +454,34 @@ response before \f[I]retrying\f[] the request. .IP "Default:" 9 .br * \f[I]"0.5-1.5"\f[] -\f[I]ao3\f[], \f[I]civitai\f[], -\f[I][Danbooru]\f[], \f[I][E621]\f[], \f[I][foolfuuka]:search\f[], \f[I]itaku\f[], +\f[I]ao3\f[], +\f[I]civitai\f[], +\f[I][Danbooru]\f[], +\f[I][E621]\f[], +\f[I][foolfuuka]:search\f[], +\f[I]itaku\f[], \f[I]koharu\f[], -\f[I]newgrounds\f[], \f[I][philomena]\f[], \f[I]pixiv:novel\f[], \f[I]plurk\f[], -\f[I]poipiku\f[] , \f[I]pornpics\f[], \f[I]scrolller\f[], \f[I]soundgasm\f[], -\f[I]urlgalleries\f[], \f[I]vk\f[], \f[I]zerochan\f[] +\f[I]newgrounds\f[], +\f[I][philomena]\f[], +\f[I]pixiv:novel\f[], +\f[I]plurk\f[], +\f[I]poipiku\f[] , +\f[I]pornpics\f[], +\f[I]scrolller\f[], +\f[I]soundgasm\f[], +\f[I]urlgalleries\f[], +\f[I]vk\f[], +\f[I]zerochan\f[] .br * \f[I]"1.0-2.0"\f[] -\f[I]flickr\f[], \f[I]weibo\f[], \f[I][wikimedia]\f[] +\f[I]flickr\f[], +\f[I]weibo\f[], +\f[I][wikimedia]\f[] .br * \f[I]"2.0-4.0"\f[] -\f[I]behance\f[], \f[I]imagefap\f[], \f[I][Nijie]\f[] +\f[I]behance\f[], +\f[I]imagefap\f[], +\f[I][Nijie]\f[] .br * \f[I]"3.0-6.0"\f[] \f[I]bilibili\f[], @@ -782,17 +798,20 @@ or a \f[I]list\f[] with IP and explicit port number as elements. \f[I]string\f[] .IP "Default:" 9 -\f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0"\f[] +.br +* \f[I]"gallery-dl/VERSION"\f[]: \f[I][Danbooru]\f[], \f[I]mangadex\f[] +.br +* \f[I]"gallery-dl/VERSION (by mikf)"\f[]: \f[I][E621]\f[] +.br +* \f[I]"Patreon/72.2.28 (Android; Android 14; Scale/2.10)"\f[]: \f[I]patreon\f[] +.br +* \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise .IP "Description:" 4 User-Agent header value to be used for HTTP requests. Setting this value to \f[I]"browser"\f[] will try to automatically detect -and use the User-Agent used by the system's default browser. - -Note: This option has no effect on -pixiv, e621, mangadex, and patreon -extractors, as these need specific values to function correctly. +and use the \f[I]User-Agent\f[] header of the system's default browser. .SS extractor.*.browser @@ -1965,9 +1984,15 @@ A (comma-separated) list of subcategories to include when processing a user profile. Possible values are -\f[I]"user-models"\f[], -\f[I]"user-posts"\f[], -\f[I]"user-images"\f[]. + +.br +* \f[I]"user-models"\f[] +.br +* \f[I]"user-posts"\f[] +.br +* \f[I]"user-images"\f[] +.br +* \f[I]"user-videos"\f[] It is possible to use \f[I]"all"\f[] instead of listing all values separately. @@ -1993,7 +2018,7 @@ It is possible to use \f[I]"all"\f[] instead of listing all values separately. .IP "Description:" 4 Extract additional \f[I]generation\f[] metadata. -Note: This requires 1 additional HTTP request per image. +Note: This requires 1 additional HTTP request per image or video. .SS extractor.civitai.nsfw @@ -2135,7 +2160,7 @@ Setting this option to \f[I]"auto"\f[] uses the same domain as a given input URL. -.SS extractor.danbooru.external +.SS extractor.[Danbooru].external .IP "Type:" 6 \f[I]bool\f[] @@ -2147,7 +2172,7 @@ For unavailable or restricted posts, follow the \f[I]source\f[] and download from there if possible. -.SS extractor.danbooru.ugoira +.SS extractor.[Danbooru].ugoira .IP "Type:" 6 \f[I]bool\f[] @@ -2212,48 +2237,6 @@ greater than the per-page limit, gallery-dl will stop after the first batch. The value cannot be less than 1. -.SS extractor.derpibooru.api-key -.IP "Type:" 6 -\f[I]string\f[] - -.IP "Default:" 9 -\f[I]null\f[] - -.IP "Description:" 4 -Your \f[I]Derpibooru API Key\f[], -to use your account's browsing settings and filters. - - -.SS extractor.derpibooru.filter -.IP "Type:" 6 -\f[I]integer\f[] - -.IP "Default:" 9 -\f[I]56027\f[] (\f[I]Everything\f[] filter) - -.IP "Description:" 4 -The content filter ID to use. - -Setting an explicit filter ID overrides any default filters and can be used -to access 18+ content without \f[I]API Key\f[]. - -See \f[I]Filters\f[] for details. - - -.SS extractor.derpibooru.svg -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -Download SVG versions of images when available. - -Try to download the \f[I]view_url\f[] version of these posts -when this option is disabled. - - .SS extractor.deviantart.auto-watch .IP "Type:" 6 \f[I]bool\f[] @@ -4097,25 +4080,73 @@ Selects the format of \f[I]images\f[] \f[I]files\f[]. Possible formats: .br -* \f[I]original\f[] +* \f[I]download_url\f[] (\f[I]"a":1,"p":1\f[]) .br -* \f[I]default\f[] +* \f[I]url\f[] (\f[I]"w":620\f[]) .br -* \f[I]default_small\f[] +* \f[I]original\f[] (\f[I]"q":100,"webp":0\f[]) .br -* \f[I]default_blurred\f[] +* \f[I]default\f[] (\f[I]"w":620\f[]) .br -* \f[I]default_blurred_small\f[] +* \f[I]default_small\f[] (\f[I]"w":360\f[]) .br -* \f[I]thumbnail\f[] +* \f[I]default_blurred\f[] (\f[I]"w":620\f[]) .br -* \f[I]thumbnail_large\f[] +* \f[I]default_blurred_small\f[] (\f[I]"w":360\f[]) .br -* \f[I]thumbnail_small\f[] +* \f[I]thumbnail\f[] (\f[I]"h":360,"w":360\f[]) .br -* \f[I]url\f[] +* \f[I]thumbnail_large\f[] (\f[I]"h":1080,"w":1080\f[]) .br -* \f[I]download_url\f[] +* \f[I]thumbnail_small\f[] (\f[I]"h":100,"w":100\f[]) + + +.SS extractor.[philomena].api-key +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +Your account's API Key, +to use your personal browsing settings and filters. + + +.SS extractor.[philomena].filter +.IP "Type:" 6 +\f[I]integer\f[] + +.IP "Default:" 9 +:\f[I]derpibooru\f[]: +\f[I]56027\f[] (\f[I]Everything\f[] filter) +:\f[I]ponybooru\f[]: +\f[I]3\f[] (\f[I]Nah.\f[] filter) +:otherwise: +\f[I]2\f[] + + +.IP "Description:" 4 +The content filter ID to use. + +Setting an explicit filter ID overrides any default filters and can be used +to access 18+ content without \f[I]API Key\f[]. + +See \f[I]Filters\f[] for details. + + +.SS extractor.[philomena].svg +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download SVG versions of images when available. + +Try to download the \f[I]view_url\f[] version of these posts +when this option is disabled. .SS extractor.pillowfort.external @@ -6218,7 +6249,7 @@ Note: This requires 1 additional HTTP request per post. .br * "preview_url" .br -* ["sample_url", "preview_url", "file_url"} +* ["sample_url", "preview_url", "file_url"] .IP "Description:" 4 Alternate field name to retrieve download URLs from. @@ -6992,10 +7023,13 @@ before outputting them as JSON. .. code:: json { -"Pictures": ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"], -"Video" : ["flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", "webm", "vob", "wmv"], -"Music" : ["mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"], -"Archives": ["zip", "rar", "7z", "tar", "gz", "bz2"] +"Pictures" : ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp", +"avif", "heic", "heif", "ico", "psd"], +"Video" : ["flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", +"webm", "vob", "wmv", "m4v", "mov"], +"Music" : ["mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"], +"Archives" : ["zip", "rar", "7z", "tar", "gz", "bz2"], +"Documents": ["txt", "pdf"] } diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index d5fce98..ecc3fc2 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.28.2 +Version: 1.28.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -117,9 +117,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index f88a0c6..68b906e 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -57,7 +57,12 @@ class _8musesAlbumExtractor(Extractor): albums = data.get("albums") if albums: for album in albums: - url = self.root + "/comics/album/" + album["permalink"] + permalink = album.get("permalink") + if not permalink: + self.log.debug("Private album") + continue + + url = self.root + "/comics/album/" + permalink yield Message.Queue, url, { "url" : url, "name" : album["name"], diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 786acd9..77c40ef 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -10,17 +10,55 @@ from .common import Extractor, ChapterExtractor, MangaExtractor from .. import text, exception import re -BASE_PATTERN = (r"(?:https?://)?(?:" - r"(?:ba|d|h|m|w)to\.to|" +BASE_PATTERN = (r"(?:https?://)?(" + r"(?:ba|d|f|h|j|m|w)to\.to|" r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|" r"comiko\.(?:net|org)|" r"bat(?:otoo|o?two)\.com)") +# https://rentry.co/batoto +DOMAINS = { + "dto.to", + "fto.to", + "hto.to", + "jto.to", + "mto.to", + "wto.to", + "xbato.com", + "xbato.net", + "xbato.org", + "zbato.com", + "zbato.net", + "zbato.org", + "readtoto.com", + "readtoto.net", + "readtoto.org", + "batocomic.com", + "batocomic.net", + "batocomic.org", + "batotoo.com", + "batotwo.com", + "comiko.net", + "comiko.org", + "battwo.com", +} +LEGACY_DOMAINS = { + "bato.to", + "mangatoto.com", + "mangatoto.net", + "mangatoto.org", +} + class BatotoBase(): """Base class for batoto extractors""" category = "batoto" - root = "https://bato.to" + root = "https://xbato.org" + + def _init_root(self, match): + domain = match.group(1) + if domain not in LEGACY_DOMAINS: + self.root = "https://" + domain def request(self, url, **kwargs): kwargs["encoding"] = "utf-8" @@ -28,13 +66,13 @@ class BatotoBase(): class BatotoChapterExtractor(BatotoBase, ChapterExtractor): - """Extractor for bato.to manga chapters""" + """Extractor for batoto manga chapters""" pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)" - example = "https://bato.to/title/12345-MANGA/54321" + example = "https://xbato.org/title/12345-MANGA/54321" def __init__(self, match): - self.root = text.root_from_url(match.group(0)) - self.chapter_id = match.group(1) + self._init_root(match) + self.chapter_id = match.group(2) url = "{}/title/0/{}".format(self.root, self.chapter_id) ChapterExtractor.__init__(self, match, url) @@ -86,16 +124,16 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor): class BatotoMangaExtractor(BatotoBase, MangaExtractor): - """Extractor for bato.to manga""" + """Extractor for batoto manga""" reverse = False chapterclass = BatotoChapterExtractor pattern = (BASE_PATTERN + r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$") - example = "https://bato.to/title/12345-MANGA/" + example = "https://xbato.org/title/12345-MANGA/" def __init__(self, match): - self.root = text.root_from_url(match.group(0)) - self.manga_id = match.group(1) or match.group(2) + self._init_root(match) + self.manga_id = match.group(2) or match.group(3) url = "{}/title/{}".format(self.root, self.manga_id) MangaExtractor.__init__(self, match, url) diff --git a/gallery_dl/extractor/boosty.py b/gallery_dl/extractor/boosty.py index 33823be..c28fad9 100644 --- a/gallery_dl/extractor/boosty.py +++ b/gallery_dl/extractor/boosty.py @@ -124,6 +124,9 @@ class BoostyExtractor(Extractor): elif type == "audio_file": files.append(self._update_url(post, block)) + elif type == "file": + files.append(self._update_url(post, block)) + else: self.log.debug("%s: Unsupported data type '%s'", post["int_id"], type) diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 1e8cb42..36efcfe 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -338,6 +338,7 @@ class CivitaiUserExtractor(CivitaiExtractor): (CivitaiUserModelsExtractor, base + "models"), (CivitaiUserPostsExtractor , base + "posts"), (CivitaiUserImagesExtractor, base + "images"), + (CivitaiUserVideosExtractor, base + "videos"), ), ("user-models", "user-posts")) @@ -400,6 +401,20 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): return self.api.images(params) +class CivitaiUserVideosExtractor(CivitaiExtractor): + subcategory = "user-videos" + directory_fmt = ("{category}", "{username|user[username]}", "videos") + pattern = USER_PATTERN + r"/videos/?(?:\?([^#]+))?" + example = "https://civitai.com/user/USER/videos" + + def images(self): + self._image_ext = "mp4" + params = text.parse_query(self.groups[1]) + params["types"] = ["video"] + params["username"] = text.unquote(self.groups[0]) + return self.api.images(params) + + class CivitaiRestAPI(): """Interface for the Civitai Public REST API @@ -484,7 +499,7 @@ class CivitaiTrpcAPI(): self.root = extractor.root + "/api/trpc/" self.headers = { "content-type" : "application/json", - "x-client-version": "5.0.211", + "x-client-version": "5.0.394", "x-client-date" : "", "x-client" : "web", "x-fingerprint" : "undefined", diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 69934b4..8172f62 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -440,7 +440,8 @@ class DeviantartExtractor(Extractor): html.append("text-align:") html.append(attrs["textAlign"]) html.append(";") - html.append('margin-inline-start:0px">') + self._tiptap_process_indentation(html, attrs) + html.append('">') for block in children: self._tiptap_process_content(html, block) @@ -460,17 +461,32 @@ class DeviantartExtractor(Extractor): html.append(' style="text-align:') html.append(attrs.get("textAlign") or "left") html.append('">') - html.append('') - - children = content.get("content") - if children: - for block in children: - self._tiptap_process_content(html, block) - + html.append('') + self._tiptap_process_children(html, content) html.append("") + elif type in ("listItem", "bulletList", "orderedList", "blockquote"): + c = type[1] + tag = ( + "li" if c == "i" else + "ul" if c == "u" else + "ol" if c == "r" else + "blockquote" + ) + html.append("<" + tag + ">") + self._tiptap_process_children(html, content) + html.append("") + + elif type == "anchor": + attrs = content["attrs"] + html.append('') + elif type == "hardBreak": html.append("

") @@ -488,6 +504,44 @@ class DeviantartExtractor(Extractor): html.append(user) html.append('') + elif type == "da-gif": + attrs = content["attrs"] + width = str(attrs.get("width") or "") + height = str(attrs.get("height") or "") + url = text.escape(attrs.get("url") or "") + + html.append('
') + + elif type == "da-video": + src = text.escape(content["attrs"].get("src") or "") + html.append('
' + '
') + else: self.log.warning("Unsupported content type '%s'", type) @@ -501,7 +555,13 @@ class DeviantartExtractor(Extractor): attrs = mark.get("attrs") or {} html.append('') + if "target" in attrs: + html.append('" target="') + html.append(attrs["target"]) + html.append('" rel="') + html.append(attrs.get("rel") or + "noopener noreferrer nofollow ugc") + html.append('">') close.append("") elif type == "bold": html.append("") @@ -525,6 +585,18 @@ class DeviantartExtractor(Extractor): else: html.append(text.escape(content["text"])) + def _tiptap_process_children(self, html, content): + children = content.get("content") + if children: + for block in children: + self._tiptap_process_content(html, block) + + def _tiptap_process_indentation(self, html, attrs): + itype = ("text-indent" if attrs.get("indentType") == "line" else + "margin-inline-start") + isize = str((attrs.get("indentation") or 0) * 24) + html.append(itype + ":" + isize + "px") + def _tiptap_process_deviation(self, html, content): dev = content["attrs"]["deviation"] media = dev.get("media") or () @@ -758,19 +830,22 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ self.api.user_friends_unwatch(username) def _eclipse_media(self, media, format="preview"): - url = [media["baseUri"], ] + url = [media["baseUri"]] formats = { fmt["t"]: fmt for fmt in media["types"] } - tokens = media["token"] - if len(tokens) == 1: - fmt = formats[format] - url.append(fmt["c"].replace("", media["prettyName"])) - url.append("?token=") - url.append(tokens[-1]) + tokens = media.get("token") or () + if tokens: + if len(tokens) <= 1: + fmt = formats[format] + if "c" in fmt: + url.append(fmt["c"].replace( + "", media["prettyName"])) + url.append("?token=") + url.append(tokens[-1]) return "".join(url), formats diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py index 2f0230a..4559aff 100644 --- a/gallery_dl/extractor/directlink.py +++ b/gallery_dl/extractor/directlink.py @@ -25,7 +25,8 @@ class DirectlinkExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.data = match.groupdict() + self.data = data = match.groupdict() + self.subcategory = ".".join(data["domain"].rsplit(".", 2)[-2:]) def items(self): data = self.data diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 308b42c..e15e13c 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -122,7 +122,10 @@ class HitomiTagExtractor(Extractor): self.tag = tag def items(self): - data = {"_extractor": HitomiGalleryExtractor} + data = { + "_extractor": HitomiGalleryExtractor, + "search_tags": text.unquote(self.tag.rpartition("-")[0]), + } nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format( self.type, self.tag) headers = { @@ -202,12 +205,14 @@ class HitomiSearchExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.query = match.group(1) - self.tags = text.unquote(self.query).split(" ") + self.tags = text.unquote(self.query) def items(self): - data = {"_extractor": HitomiGalleryExtractor} - - results = [self.get_nozomi_items(tag) for tag in self.tags] + data = { + "_extractor": HitomiGalleryExtractor, + "search_tags": self.tags, + } + results = [self.get_nozomi_items(tag) for tag in self.tags.split(" ")] intersects = set.intersection(*results) for gallery_id in sorted(intersects, reverse=True): @@ -219,20 +224,16 @@ class HitomiSearchExtractor(Extractor): area, tag, language = self.get_nozomi_args(full_tag) if area: - referer_base = "{}/n/{}/{}-{}.html".format( - self.root, area, tag, language) - nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format( + nozomi_url = "https://ltn.hitomi.la/n/{}/{}-{}.nozomi".format( area, tag, language) else: - referer_base = "{}/n/{}-{}.html".format( - self.root, tag, language) - nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format( + nozomi_url = "https://ltn.hitomi.la/n/{}-{}.nozomi".format( tag, language) headers = { "Origin": self.root, "Cache-Control": "max-age=0", - "Referer": "{}/search.html?{}".format(referer_base, self.query), + "Referer": "{}/search.html?{}".format(self.root, self.query), } response = self.request(nozomi_url, headers=headers) @@ -251,7 +252,7 @@ class HitomiSearchExtractor(Extractor): language = tag tag = "index" - return area, tag, language + return area, tag.replace("_", " "), language @memcache(maxage=1800) diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 8c5b180..e344b2f 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -172,8 +172,8 @@ class InstagramExtractor(Extractor): "post_shortcode": post["code"], "post_url": "{}/p/{}/".format(self.root, post["code"]), "likes": post.get("like_count", 0), - "pinned": post.get("timeline_pinned_user_ids", ()), "liked": post.get("has_liked", False), + "pinned": self._extract_pinned(post), } caption = post["caption"] @@ -385,6 +385,10 @@ class InstagramExtractor(Extractor): "username" : user["username"], "full_name": user["full_name"]}) + def _extract_pinned(self, post): + return (post.get("timeline_pinned_user_ids") or + post.get("clips_tab_pinned_user_ids") or ()) + def _init_cursor(self): cursor = self.config("cursor", True) if cursor is True: @@ -451,6 +455,12 @@ class InstagramPostsExtractor(InstagramExtractor): uid = self.api.user_id(self.item) return self.api.user_feed(uid) + def _extract_pinned(self, post): + try: + return post["timeline_pinned_user_ids"] + except KeyError: + return () + class InstagramReelsExtractor(InstagramExtractor): """Extractor for an Instagram user's reels""" @@ -462,6 +472,12 @@ class InstagramReelsExtractor(InstagramExtractor): uid = self.api.user_id(self.item) return self.api.user_clips(uid) + def _extract_pinned(self, post): + try: + return post["clips_tab_pinned_user_ids"] + except KeyError: + return () + class InstagramTaggedExtractor(InstagramExtractor): """Extractor for an Instagram user's tagged posts""" diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index a7caca9..66bbab5 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -455,9 +455,15 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor): reverse=(order == "desc")) for user in users: - user["_extractor"] = KemonopartyUserExtractor - url = "{}/{}/user/{}".format( - self.root, user["service"], user["id"]) + service = user["service"] + if service == "discord": + user["_extractor"] = KemonopartyDiscordServerExtractor + url = "{}/discord/server/{}".format( + self.root, user["id"]) + else: + user["_extractor"] = KemonopartyUserExtractor + url = "{}/{}/user/{}".format( + self.root, service, user["id"]) yield Message.Queue, url, user elif type == "post": diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index fe26704..8a729f3 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -11,6 +11,8 @@ from .common import Extractor, Message from .. import text +BASE_PATTERN = r"(?:https?://)?(?:www\.)?piczel\.tv" + class PiczelExtractor(Extractor): """Base class for piczel extractors""" @@ -30,6 +32,7 @@ class PiczelExtractor(Extractor): if post["multi"]: images = post["images"] del post["images"] + post["count"] = len(images) yield Message.Directory, post for post["num"], image in enumerate(images): if "id" in image: @@ -39,6 +42,7 @@ class PiczelExtractor(Extractor): yield Message.Url, url, text.nameext_from_url(url, post) else: + post["count"] = 1 yield Message.Directory, post post["num"] = 0 url = post["image"]["url"] @@ -47,35 +51,27 @@ class PiczelExtractor(Extractor): def posts(self): """Return an iterable with all relevant post objects""" - def _pagination(self, url, folder_id=None): - params = { - "from_id" : None, - "folder_id": folder_id, - } + def _pagination(self, url, pnum=1): + params = {"page": pnum} while True: data = self.request(url, params=params).json() - if not data: - return - params["from_id"] = data[-1]["id"] - for post in data: - if not folder_id or folder_id == post["folder_id"]: - yield post + yield from data["data"] + + params["page"] = data["meta"]["next_page"] + if not params["page"]: + return class PiczelUserExtractor(PiczelExtractor): """Extractor for all images from a user's gallery""" subcategory = "user" - pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/?$" example = "https://piczel.tv/gallery/USER" - def __init__(self, match): - PiczelExtractor.__init__(self, match) - self.user = match.group(1) - def posts(self): - url = "{}/api/users/{}/gallery".format(self.root_api, self.user) + url = "{}/api/users/{}/gallery".format(self.root_api, self.groups[0]) return self._pagination(url) @@ -84,29 +80,20 @@ class PiczelFolderExtractor(PiczelExtractor): subcategory = "folder" directory_fmt = ("{category}", "{user[username]}", "{folder[name]}") archive_fmt = "f{folder[id]}_{id}_{num}" - pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv" - r"/gallery/(?!image)([^/?#]+)/(\d+)") + pattern = BASE_PATTERN + r"/gallery/(?!image/)[^/?#]+/(\d+)" example = "https://piczel.tv/gallery/USER/12345" - def __init__(self, match): - PiczelExtractor.__init__(self, match) - self.user, self.folder_id = match.groups() - def posts(self): - url = "{}/api/users/{}/gallery".format(self.root_api, self.user) - return self._pagination(url, int(self.folder_id)) + url = "{}/api/gallery/folder/{}".format(self.root_api, self.groups[0]) + return self._pagination(url) class PiczelImageExtractor(PiczelExtractor): """Extractor for individual images""" subcategory = "image" - pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)" + pattern = BASE_PATTERN + r"/gallery/image/(\d+)" example = "https://piczel.tv/gallery/image/12345" - def __init__(self, match): - PiczelExtractor.__init__(self, match) - self.image_id = match.group(1) - def posts(self): - url = "{}/api/gallery/{}".format(self.root_api, self.image_id) + url = "{}/api/gallery/{}".format(self.root_api, self.groups[0]) return (self.request(url).json(),) diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py index e09a7aa..e371ee2 100644 --- a/gallery_dl/extractor/poipiku.py +++ b/gallery_dl/extractor/poipiku.py @@ -52,20 +52,23 @@ class PoipikuExtractor(Extractor): } yield Message.Directory, post - post["num"] = 0 + post["num"] = warning = 0 while True: thumb = extr('class="IllustItemThumbImg" src="', '"') if not thumb: break elif thumb.startswith(("//img.poipiku.com/img/", "/img/")): + if "/warning" in thumb: + warning = True + self.log.debug("%s: %s", post["post_id"], thumb) continue post["num"] += 1 url = text.ensure_http_scheme(thumb[:-8]).replace( "//img.", "//img-org.", 1) yield Message.Url, url, text.nameext_from_url(url, post) - if not extr('ShowAppendFile', '<'): + if not warning and not extr('ShowAppendFile', '<'): continue url = self.root + "/f/ShowAppendFileF.jsp" @@ -87,7 +90,8 @@ class PoipikuExtractor(Extractor): page = resp["html"] if (resp.get("result_num") or 0) < 0: - self.log.warning("'%s'", page.replace("
", " ")) + self.log.warning("%s: '%s'", + post["post_id"], page.replace("
", " ")) for thumb in text.extract_iter( page, 'class="IllustItemThumbImg" src="', '"'): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index d5309dc..5e3a958 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -131,7 +131,7 @@ class SankakuPoolExtractor(SankakuExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name_en]}") archive_fmt = "p_{pool}_{id}" - pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\d+)" + pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)" example = "https://sankaku.app/books/12345" def __init__(self, match): diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 7c760ac..8668330 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -11,6 +11,7 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache +import re BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)" @@ -98,9 +99,10 @@ class SubscribestarExtractor(Extractor): media.append(item) attachments = text.extr( - html, 'class="uploads-docs"', 'data-role="post-edit_form"') + html, 'class="uploads-docs"', 'class="post-edit_form"') if attachments: - for att in attachments.split('class="doc_preview"')[1:]: + for att in re.split( + r'class="doc_preview[" ]', attachments)[1:]: media.append({ "id" : text.parse_int(text.extr( att, 'data-upload-id="', '"')), @@ -110,6 +112,20 @@ class SubscribestarExtractor(Extractor): "type": "attachment", }) + audios = text.extr( + html, 'class="uploads-audios"', 'class="post-edit_form"') + if audios: + for audio in re.split( + r'class="audio_preview-data[" ]', audios)[1:]: + media.append({ + "id" : text.parse_int(text.extr( + audio, 'data-upload-id="', '"')), + "name": text.unescape(text.extr( + audio, 'audio_preview-title">', '<')), + "url" : text.unescape(text.extr(audio, 'src="', '"')), + "type": "audio", + }) + return media def _data_from_post(self, html): @@ -121,9 +137,7 @@ class SubscribestarExtractor(Extractor): "author_nick": text.unescape(extr('>', '<')), "date" : self._parse_datetime(extr( 'class="post-date">', '")[2]), - "content" : (extr( - '
")[2]), + "content" : extr('', '').strip(), } def _parse_datetime(self, dt): @@ -180,7 +194,5 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "author_nick": text.unescape(extr('alt="', '"')), "date" : self._parse_datetime(extr( '', '<')), - "content" : (extr( - '
")[2]), + "content" : extr('', '').strip(), } diff --git a/gallery_dl/extractor/szurubooru.py b/gallery_dl/extractor/szurubooru.py index b6917cc..b122f26 100644 --- a/gallery_dl/extractor/szurubooru.py +++ b/gallery_dl/extractor/szurubooru.py @@ -92,6 +92,10 @@ BASE_PATTERN = SzurubooruExtractor.update({ "root": "https://snootbooru.com", "pattern": r"snootbooru\.com", }, + "visuabusters": { + "root": "https://www.visuabusters.com/booru", + "pattern": r"(?:www\.)?visuabusters\.com/booru", + }, }) diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py index e756385..35a346d 100644 --- a/gallery_dl/extractor/tapas.py +++ b/gallery_dl/extractor/tapas.py @@ -85,8 +85,8 @@ class TapasEpisodeExtractor(TapasExtractor): episode = data["episode"] if not episode.get("free") and not episode.get("unlocked"): raise exception.AuthorizationError( - "%s: Episode '%s' not unlocked", - episode_id, episode["title"]) + "{}: Episode '{}' not unlocked".format( + episode_id, episode["title"])) html = data["html"] episode["series"] = self._extract_series(html) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 651745a..4b28924 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.2" +__version__ = "1.28.3" __variant__ = None -- cgit v1.2.3