diff options
| author | 2024-12-08 20:34:39 -0500 | |
|---|---|---|
| committer | 2024-12-08 20:34:39 -0500 | |
| commit | 955a18e4feea86fdb35e531a00304e00d037652c (patch) | |
| tree | 06060068ebe725be4294758b2caca3e2491ef4f0 | |
| parent | 402872c8ca0118f5ed9c172d3c11dac90dd41c37 (diff) | |
| parent | f6877087773089220d68288d055276fca6c556d4 (diff) | |
Update upstream source from tag 'upstream/1.28.1'
Update to upstream version '1.28.1'
with Debian dir f1535f052953f6a9195352a951ec8dd121144a27
25 files changed, 422 insertions, 230 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index e3dec8c..b831cd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,87 +1,26 @@ -## 1.28.0 - 2024-11-30 -### Changes -- [common] disable using environment network settings by default (`HTTP_PROXY`, `.netrc`, …) - - disable `trust_env` session attribute - - disable `Authorization` header injection from `.netrc` auth ([#5780](https://github.com/mikf/gallery-dl/issues/5780), [#6134](https://github.com/mikf/gallery-dl/issues/6134), [#6455](https://github.com/mikf/gallery-dl/issues/6455)) - - add `proxy-env` option -- [ytdl] change `forward-cookies` default value to `true` ([#6401](https://github.com/mikf/gallery-dl/issues/6401), [#6348](https://github.com/mikf/gallery-dl/issues/6348)) +## 1.28.1 - 2024-12-07 ### Extractors #### Additions -- [bilibili] add support for `opus` articles ([#2824](https://github.com/mikf/gallery-dl/issues/2824), [#6443](https://github.com/mikf/gallery-dl/issues/6443)) -- [bluesky] add `hashtag` extractor ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) -- [danbooru] add `artist` and `artist-search` extractors ([#5348](https://github.com/mikf/gallery-dl/issues/5348)) -- [everia] add support ([#1067](https://github.com/mikf/gallery-dl/issues/1067), [#2472](https://github.com/mikf/gallery-dl/issues/2472), [#4091](https://github.com/mikf/gallery-dl/issues/4091), [#6227](https://github.com/mikf/gallery-dl/issues/6227)) -- [facebook] add support ([#470](https://github.com/mikf/gallery-dl/issues/470), [#2612](https://github.com/mikf/gallery-dl/issues/2612), [#5626](https://github.com/mikf/gallery-dl/issues/5626), [#6548](https://github.com/mikf/gallery-dl/issues/6548)) -- [hentaifoundry] add `tag` extractor ([#6465](https://github.com/mikf/gallery-dl/issues/6465)) -- [hitomi] add `index` and `search` extractors ([#2502](https://github.com/mikf/gallery-dl/issues/2502), [#6392](https://github.com/mikf/gallery-dl/issues/6392), [#3720](https://github.com/mikf/gallery-dl/issues/3720)) -- 
[motherless] add support ([#2074](https://github.com/mikf/gallery-dl/issues/2074), [#4413](https://github.com/mikf/gallery-dl/issues/4413), [#6221](https://github.com/mikf/gallery-dl/issues/6221)) -- [noop] add `noop` extractor -- [rule34vault] add support ([#5708](https://github.com/mikf/gallery-dl/issues/5708), [#6240](https://github.com/mikf/gallery-dl/issues/6240)) -- [rule34xyz] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078), [#4960](https://github.com/mikf/gallery-dl/issues/4960)) -- [saint] add support ([#4405](https://github.com/mikf/gallery-dl/issues/4405), [#6324](https://github.com/mikf/gallery-dl/issues/6324)) -- [tumblr] add `search` extractor ([#6394](https://github.com/mikf/gallery-dl/issues/6394)) +- [bluesky] add `info` extractor #### Fixes -- [8chan] avoid performing network requests within `_init()` ([#6387](https://github.com/mikf/gallery-dl/issues/6387)) -- [bluesky] fix downloads from non-bsky PDSs ([#6406](https://github.com/mikf/gallery-dl/issues/6406)) -- [bunkr] fix album names containing `<>&` characters -- [flickr] use `download` URLs ([#6360](https://github.com/mikf/gallery-dl/issues/6360), [#6464](https://github.com/mikf/gallery-dl/issues/6464)) -- [hiperdex] update domain to `hipertoon.com` ([#6420](https://github.com/mikf/gallery-dl/issues/6420)) -- [imagechest] fix extractors ([#6475](https://github.com/mikf/gallery-dl/issues/6475), [#6491](https://github.com/mikf/gallery-dl/issues/6491)) -- [instagram] fix using numeric cursor values ([#6414](https://github.com/mikf/gallery-dl/issues/6414)) -- [kemonoparty] update to new site layout ([#6415](https://github.com/mikf/gallery-dl/issues/6415), [#6503](https://github.com/mikf/gallery-dl/issues/6503), [#6528](https://github.com/mikf/gallery-dl/issues/6528), [#6530](https://github.com/mikf/gallery-dl/issues/6530), [#6536](https://github.com/mikf/gallery-dl/issues/6536), [#6542](https://github.com/mikf/gallery-dl/issues/6542), 
[#6554](https://github.com/mikf/gallery-dl/issues/6554)) -- [koharu] update domain to `niyaniya.moe` ([#6430](https://github.com/mikf/gallery-dl/issues/6430), [#6432](https://github.com/mikf/gallery-dl/issues/6432)) -- [mangadex] apply `lang` option only to chapter results ([#6372](https://github.com/mikf/gallery-dl/issues/6372)) -- [newgrounds] fix metadata extraction ([#6463](https://github.com/mikf/gallery-dl/issues/6463), [#6533](https://github.com/mikf/gallery-dl/issues/6533)) -- [nhentai] support `.webp` files ([#6442](https://github.com/mikf/gallery-dl/issues/6442), [#6479](https://github.com/mikf/gallery-dl/issues/6479)) -- [patreon] use legacy mobile UA when no `session_id` is set -- [pinterest] update API headers ([#6513](https://github.com/mikf/gallery-dl/issues/6513)) -- [pinterest] detect video/audio by block content ([#6421](https://github.com/mikf/gallery-dl/issues/6421)) -- [scrolller] prevent exception for posts without `mediaSources` ([#5051](https://github.com/mikf/gallery-dl/issues/5051)) -- [tumblrgallery] fix file downloads ([#6391](https://github.com/mikf/gallery-dl/issues/6391)) -- [twitter] make `source` metadata extraction non-fatal ([#6472](https://github.com/mikf/gallery-dl/issues/6472)) -- [weibo] fix livephoto `filename` & `extension` ([#6471](https://github.com/mikf/gallery-dl/issues/6471)) +- [bluesky] fix exception when encountering non-quote embeds ([#6577](https://github.com/mikf/gallery-dl/issues/6577)) +- [bluesky] unescape search queries ([#6579](https://github.com/mikf/gallery-dl/issues/6579)) +- [common] restore using environment proxy settings by default ([#6553](https://github.com/mikf/gallery-dl/issues/6553), [#6609](https://github.com/mikf/gallery-dl/issues/6609)) +- [common] improve handling of `user-agent` settings ([#6594](https://github.com/mikf/gallery-dl/issues/6594)) +- [e621] fix `TypeError` when `metadata` is enabled ([#6587](https://github.com/mikf/gallery-dl/issues/6587)) +- [gofile] fix website token 
extraction ([#6596](https://github.com/mikf/gallery-dl/issues/6596)) +- [inkbunny] fix re-login loop ([#6618](https://github.com/mikf/gallery-dl/issues/6618)) +- [instagram] handle empty `carousel_media` entries ([#6595](https://github.com/mikf/gallery-dl/issues/6595)) +- [kemonoparty] fix `o` query parameter handling ([#6597](https://github.com/mikf/gallery-dl/issues/6597)) +- [nhentai] fix download URLs ([#6620](https://github.com/mikf/gallery-dl/issues/6620)) +- [readcomiconline] fix `chapter` extraction ([#6070](https://github.com/mikf/gallery-dl/issues/6070), [#6335](https://github.com/mikf/gallery-dl/issues/6335)) +- [realbooru] fix extraction ([#6543](https://github.com/mikf/gallery-dl/issues/6543)) +- [rule34] fix `favorite` extraction ([#6573](https://github.com/mikf/gallery-dl/issues/6573)) +- [zerochan] download `.webp` and `.gif` files ([#6576](https://github.com/mikf/gallery-dl/issues/6576)) #### Improvements -- [bluesky] support `main.bsky.dev` URLs ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) -- [bluesky] match common embed fixes ([#6410](https://github.com/mikf/gallery-dl/issues/6410), [#6411](https://github.com/mikf/gallery-dl/issues/6411)) -- [boosty] update default video format list ([#2387](https://github.com/mikf/gallery-dl/issues/2387)) -- [bunkr] support `bunkr.cr` URLs -- [common] allow passing cookies to OAuth extractors -- [common] allow overriding more default `User-Agent` headers ([#6496](https://github.com/mikf/gallery-dl/issues/6496)) -- [philomena] switch default `ponybooru` filter ([#6437](https://github.com/mikf/gallery-dl/issues/6437)) -- [pinterest] support `story_pin_music` blocks ([#6421](https://github.com/mikf/gallery-dl/issues/6421)) -- [pixiv] get ugoira frame extension from `meta_single_page` values ([#6056](https://github.com/mikf/gallery-dl/issues/6056)) -- [reddit] support user profile share links ([#6389](https://github.com/mikf/gallery-dl/issues/6389)) -- [steamgriddb] disable `adjust-extensions` for 
`fake-png` files ([#5274](https://github.com/mikf/gallery-dl/issues/5274)) -- [twitter] remove cookies migration workaround -#### Metadata -- [bluesky] provide `author[instance]` metadata ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) -- [instagram] fix `extension` of apparent `.webp` files ([#6541](https://github.com/mikf/gallery-dl/issues/6541)) -- [pillowfort] provide `count` metadata ([#6478](https://github.com/mikf/gallery-dl/issues/6478)) -- [pixiv:ranking] add `rank` metadata field ([#6531](https://github.com/mikf/gallery-dl/issues/6531)) -- [poipiku] return `count` as proper number ([#6445](https://github.com/mikf/gallery-dl/issues/6445)) -- [webtoons] extract `episode_no` for comic results ([#6439](https://github.com/mikf/gallery-dl/issues/6439)) +- [hentaicosplays] update domains ([#6578](https://github.com/mikf/gallery-dl/issues/6578)) +- [pixiv:ranking] implement filtering results by `content` ([#6574](https://github.com/mikf/gallery-dl/issues/6574)) +- [pixiv] include user ID in failed AJAX request warnings ([#6581](https://github.com/mikf/gallery-dl/issues/6581)) #### Options -- [civitai] add `metadata` option - support fetching `generation` data ([#6383](https://github.com/mikf/gallery-dl/issues/6383)) -- [exhentai] implement `tags` option ([#2117](https://github.com/mikf/gallery-dl/issues/2117)) -- [koharu] implement `tags` option -- [rule34xyz] add `format` option ([#1078](https://github.com/mikf/gallery-dl/issues/1078)) -### Downloaders -- [ytdl] fix `AttributeError` caused by `decodeOption()` removal ([#6552](https://github.com/mikf/gallery-dl/issues/6552)) -### Post Processors -- [classify] rewrite - fix skipping existing files ([#5213](https://github.com/mikf/gallery-dl/issues/5213)) -- enable inheriting options from global `postprocessor` objects -- allow `postprocessors` values to be a single post processor object -### Cookies -- support Chromium table version 24 ([#6162](https://github.com/mikf/gallery-dl/issues/6162)) -- fix GCM 
pad length calculation ([#6162](https://github.com/mikf/gallery-dl/issues/6162)) -- try decryption with empty password as fallback -### Documentation -- update recommended `pip` command for installing `dev` version ([#6493](https://github.com/mikf/gallery-dl/issues/6493)) -- update `gallery-dl.conf` ([#6501](https://github.com/mikf/gallery-dl/issues/6501)) -### Options -- add `-4/--force-ipv4` and `-6/--force-ipv6` command-line options -- fix passing negative numbers as arguments ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) -### Miscellaneous -- [output] use default ANSI colors only when stream is a TTY -- [util] implement `defaultdict` filters-environment -- [util] enable using multiple statements for all `filter` options ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) +- [patreon] add `format-images` option ([#6569](https://github.com/mikf/gallery-dl/issues/6569)) +- [zerochan] add `extensions` option ([#6576](https://github.com/mikf/gallery-dl/issues/6576)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.28.0 +Version: 1.28.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -117,9 +117,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.bin>`__ Nightly Builds @@ -76,9 +76,9 @@ Standalone Executable 
Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.bin>`__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 29568cf..96c01a0 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2024-11-30" "1.28.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2024-12-07" "1.28.1" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index c441095..e2c1e14 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2024-11-30" "1.28.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2024-12-07" "1.28.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -747,7 +747,7 @@ Note: If a proxy URL does not include a scheme, \f[I]bool\f[] .IP "Default:" 9 -\f[I]false\f[] +\f[I]true\f[] .IP "Description:" 4 Collect proxy configuration information from environment variables @@ -4040,10 +4040,54 @@ Note: This requires 1 additional HTTP request per post. \f[I]["images", "image_large", "attachments", "postfile", "content"]\f[] .IP "Description:" 4 -Determines the type and order of files to be downloaded. +Determines types and order of files to download. -Available types are -\f[I]postfile\f[], \f[I]images\f[], \f[I]image_large\f[], \f[I]attachments\f[], and \f[I]content\f[]. 
+Available types: + +.br +* \f[I]postfile\f[] +.br +* \f[I]images\f[] +.br +* \f[I]image_large\f[] +.br +* \f[I]attachments\f[] +.br +* \f[I]content\f[] + + +.SS extractor.patreon.format-images +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"download_url"\f[] + +.IP "Description:" 4 +Selects the format of \f[I]images\f[] \f[I]files\f[]. + +Possible formats: + +.br +* \f[I]original\f[] +.br +* \f[I]default\f[] +.br +* \f[I]default_small\f[] +.br +* \f[I]default_blurred\f[] +.br +* \f[I]default_blurred_small\f[] +.br +* \f[I]thumbnail\f[] +.br +* \f[I]thumbnail_large\f[] +.br +* \f[I]thumbnail_small\f[] +.br +* \f[I]url\f[] +.br +* \f[I]download_url\f[] .SS extractor.pillowfort.external @@ -6040,6 +6084,28 @@ Available options can be found in \f[I]youtube-dl's docstrings\f[] +.SS extractor.zerochan.extensions +.IP "Type:" 6 +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]["jpg", "png", "webp", "gif"]\f[] + +.IP "Example:" 4 +.br +* "gif" +.br +* ["webp", "gif", "jpg"] + +.IP "Description:" 4 +List of filename extensions to try when dynamically building download URLs +(\f[I]"pagination": "api"\f[] + +\f[I]"metadata": false\f[]) + + .SS extractor.zerochan.metadata .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index c7382f3..4dc2e14 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -22,7 +22,7 @@ "tls12" : true, "browser" : null, "proxy" : null, - "proxy-env" : false, + "proxy-env" : true, "source-address": null, "retries" : 4, "retry-codes" : [], diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 842dead..f82026d 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.28.0 +Version: 1.28.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl 
Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -117,9 +117,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.bin>`__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index a98e9da..398c9f7 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -187,6 +187,7 @@ gallery_dl/extractor/pornpics.py gallery_dl/extractor/postmill.py gallery_dl/extractor/reactor.py gallery_dl/extractor/readcomiconline.py +gallery_dl/extractor/realbooru.py gallery_dl/extractor/recursive.py gallery_dl/extractor/reddit.py gallery_dl/extractor/redgifs.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 594ce41..8d5f3d0 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -140,6 +140,7 @@ modules = [ "postmill", "reactor", "readcomiconline", + "realbooru", "reddit", "redgifs", "rule34us", diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index bbff17c..f60ea15 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -75,10 +75,13 @@ class BlueskyExtractor(Extractor): quote = embed["record"] if "record" in quote: quote = quote["record"] + value = quote.pop("value", None) + if value is None: + break quote["quote_id"] = self._pid(post) quote["quote_by"] = post["author"] embed = quote.get("embed") - 
quote.update(quote.pop("value")) + quote.update(value) post = quote def posts(self): @@ -202,6 +205,7 @@ class BlueskyUserExtractor(BlueskyExtractor): def items(self): base = "{}/profile/{}/".format(self.root, self.user) return self._dispatch_extractors(( + (BlueskyInfoExtractor , base + "info"), (BlueskyAvatarExtractor , base + "avatar"), (BlueskyBackgroundExtractor, base + "banner"), (BlueskyPostsExtractor , base + "posts"), @@ -298,6 +302,17 @@ class BlueskyPostExtractor(BlueskyExtractor): return self.api.get_post_thread(self.user, self.post_id) +class BlueskyInfoExtractor(BlueskyExtractor): + subcategory = "info" + pattern = USER_PATTERN + r"/info" + example = "https://bsky.app/profile/HANDLE/info" + + def items(self): + self._metadata_user = True + self.api._did_from_actor(self.user) + return iter(((Message.Directory, self._user),)) + + class BlueskyAvatarExtractor(BlueskyExtractor): subcategory = "avatar" filename_fmt = "avatar_{post_id}.{extension}" @@ -324,7 +339,8 @@ class BlueskySearchExtractor(BlueskyExtractor): example = "https://bsky.app/search?q=QUERY" def posts(self): - return self.api.search_posts(self.user) + query = text.unquote(self.user.replace("+", " ")) + return self.api.search_posts(query) class BlueskyHashtagExtractor(BlueskyExtractor): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index f364124..5f9d355 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -348,7 +348,7 @@ class Extractor(): ssl_options = ssl_ciphers = 0 # .netrc Authorization headers are alwsays disabled - session.trust_env = True if self.config("proxy-env", False) else False + session.trust_env = True if self.config("proxy-env", True) else False browser = self.config("browser") if browser is None: @@ -387,8 +387,8 @@ class Extractor(): useragent = self.useragent elif useragent == "browser": useragent = _browser_useragent() - elif useragent is config.get(("extractor",), "user-agent") and \ - useragent == 
Extractor.useragent: + elif self.useragent is not Extractor.useragent and \ + useragent is config.get(("extractor",), "user-agent"): useragent = self.useragent headers["User-Agent"] = useragent headers["Accept"] = "*/*" diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index c3dfd91..37b6747 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -26,16 +26,7 @@ class DanbooruExtractor(BaseExtractor): def _init(self): self.ugoira = self.config("ugoira", False) self.external = self.config("external", False) - - includes = self.config("metadata") - if includes: - if isinstance(includes, (list, tuple)): - includes = ",".join(includes) - elif not isinstance(includes, str): - includes = "artist_commentary,children,notes,parent,uploader" - self.includes = includes + ",id" - else: - self.includes = False + self.includes = False threshold = self.config("threshold") if isinstance(threshold, int): @@ -56,6 +47,16 @@ class DanbooruExtractor(BaseExtractor): return pages * self.per_page def items(self): + # 'includes' initialization must be done here and not in '_init()' + # or it'll cause an exception with e621 when 'metadata' is enabled + includes = self.config("metadata") + if includes: + if isinstance(includes, (list, tuple)): + includes = ",".join(includes) + elif not isinstance(includes, str): + includes = "artist_commentary,children,notes,parent,uploader" + self.includes = includes + ",id" + data = self.metadata() for post in self.posts(): @@ -223,7 +224,7 @@ class DanbooruTagExtractor(DanbooruExtractor): else: prefix = None elif tag.startswith( - ("id:", "md5", "ordfav:", "ordfavgroup:", "ordpool:")): + ("id:", "md5:", "ordfav:", "ordfavgroup:", "ordpool:")): prefix = None break diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index aad5752..2c1174a 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -24,10 +24,6 @@ class 
GelbooruV02Extractor(booru.BooruExtractor): self.user_id = self.config("user-id") self.root_api = self.config_instance("root-api") or self.root - if self.category == "realbooru": - self.items = self._items_realbooru - self._tags = self._tags_realbooru - def _api_request(self, params): url = self.root_api + "/index.php?page=dapi&s=post&q=index" return ElementTree.fromstring(self.request(url, params=params).text) @@ -82,16 +78,17 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = self.page_start * self.per_page data = {} + find_ids = re.compile(r"\sid=\"p(\d+)").findall + while True: - num_ids = 0 page = self.request(url, params=params).text + pids = find_ids(page) - for data["id"] in text.extract_iter(page, '" id="p', '"'): - num_ids += 1 + for data["id"] in pids: for post in self._api_request(data): yield post.attrib - if num_ids < self.per_page: + if len(pids) < self.per_page: return params["pid"] += self.per_page @@ -136,59 +133,8 @@ class GelbooruV02Extractor(booru.BooruExtractor): "body" : text.unescape(text.remove_html(extr(">", "</div>"))), }) - def _file_url_realbooru(self, post): - url = post["file_url"] - md5 = post["md5"] - if md5 not in post["preview_url"] or url.count("/") == 5: - url = "{}/images/{}/{}/{}.{}".format( - self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2]) - return url - - def _items_realbooru(self): - from .common import Message - data = self.metadata() - - for post in self.posts(): - try: - html = self._html(post) - fallback = post["file_url"] - url = post["file_url"] = text.rextract( - html, 'href="', '"', html.index(">Original<"))[0] - except Exception: - self.log.debug("Unable to fetch download URL for post %s " - "(md5: %s)", post.get("id"), post.get("md5")) - continue - - text.nameext_from_url(url, post) - post.update(data) - self._prepare(post) - self._tags(post, html) - - path = url.rpartition("/")[0] - post["_fallback"] = ( - "{}/{}.{}".format(path, post["md5"], post["extension"]), - fallback, - ) - - 
yield Message.Directory, post - yield Message.Url, url, post - - def _tags_realbooru(self, post, page): - tag_container = text.extr(page, 'id="tagLink"', '</div>') - tags = collections.defaultdict(list) - pattern = re.compile( - r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)') - for tag_type, tag_name in pattern.findall(tag_container): - tags[tag_type].append(text.unescape(text.unquote(tag_name))) - for key, value in tags.items(): - post["tags_" + key] = " ".join(value) - BASE_PATTERN = GelbooruV02Extractor.update({ - "realbooru": { - "root": "https://realbooru.com", - "pattern": r"realbooru\.com", - }, "rule34": { "root": "https://rule34.xxx", "root-api": "https://api.rule34.xxx", diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py index 52b4ae6..ef9ea60 100644 --- a/gallery_dl/extractor/gofile.py +++ b/gallery_dl/extractor/gofile.py @@ -75,8 +75,8 @@ class GofileFolderExtractor(Extractor): @cache(maxage=86400) def _get_website_token(self): self.log.debug("Fetching website token") - page = self.request(self.root + "/dist/js/alljs.js").text - return text.extr(page, 'wt: "', '"') + page = self.request(self.root + "/dist/js/global.js").text + return text.extr(page, '.wt = "', '"') def _get_content(self, content_id, password=None): headers = {"Authorization": "Bearer " + self.api_token} diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py index fbbae16..4992b7b 100644 --- a/gallery_dl/extractor/hentaicosplays.py +++ b/gallery_dl/extractor/hentaicosplays.py @@ -5,31 +5,46 @@ # published by the Free Software Foundation. """Extractors for https://hentai-cosplay-xxx.com/ -(also works for hentai-img.com and porn-images-xxx.com)""" +(also works for hentai-img-xxx.com and porn-image.com)""" -from .common import GalleryExtractor +from .common import BaseExtractor, GalleryExtractor from .. 
import text -class HentaicosplaysGalleryExtractor(GalleryExtractor): +class HentaicosplaysExtractor(BaseExtractor): + basecategory = "hentaicosplays" + + +BASE_PATTERN = HentaicosplaysExtractor.update({ + "hentaicosplay": { + "root": "https://hentai-cosplay-xxx.com", + "pattern": r"(?:\w\w\.)?hentai-cosplays?(?:-xxx)?\.com", + }, + "hentaiimg": { + "root": "https://hentai-img-xxx.com", + "pattern": r"(?:\w\w\.)?hentai-img(?:-xxx)?\.com", + }, + "pornimage": { + "root": "https://porn-image.com", + "pattern": r"(?:\w\w\.)?porn-images?(?:-xxx)?\.com", + }, +}) + + +class HentaicosplaysGalleryExtractor( + HentaicosplaysExtractor, GalleryExtractor): """Extractor for image galleries from - hentai-cosplay-xxx.com, hentai-img.com, and porn-images-xxx.com""" - category = "hentaicosplays" + hentai-cosplay-xxx.com, hentai-img-xxx.com, and porn-image.com""" directory_fmt = ("{site}", "{title}") filename_fmt = "{filename}.{extension}" archive_fmt = "{title}_{filename}" - pattern = r"((?:https?://)?(?:\w{2}\.)?" 
\ - r"(hentai-cosplay(?:s|-xxx)|hentai-img|porn-images-xxx)\.com)/" \ - r"(?:image|story)/([\w-]+)" + pattern = BASE_PATTERN + r"/(?:image|story)/([\w-]+)" example = "https://hentai-cosplay-xxx.com/image/TITLE/" def __init__(self, match): - root, self.site, self.slug = match.groups() - self.root = text.ensure_http_scheme(root) - if self.root == "https://hentai-cosplays.com": - self.root = "https://hentai-cosplay-xxx.com" - url = "{}/story/{}/".format(self.root, self.slug) - GalleryExtractor.__init__(self, match, url) + BaseExtractor.__init__(self, match) + self.slug = self.groups[-1] + self.gallery_url = "{}/story/{}/".format(self.root, self.slug) def _init(self): self.session.headers["Referer"] = self.gallery_url @@ -39,7 +54,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor): return { "title": text.unescape(title.rpartition(" Story Viewer - ")[0]), "slug" : self.slug, - "site" : self.site, + "site" : self.root.partition("://")[2].rpartition(".")[0], } def images(self, page): diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index bff3156..47e071a 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -338,9 +338,9 @@ class InkbunnyAPI(): def _call(self, endpoint, params): url = "https://inkbunny.net/api_" + endpoint + ".php" - params["sid"] = self.session_id while True: + params["sid"] = self.session_id data = self.extractor.request(url, params=params).json() if "error_code" not in data: diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index a866f45..e6b6b14 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -207,8 +207,8 @@ class InstagramExtractor(Extractor): for user in coauthors ] - if "carousel_media" in post: - items = post["carousel_media"] + items = post.get("carousel_media") + if items: data["sidecar_media_id"] = data["post_id"] data["sidecar_shortcode"] = data["post_shortcode"] else: diff --git 
a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 3d04f75..16c5b99 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -552,7 +552,8 @@ class KemonoAPI(): return response.json() def _pagination(self, endpoint, params, batch=50, key=False): - params["o"] = text.parse_int(params.get("o")) % 50 + offset = text.parse_int(params.get("o")) + params["o"] = offset - offset % batch while True: data = self._call(endpoint, params) diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py index 90c5420..0d656d0 100644 --- a/gallery_dl/extractor/nhentai.py +++ b/gallery_dl/extractor/nhentai.py @@ -11,6 +11,7 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, util import collections +import random class NhentaiGalleryExtractor(GalleryExtractor): @@ -59,15 +60,18 @@ class NhentaiGalleryExtractor(GalleryExtractor): } def images(self, _): - ufmt = ("https://i.nhentai.net/galleries/" + - self.data["media_id"] + "/{}.{}") - extdict = {"j": "jpg", "p": "png", "g": "gif", "w": "webp"} + exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"} + + data = self.data + ufmt = ("https://i{}.nhentai.net/galleries/" + + data["media_id"] + "/{}.{}").format return [ - (ufmt.format(num, extdict.get(img["t"], "jpg")), { - "width": img["w"], "height": img["h"], + (ufmt(random.randint(1, 4), num, exts.get(img["t"], "jpg")), { + "width" : img["w"], + "height": img["h"], }) - for num, img in enumerate(self.data["images"]["pages"], 1) + for num, img in enumerate(data["images"]["pages"], 1) ] diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 3eacf1a..e4a5985 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -23,18 +23,22 @@ class PatreonExtractor(Extractor): directory_fmt = ("{category}", "{creator[full_name]}") filename_fmt = "{id}_{title}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" + 
useragent = "Patreon/72.2.28 (Android; Android 14; Scale/2.10)" _warning = True def _init(self): - if self.cookies_check(("session_id",)): - self.session.headers["User-Agent"] = \ - "Patreon/72.2.28 (Android; Android 14; Scale/2.10)" - else: + if not self.cookies_check(("session_id",)): if self._warning: PatreonExtractor._warning = False self.log.warning("no 'session_id' cookie set") - self.session.headers["User-Agent"] = \ - "Patreon/7.6.28 (Android; Android 11; Scale/2.10)" + if self.session.headers["User-Agent"] is self.useragent: + self.session.headers["User-Agent"] = \ + "Patreon/7.6.28 (Android; Android 11; Scale/2.10)" + + format_images = self.config("format-images") + if format_images: + self._images_fmt = format_images + self._images_url = self._images_url_fmt def items(self): generators = self._build_file_generators(self.config("files")) @@ -80,11 +84,20 @@ class PatreonExtractor(Extractor): def _images(self, post): for image in post.get("images") or (): - url = image.get("download_url") + url = self._images_url(image) if url: name = image.get("file_name") or self._filename(url) or url yield "image", url, name + def _images_url(self, image): + return image.get("download_url") + + def _images_url_fmt(self, image): + try: + return image["image_urls"][self._images_fmt] + except Exception: + return image.get("download_url") + def _image_large(self, post): image = post.get("image") if image: diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 8ad061d..6207bf7 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -380,8 +380,9 @@ class PixivArtworksExtractor(PixivExtractor): ajax_ids.extend(map(int, body["manga"])) ajax_ids.sort() except Exception as exc: - self.log.warning("Unable to collect artwork IDs using AJAX " - "API (%s: %s)", exc.__class__.__name__, exc) + self.log.warning("u%s: Failed to collect artwork IDs " + "using AJAX API (%s: %s)", + self.user_id, exc.__class__.__name__, exc) else: works = 
self._extend_sanity(works, ajax_ids) @@ -607,8 +608,12 @@ class PixivRankingExtractor(PixivExtractor): def works(self): ranking = self.ranking - for ranking["rank"], work in enumerate( - self.api.illust_ranking(self.mode, self.date), 1): + + works = self.api.illust_ranking(self.mode, self.date) + if self.type: + works = filter(lambda work, t=self.type: work["type"] == t, works) + + for ranking["rank"], work in enumerate(works, 1): yield work def metadata(self): @@ -648,10 +653,13 @@ class PixivRankingExtractor(PixivExtractor): date = (now - timedelta(days=1)).strftime("%Y-%m-%d") self.date = date + self.type = type = query.get("content") + self.ranking = ranking = { "mode": mode, "date": self.date, "rank": 0, + "type": type or "all", } return {"ranking": ranking} diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index 271fa50..c0374eb 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -79,13 +79,22 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): def images(self, page): results = [] + referer = {"_http_headers": {"Referer": self.gallery_url}} + root = text.extr(page, "return baeu(l, '", "'") + + replacements = re.findall( + r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)", page) for block in page.split(" pth = '")[1:]: pth = text.extr(block, "", "'") + for needle, repl in re.findall( r"pth = pth\.replace\(/([^/]+)/g, [\"']([^\"']*)", block): pth = pth.replace(needle, repl) - results.append((beau(pth), None)) + for needle, repl in replacements: + pth = pth.replace(needle, repl) + + results.append((baeu(pth, root), referer)) return results @@ -119,20 +128,24 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): return results -def beau(url): - """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.1""" +def baeu(url, root="", root_blogspot="https://2.bp.blogspot.com"): + 
"""https://readcomiconline.li/Scripts/rguard.min.js""" + if not root: + root = root_blogspot + url = url.replace("pw_.g28x", "b") url = url.replace("d2pr.x_27", "h") if url.startswith("https"): - return url - - url, sep, rest = url.partition("?") - containsS0 = "=s0" in url - url = url[:-3 if containsS0 else -6] - url = url[15:33] + url[50:] - url = url[0:-11] + url[-2:] - url = binascii.a2b_base64(url).decode() - url = url[0:13] + url[17:] - url = url[0:-2] + ("=s0" if containsS0 else "=s1600") - return "https://2.bp.blogspot.com/" + url + sep + rest + return url.replace(root_blogspot, root, 1) + + path, sep, query = url.partition("?") + + contains_s0 = "=s0" in path + path = path[:-3 if contains_s0 else -6] + path = path[15:33] + path[50:] # step1() + path = path[0:-11] + path[-2:] # step2() + path = binascii.a2b_base64(path).decode() # atob() + path = path[0:13] + path[17:] + path = path[0:-2] + ("=s0" if contains_s0 else "=s1600") + return root + "/" + path + sep + query diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py new file mode 100644 index 0000000..ab8a9b1 --- /dev/null +++ b/gallery_dl/extractor/realbooru.py @@ -0,0 +1,157 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://realbooru.com/""" + +from . import booru +from .. 
import text, util +import collections +import re + +BASE_PATTERN = r"(?:https?://)?realbooru\.com" + + +class RealbooruExtractor(booru.BooruExtractor): + basecategory = "booru" + category = "realbooru" + root = "https://realbooru.com" + + def _parse_post(self, post_id): + url = "{}/index.php?page=post&s=view&id={}".format( + self.root, post_id) + page = self.request(url).text + extr = text.extract_from(page) + rating = extr('name="rating" content="', '"') + extr('class="container"', '>') + + post = { + "_html" : page, + "id" : post_id, + "rating" : "e" if rating == "adult" else (rating or "?")[0], + "tags" : text.unescape(extr(' alt="', '"')), + "file_url" : extr('src="', '"'), + "created_at": extr(">Posted at ", " by "), + "uploader" : extr(">", "<"), + "score" : extr('">', "<"), + "title" : extr('id="title" style="width: 100%;" value="', '"'), + "source" : extr('d="source" style="width: 100%;" value="', '"'), + } + + post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0] + return post + + def skip(self, num): + self.page_start += num + return num + + def _prepare(self, post): + post["date"] = text.parse_datetime(post["created_at"], "%b, %d %Y") + + def _pagination(self, params, begin, end): + url = self.root + "/index.php" + params["pid"] = self.page_start + + while True: + page = self.request(url, params=params).text + + cnt = 0 + for post_id in text.extract_iter(page, begin, end): + cnt += 1 + yield self._parse_post(post_id) + + if cnt < self.per_page: + return + params["pid"] += self.per_page + + def _tags(self, post, _): + page = post["_html"] + tag_container = text.extr(page, 'id="tagLink"', '</div>') + tags = collections.defaultdict(list) + pattern = re.compile( + r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)') + for tag_type, tag_name in pattern.findall(tag_container): + tags[tag_type].append(text.unescape(text.unquote(tag_name))) + for key, value in tags.items(): + post["tags_" + key] = " ".join(value) + + +class 
RealbooruTagExtractor(RealbooruExtractor): + subcategory = "tag" + directory_fmt = ("{category}", "{search_tags}") + archive_fmt = "t_{search_tags}_{id}" + per_page = 42 + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)" + example = "https://realbooru.com/index.php?page=post&s=list&tags=TAG" + + def metadata(self): + self.tags = text.unquote(self.groups[0].replace("+", " ")) + return {"search_tags": self.tags} + + def posts(self): + return self._pagination({ + "page": "post", + "s" : "list", + "tags": self.tags, + }, '<a id="p', '"') + + +class RealbooruFavoriteExtractor(RealbooruExtractor): + subcategory = "favorite" + directory_fmt = ("{category}", "favorites", "{favorite_id}") + archive_fmt = "f_{favorite_id}_{id}" + per_page = 50 + pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)" + example = "https://realbooru.com/index.php?page=favorites&s=view&id=12345" + + def metadata(self): + return {"favorite_id": text.parse_int(self.groups[0])} + + def posts(self): + return self._pagination({ + "page": "favorites", + "s" : "view", + "id" : self.groups[0], + }, '" id="p', '"') + + +class RealbooruPoolExtractor(RealbooruExtractor): + subcategory = "pool" + directory_fmt = ("{category}", "pool", "{pool} {pool_name}") + archive_fmt = "p_{pool}_{id}" + pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)" + example = "https://realbooru.com/index.php?page=pool&s=show&id=12345" + + def metadata(self): + pool_id = self.groups[0] + url = "{}/index.php?page=pool&s=show&id={}".format(self.root, pool_id) + page = self.request(url).text + + name, pos = text.extract(page, "<h4>Pool: ", "</h4>") + self.post_ids = text.extract_iter( + page, 'class="thumb" id="p', '"', pos) + + return { + "pool": text.parse_int(pool_id), + "pool_name": text.unescape(name), + } + + def posts(self): + return map( + self._parse_post, + util.advance(self.post_ids, self.page_start) + ) + + +class RealbooruPostExtractor(RealbooruExtractor): + 
subcategory = "post" + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" + example = "https://realbooru.com/index.php?page=post&s=view&id=12345" + + def posts(self): + return (self._parse_post(self.groups[0]),) diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index f9b1a7f..4c4fb3a 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -145,6 +145,14 @@ class ZerochanTagExtractor(ZerochanExtractor): self.posts = self.posts_api self.session.headers["User-Agent"] = util.USERAGENT + exts = self.config("extensions") + if exts: + if isinstance(exts, str): + exts = exts.split(",") + self.exts = exts + else: + self.exts = ("jpg", "png", "webp", "gif") + def metadata(self): return {"search_tags": text.unquote( self.search_tag.replace("+", " "))} @@ -194,8 +202,6 @@ class ZerochanTagExtractor(ZerochanExtractor): "p" : self.page_start, } - static = "https://static.zerochan.net/.full." - while True: response = self.request(url, params=params, allow_redirects=False) @@ -221,15 +227,20 @@ class ZerochanTagExtractor(ZerochanExtractor): yield post else: for post in posts: - base = static + str(post["id"]) - post["file_url"] = base + ".jpg" - post["_fallback"] = (base + ".png",) + urls = self._urls(post) + post["file_url"] = next(urls) + post["_fallback"] = urls yield post if not data.get("next"): return params["p"] += 1 + def _urls(self, post, static="https://static.zerochan.net/.full."): + base = static + str(post["id"]) + "." + for ext in self.exts: + yield base + ext + class ZerochanImageExtractor(ZerochanExtractor): subcategory = "image" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 2bf03f4..2dab0d6 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.0" +__version__ = "1.28.1" __variant__ = None |
