From f6877087773089220d68288d055276fca6c556d4 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sun, 8 Dec 2024 20:34:33 -0500 Subject: New upstream version 1.28.1. --- CHANGELOG.md | 103 +++++---------------- PKG-INFO | 6 +- README.rst | 4 +- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 76 +++++++++++++++- docs/gallery-dl.conf | 2 +- gallery_dl.egg-info/PKG-INFO | 6 +- gallery_dl.egg-info/SOURCES.txt | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/bluesky.py | 20 +++- gallery_dl/extractor/common.py | 6 +- gallery_dl/extractor/danbooru.py | 23 ++--- gallery_dl/extractor/gelbooru_v02.py | 64 +------------ gallery_dl/extractor/gofile.py | 4 +- gallery_dl/extractor/hentaicosplays.py | 45 ++++++--- gallery_dl/extractor/inkbunny.py | 2 +- gallery_dl/extractor/instagram.py | 4 +- gallery_dl/extractor/kemonoparty.py | 3 +- gallery_dl/extractor/nhentai.py | 16 ++-- gallery_dl/extractor/patreon.py | 27 ++++-- gallery_dl/extractor/pixiv.py | 16 +++- gallery_dl/extractor/readcomiconline.py | 41 ++++++--- gallery_dl/extractor/realbooru.py | 157 ++++++++++++++++++++++++++++++++ gallery_dl/extractor/zerochan.py | 21 ++++- gallery_dl/version.py | 2 +- 25 files changed, 422 insertions(+), 230 deletions(-) create mode 100644 gallery_dl/extractor/realbooru.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e3dec8c..b831cd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,87 +1,26 @@ -## 1.28.0 - 2024-11-30 -### Changes -- [common] disable using environment network settings by default (`HTTP_PROXY`, `.netrc`, …) - - disable `trust_env` session attribute - - disable `Authorization` header injection from `.netrc` auth ([#5780](https://github.com/mikf/gallery-dl/issues/5780), [#6134](https://github.com/mikf/gallery-dl/issues/6134), [#6455](https://github.com/mikf/gallery-dl/issues/6455)) - - add `proxy-env` option -- [ytdl] change `forward-cookies` default value to `true` ([#6401](https://github.com/mikf/gallery-dl/issues/6401), [#6348](https://github.com/mikf/gallery-dl/issues/6348)) +## 1.28.1 - 2024-12-07 ### Extractors #### Additions -- [bilibili] add support for `opus` articles ([#2824](https://github.com/mikf/gallery-dl/issues/2824), [#6443](https://github.com/mikf/gallery-dl/issues/6443)) -- [bluesky] add `hashtag` extractor ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) -- [danbooru] add `artist` and `artist-search` extractors ([#5348](https://github.com/mikf/gallery-dl/issues/5348)) -- [everia] add support ([#1067](https://github.com/mikf/gallery-dl/issues/1067), [#2472](https://github.com/mikf/gallery-dl/issues/2472), [#4091](https://github.com/mikf/gallery-dl/issues/4091), [#6227](https://github.com/mikf/gallery-dl/issues/6227)) -- [facebook] add support ([#470](https://github.com/mikf/gallery-dl/issues/470), [#2612](https://github.com/mikf/gallery-dl/issues/2612), [#5626](https://github.com/mikf/gallery-dl/issues/5626), [#6548](https://github.com/mikf/gallery-dl/issues/6548)) -- [hentaifoundry] add `tag` extractor ([#6465](https://github.com/mikf/gallery-dl/issues/6465)) -- [hitomi] add `index` and `search` extractors ([#2502](https://github.com/mikf/gallery-dl/issues/2502), [#6392](https://github.com/mikf/gallery-dl/issues/6392), [#3720](https://github.com/mikf/gallery-dl/issues/3720)) -- [motherless] add support ([#2074](https://github.com/mikf/gallery-dl/issues/2074), [#4413](https://github.com/mikf/gallery-dl/issues/4413), [#6221](https://github.com/mikf/gallery-dl/issues/6221)) -- [noop] add `noop` extractor -- [rule34vault] add support ([#5708](https://github.com/mikf/gallery-dl/issues/5708), [#6240](https://github.com/mikf/gallery-dl/issues/6240)) -- [rule34xyz] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078), [#4960](https://github.com/mikf/gallery-dl/issues/4960)) -- [saint] add support ([#4405](https://github.com/mikf/gallery-dl/issues/4405), [#6324](https://github.com/mikf/gallery-dl/issues/6324)) -- [tumblr] add `search` extractor ([#6394](https://github.com/mikf/gallery-dl/issues/6394)) +- [bluesky] add `info` extractor #### Fixes -- [8chan] avoid performing network requests within `_init()` ([#6387](https://github.com/mikf/gallery-dl/issues/6387)) -- [bluesky] fix downloads from non-bsky PDSs ([#6406](https://github.com/mikf/gallery-dl/issues/6406)) -- [bunkr] fix album names containing `<>&` characters -- [flickr] use `download` URLs ([#6360](https://github.com/mikf/gallery-dl/issues/6360), [#6464](https://github.com/mikf/gallery-dl/issues/6464)) -- [hiperdex] update domain to `hipertoon.com` ([#6420](https://github.com/mikf/gallery-dl/issues/6420)) -- [imagechest] fix extractors ([#6475](https://github.com/mikf/gallery-dl/issues/6475), [#6491](https://github.com/mikf/gallery-dl/issues/6491)) -- [instagram] fix using numeric cursor values ([#6414](https://github.com/mikf/gallery-dl/issues/6414)) -- [kemonoparty] update to new site layout ([#6415](https://github.com/mikf/gallery-dl/issues/6415), [#6503](https://github.com/mikf/gallery-dl/issues/6503), [#6528](https://github.com/mikf/gallery-dl/issues/6528), [#6530](https://github.com/mikf/gallery-dl/issues/6530), [#6536](https://github.com/mikf/gallery-dl/issues/6536), [#6542](https://github.com/mikf/gallery-dl/issues/6542), [#6554](https://github.com/mikf/gallery-dl/issues/6554)) -- [koharu] update domain to `niyaniya.moe` ([#6430](https://github.com/mikf/gallery-dl/issues/6430), [#6432](https://github.com/mikf/gallery-dl/issues/6432)) -- [mangadex] apply `lang` option only to chapter results ([#6372](https://github.com/mikf/gallery-dl/issues/6372)) -- [newgrounds] fix metadata extraction ([#6463](https://github.com/mikf/gallery-dl/issues/6463), [#6533](https://github.com/mikf/gallery-dl/issues/6533)) -- [nhentai] support `.webp` files ([#6442](https://github.com/mikf/gallery-dl/issues/6442), [#6479](https://github.com/mikf/gallery-dl/issues/6479)) -- [patreon] use legacy mobile UA when no `session_id` is set -- [pinterest] update API headers ([#6513](https://github.com/mikf/gallery-dl/issues/6513)) -- [pinterest] detect video/audio by block content ([#6421](https://github.com/mikf/gallery-dl/issues/6421)) -- [scrolller] prevent exception for posts without `mediaSources` ([#5051](https://github.com/mikf/gallery-dl/issues/5051)) -- [tumblrgallery] fix file downloads ([#6391](https://github.com/mikf/gallery-dl/issues/6391)) -- [twitter] make `source` metadata extraction non-fatal ([#6472](https://github.com/mikf/gallery-dl/issues/6472)) -- [weibo] fix livephoto `filename` & `extension` ([#6471](https://github.com/mikf/gallery-dl/issues/6471)) +- [bluesky] fix exception when encountering non-quote embeds ([#6577](https://github.com/mikf/gallery-dl/issues/6577)) +- [bluesky] unescape search queries ([#6579](https://github.com/mikf/gallery-dl/issues/6579)) +- [common] restore using environment proxy settings by default ([#6553](https://github.com/mikf/gallery-dl/issues/6553), [#6609](https://github.com/mikf/gallery-dl/issues/6609)) +- [common] improve handling of `user-agent` settings ([#6594](https://github.com/mikf/gallery-dl/issues/6594)) +- [e621] fix `TypeError` when `metadata` is enabled ([#6587](https://github.com/mikf/gallery-dl/issues/6587)) +- [gofile] fix website token extraction ([#6596](https://github.com/mikf/gallery-dl/issues/6596)) +- [inkbunny] fix re-login loop ([#6618](https://github.com/mikf/gallery-dl/issues/6618)) +- [instagram] handle empty `carousel_media` entries ([#6595](https://github.com/mikf/gallery-dl/issues/6595)) +- [kemonoparty] fix `o` query parameter handling ([#6597](https://github.com/mikf/gallery-dl/issues/6597)) +- [nhentai] fix download URLs ([#6620](https://github.com/mikf/gallery-dl/issues/6620)) +- [readcomiconline] fix `chapter` extraction ([#6070](https://github.com/mikf/gallery-dl/issues/6070), [#6335](https://github.com/mikf/gallery-dl/issues/6335)) +- [realbooru] fix extraction ([#6543](https://github.com/mikf/gallery-dl/issues/6543)) +- [rule34] fix `favorite` extraction ([#6573](https://github.com/mikf/gallery-dl/issues/6573)) +- [zerochan] download `.webp` and `.gif` files ([#6576](https://github.com/mikf/gallery-dl/issues/6576)) #### Improvements -- [bluesky] support `main.bsky.dev` URLs ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) -- [bluesky] match common embed fixes ([#6410](https://github.com/mikf/gallery-dl/issues/6410), [#6411](https://github.com/mikf/gallery-dl/issues/6411)) -- [boosty] update default video format list ([#2387](https://github.com/mikf/gallery-dl/issues/2387)) -- [bunkr] support `bunkr.cr` URLs -- [common] allow passing cookies to OAuth extractors -- [common] allow overriding more default `User-Agent` headers ([#6496](https://github.com/mikf/gallery-dl/issues/6496)) -- [philomena] switch default `ponybooru` filter ([#6437](https://github.com/mikf/gallery-dl/issues/6437)) -- [pinterest] support `story_pin_music` blocks ([#6421](https://github.com/mikf/gallery-dl/issues/6421)) -- [pixiv] get ugoira frame extension from `meta_single_page` values ([#6056](https://github.com/mikf/gallery-dl/issues/6056)) -- [reddit] support user profile share links ([#6389](https://github.com/mikf/gallery-dl/issues/6389)) -- [steamgriddb] disable `adjust-extensions` for `fake-png` files ([#5274](https://github.com/mikf/gallery-dl/issues/5274)) -- [twitter] remove cookies migration workaround -#### Metadata -- [bluesky] provide `author[instance]` metadata ([#4438](https://github.com/mikf/gallery-dl/issues/4438)) -- [instagram] fix `extension` of apparent `.webp` files ([#6541](https://github.com/mikf/gallery-dl/issues/6541)) -- [pillowfort] provide `count` metadata ([#6478](https://github.com/mikf/gallery-dl/issues/6478)) -- [pixiv:ranking] add `rank` metadata field ([#6531](https://github.com/mikf/gallery-dl/issues/6531)) -- [poipiku] return `count` as proper number ([#6445](https://github.com/mikf/gallery-dl/issues/6445)) -- [webtoons] extract `episode_no` for comic results ([#6439](https://github.com/mikf/gallery-dl/issues/6439)) +- [hentaicosplays] update domains ([#6578](https://github.com/mikf/gallery-dl/issues/6578)) +- [pixiv:ranking] implement filtering results by `content` ([#6574](https://github.com/mikf/gallery-dl/issues/6574)) +- [pixiv] include user ID in failed AJAX request warnings ([#6581](https://github.com/mikf/gallery-dl/issues/6581)) #### Options -- [civitai] add `metadata` option - support fetching `generation` data ([#6383](https://github.com/mikf/gallery-dl/issues/6383)) -- [exhentai] implement `tags` option ([#2117](https://github.com/mikf/gallery-dl/issues/2117)) -- [koharu] implement `tags` option -- [rule34xyz] add `format` option ([#1078](https://github.com/mikf/gallery-dl/issues/1078)) -### Downloaders -- [ytdl] fix `AttributeError` caused by `decodeOption()` removal ([#6552](https://github.com/mikf/gallery-dl/issues/6552)) -### Post Processors -- [classify] rewrite - fix skipping existing files ([#5213](https://github.com/mikf/gallery-dl/issues/5213)) -- enable inheriting options from global `postprocessor` objects -- allow `postprocessors` values to be a single post processor object -### Cookies -- support Chromium table version 24 ([#6162](https://github.com/mikf/gallery-dl/issues/6162)) -- fix GCM pad length calculation ([#6162](https://github.com/mikf/gallery-dl/issues/6162)) -- try decryption with empty password as fallback -### Documentation -- update recommended `pip` command for installing `dev` version ([#6493](https://github.com/mikf/gallery-dl/issues/6493)) -- update `gallery-dl.conf` ([#6501](https://github.com/mikf/gallery-dl/issues/6501)) -### Options -- add `-4/--force-ipv4` and `-6/--force-ipv6` command-line options -- fix passing negative numbers as arguments ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) -### Miscellaneous -- [output] use default ANSI colors only when stream is a TTY -- [util] implement `defaultdict` filters-environment -- [util] enable using multiple statements for all `filter` options ([#5262](https://github.com/mikf/gallery-dl/issues/5262)) +- [patreon] add `format-images` option ([#6569](https://github.com/mikf/gallery-dl/issues/6569)) +- [zerochan] add `extensions` option ([#6576](https://github.com/mikf/gallery-dl/issues/6576)) diff --git a/PKG-INFO b/PKG-INFO index 842dead..f82026d 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.28.0 +Version: 1.28.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -117,9 +117,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/README.rst b/README.rst index 8b8b74f..63d400f 100644 --- a/README.rst +++ b/README.rst @@ -76,9 +76,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 29568cf..96c01a0 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2024-11-30" "1.28.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2024-12-07" "1.28.1" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index c441095..e2c1e14 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2024-11-30" "1.28.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2024-12-07" "1.28.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -747,7 +747,7 @@ Note: If a proxy URL does not include a scheme, \f[I]bool\f[] .IP "Default:" 9 -\f[I]false\f[] +\f[I]true\f[] .IP "Description:" 4 Collect proxy configuration information from environment variables @@ -4040,10 +4040,54 @@ Note: This requires 1 additional HTTP request per post. \f[I]["images", "image_large", "attachments", "postfile", "content"]\f[] .IP "Description:" 4 -Determines the type and order of files to be downloaded. +Determines types and order of files to download. -Available types are -\f[I]postfile\f[], \f[I]images\f[], \f[I]image_large\f[], \f[I]attachments\f[], and \f[I]content\f[]. +Available types: + +.br +* \f[I]postfile\f[] +.br +* \f[I]images\f[] +.br +* \f[I]image_large\f[] +.br +* \f[I]attachments\f[] +.br +* \f[I]content\f[] + + +.SS extractor.patreon.format-images +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"download_url"\f[] + +.IP "Description:" 4 +Selects the format of \f[I]images\f[] \f[I]files\f[]. + +Possible formats: + +.br +* \f[I]original\f[] +.br +* \f[I]default\f[] +.br +* \f[I]default_small\f[] +.br +* \f[I]default_blurred\f[] +.br +* \f[I]default_blurred_small\f[] +.br +* \f[I]thumbnail\f[] +.br +* \f[I]thumbnail_large\f[] +.br +* \f[I]thumbnail_small\f[] +.br +* \f[I]url\f[] +.br +* \f[I]download_url\f[] .SS extractor.pillowfort.external @@ -6040,6 +6084,28 @@ Available options can be found in \f[I]youtube-dl's docstrings\f[] +.SS extractor.zerochan.extensions +.IP "Type:" 6 +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]["jpg", "png", "webp", "gif"]\f[] + +.IP "Example:" 4 +.br +* "gif" +.br +* ["webp", "gif", "jpg"} + +.IP "Description:" 4 +List of filename extensions to try when dynamically building download URLs +(\f[I]"pagination": "api"\f[] + +\f[I]"metadata": false\f[]) + + .SS extractor.zerochan.metadata .IP "Type:" 6 \f[I]bool\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index c7382f3..4dc2e14 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -22,7 +22,7 @@ "tls12" : true, "browser" : null, "proxy" : null, - "proxy-env" : false, + "proxy-env" : true, "source-address": null, "retries" : 4, "retry-codes" : [], diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 842dead..f82026d 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.28.0 +Version: 1.28.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -117,9 +117,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ +- `Windows `__ (Requires `Microsoft Visual C++ Redistributable Package (x86) `__) -- `Linux `__ +- `Linux `__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index a98e9da..398c9f7 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -187,6 +187,7 @@ gallery_dl/extractor/pornpics.py gallery_dl/extractor/postmill.py gallery_dl/extractor/reactor.py gallery_dl/extractor/readcomiconline.py +gallery_dl/extractor/realbooru.py gallery_dl/extractor/recursive.py gallery_dl/extractor/reddit.py gallery_dl/extractor/redgifs.py diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 594ce41..8d5f3d0 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -140,6 +140,7 @@ modules = [ "postmill", "reactor", "readcomiconline", + "realbooru", "reddit", "redgifs", "rule34us", diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index bbff17c..f60ea15 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -75,10 +75,13 @@ class BlueskyExtractor(Extractor): quote = embed["record"] if "record" in quote: quote = quote["record"] + value = quote.pop("value", None) + if value is None: + break quote["quote_id"] = self._pid(post) quote["quote_by"] = post["author"] embed = quote.get("embed") - quote.update(quote.pop("value")) + quote.update(value) post = quote def posts(self): @@ -202,6 +205,7 @@ class BlueskyUserExtractor(BlueskyExtractor): def items(self): base = "{}/profile/{}/".format(self.root, self.user) return self._dispatch_extractors(( + (BlueskyInfoExtractor , base + "info"), (BlueskyAvatarExtractor , base + "avatar"), (BlueskyBackgroundExtractor, base + "banner"), (BlueskyPostsExtractor , base + "posts"), @@ -298,6 +302,17 @@ class BlueskyPostExtractor(BlueskyExtractor): return self.api.get_post_thread(self.user, self.post_id) +class BlueskyInfoExtractor(BlueskyExtractor): + subcategory = "info" + pattern = USER_PATTERN + r"/info" + example = "https://bsky.app/profile/HANDLE/info" + + def items(self): + self._metadata_user = True + self.api._did_from_actor(self.user) + return iter(((Message.Directory, self._user),)) + + class BlueskyAvatarExtractor(BlueskyExtractor): subcategory = "avatar" filename_fmt = "avatar_{post_id}.{extension}" @@ -324,7 +339,8 @@ class BlueskySearchExtractor(BlueskyExtractor): example = "https://bsky.app/search?q=QUERY" def posts(self): - return self.api.search_posts(self.user) + query = text.unquote(self.user.replace("+", " ")) + return self.api.search_posts(query) class BlueskyHashtagExtractor(BlueskyExtractor): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index f364124..5f9d355 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -348,7 +348,7 @@ class Extractor(): ssl_options = ssl_ciphers = 0 # .netrc Authorization headers are alwsays disabled - session.trust_env = True if self.config("proxy-env", False) else False + session.trust_env = True if self.config("proxy-env", True) else False browser = self.config("browser") if browser is None: @@ -387,8 +387,8 @@ class Extractor(): useragent = self.useragent elif useragent == "browser": useragent = _browser_useragent() - elif useragent is config.get(("extractor",), "user-agent") and \ - useragent == Extractor.useragent: + elif self.useragent is not Extractor.useragent and \ + useragent is config.get(("extractor",), "user-agent"): useragent = self.useragent headers["User-Agent"] = useragent headers["Accept"] = "*/*" diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index c3dfd91..37b6747 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -26,16 +26,7 @@ class DanbooruExtractor(BaseExtractor): def _init(self): self.ugoira = self.config("ugoira", False) self.external = self.config("external", False) - - includes = self.config("metadata") - if includes: - if isinstance(includes, (list, tuple)): - includes = ",".join(includes) - elif not isinstance(includes, str): - includes = "artist_commentary,children,notes,parent,uploader" - self.includes = includes + ",id" - else: - self.includes = False + self.includes = False threshold = self.config("threshold") if isinstance(threshold, int): @@ -56,6 +47,16 @@ class DanbooruExtractor(BaseExtractor): return pages * self.per_page def items(self): + # 'includes' initialization must be done here and not in '_init()' + # or it'll cause an exception with e621 when 'metadata' is enabled + includes = self.config("metadata") + if includes: + if isinstance(includes, (list, tuple)): + includes = ",".join(includes) + elif not isinstance(includes, str): + includes = "artist_commentary,children,notes,parent,uploader" + self.includes = includes + ",id" + data = self.metadata() for post in self.posts(): @@ -223,7 +224,7 @@ class DanbooruTagExtractor(DanbooruExtractor): else: prefix = None elif tag.startswith( - ("id:", "md5", "ordfav:", "ordfavgroup:", "ordpool:")): + ("id:", "md5:", "ordfav:", "ordfavgroup:", "ordpool:")): prefix = None break diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index aad5752..2c1174a 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -24,10 +24,6 @@ class GelbooruV02Extractor(booru.BooruExtractor): self.user_id = self.config("user-id") self.root_api = self.config_instance("root-api") or self.root - if self.category == "realbooru": - self.items = self._items_realbooru - self._tags = self._tags_realbooru - def _api_request(self, params): url = self.root_api + "/index.php?page=dapi&s=post&q=index" return ElementTree.fromstring(self.request(url, params=params).text) @@ -82,16 +78,17 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = self.page_start * self.per_page data = {} + find_ids = re.compile(r"\sid=\"p(\d+)").findall + while True: - num_ids = 0 page = self.request(url, params=params).text + pids = find_ids(page) - for data["id"] in text.extract_iter(page, '" id="p', '"'): - num_ids += 1 + for data["id"] in pids: for post in self._api_request(data): yield post.attrib - if num_ids < self.per_page: + if len(pids) < self.per_page: return params["pid"] += self.per_page @@ -136,59 +133,8 @@ class GelbooruV02Extractor(booru.BooruExtractor): "body" : text.unescape(text.remove_html(extr(">", ""))), }) - def _file_url_realbooru(self, post): - url = post["file_url"] - md5 = post["md5"] - if md5 not in post["preview_url"] or url.count("/") == 5: - url = "{}/images/{}/{}/{}.{}".format( - self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2]) - return url - - def _items_realbooru(self): - from .common import Message - data = self.metadata() - - for post in self.posts(): - try: - html = self._html(post) - fallback = post["file_url"] - url = post["file_url"] = text.rextract( - html, 'href="', '"', html.index(">Original<"))[0] - except Exception: - self.log.debug("Unable to fetch download URL for post %s " - "(md5: %s)", post.get("id"), post.get("md5")) - continue - - text.nameext_from_url(url, post) - post.update(data) - self._prepare(post) - self._tags(post, html) - - path = url.rpartition("/")[0] - post["_fallback"] = ( - "{}/{}.{}".format(path, post["md5"], post["extension"]), - fallback, - ) - - yield Message.Directory, post - yield Message.Url, url, post - - def _tags_realbooru(self, post, page): - tag_container = text.extr(page, 'id="tagLink"', '') - tags = collections.defaultdict(list) - pattern = re.compile( - r'') + + post = { + "_html" : page, + "id" : post_id, + "rating" : "e" if rating == "adult" else (rating or "?")[0], + "tags" : text.unescape(extr(' alt="', '"')), + "file_url" : extr('src="', '"'), + "created_at": extr(">Posted at ", " by "), + "uploader" : extr(">", "<"), + "score" : extr('">', "<"), + "title" : extr('id="title" style="width: 100%;" value="', '"'), + "source" : extr('d="source" style="width: 100%;" value="', '"'), + } + + post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0] + return post + + def skip(self, num): + self.page_start += num + return num + + def _prepare(self, post): + post["date"] = text.parse_datetime(post["created_at"], "%b, %d %Y") + + def _pagination(self, params, begin, end): + url = self.root + "/index.php" + params["pid"] = self.page_start + + while True: + page = self.request(url, params=params).text + + cnt = 0 + for post_id in text.extract_iter(page, begin, end): + cnt += 1 + yield self._parse_post(post_id) + + if cnt < self.per_page: + return + params["pid"] += self.per_page + + def _tags(self, post, _): + page = post["_html"] + tag_container = text.extr(page, 'id="tagLink"', '') + tags = collections.defaultdict(list) + pattern = re.compile( + r'Pool: ", "") + self.post_ids = text.extract_iter( + page, 'class="thumb" id="p', '"', pos) + + return { + "pool": text.parse_int(pool_id), + "pool_name": text.unescape(name), + } + + def posts(self): + return map( + self._parse_post, + util.advance(self.post_ids, self.page_start) + ) + + +class RealbooruPostExtractor(RealbooruExtractor): + subcategory = "post" + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" + example = "https://realbooru.com/index.php?page=post&s=view&id=12345" + + def posts(self): + return (self._parse_post(self.groups[0]),) diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py index f9b1a7f..4c4fb3a 100644 --- a/gallery_dl/extractor/zerochan.py +++ b/gallery_dl/extractor/zerochan.py @@ -145,6 +145,14 @@ class ZerochanTagExtractor(ZerochanExtractor): self.posts = self.posts_api self.session.headers["User-Agent"] = util.USERAGENT + exts = self.config("extensions") + if exts: + if isinstance(exts, str): + exts = exts.split(",") + self.exts = exts + else: + self.exts = ("jpg", "png", "webp", "gif") + def metadata(self): return {"search_tags": text.unquote( self.search_tag.replace("+", " "))} @@ -194,8 +202,6 @@ class ZerochanTagExtractor(ZerochanExtractor): "p" : self.page_start, } - static = "https://static.zerochan.net/.full." - while True: response = self.request(url, params=params, allow_redirects=False) @@ -221,15 +227,20 @@ class ZerochanTagExtractor(ZerochanExtractor): yield post else: for post in posts: - base = static + str(post["id"]) - post["file_url"] = base + ".jpg" - post["_fallback"] = (base + ".png",) + urls = self._urls(post) + post["file_url"] = next(urls) + post["_fallback"] = urls yield post if not data.get("next"): return params["p"] += 1 + def _urls(self, post, static="https://static.zerochan.net/.full."): + base = static + str(post["id"]) + "." + for ext in self.exts: + yield base + ext + class ZerochanImageExtractor(ZerochanExtractor): subcategory = "image" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 2bf03f4..2dab0d6 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.0" +__version__ = "1.28.1" __variant__ = None -- cgit v1.2.3