aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2024-12-08 20:34:39 -0500
committerLibravatarUnit 193 <unit193@unit193.net>2024-12-08 20:34:39 -0500
commit955a18e4feea86fdb35e531a00304e00d037652c (patch)
tree06060068ebe725be4294758b2caca3e2491ef4f0
parent402872c8ca0118f5ed9c172d3c11dac90dd41c37 (diff)
parentf6877087773089220d68288d055276fca6c556d4 (diff)
Update upstream source from tag 'upstream/1.28.1'
Update to upstream version '1.28.1' with Debian dir f1535f052953f6a9195352a951ec8dd121144a27
-rw-r--r--CHANGELOG.md103
-rw-r--r--PKG-INFO6
-rw-r--r--README.rst4
-rw-r--r--data/man/gallery-dl.12
-rw-r--r--data/man/gallery-dl.conf.576
-rw-r--r--docs/gallery-dl.conf2
-rw-r--r--gallery_dl.egg-info/PKG-INFO6
-rw-r--r--gallery_dl.egg-info/SOURCES.txt1
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/bluesky.py20
-rw-r--r--gallery_dl/extractor/common.py6
-rw-r--r--gallery_dl/extractor/danbooru.py23
-rw-r--r--gallery_dl/extractor/gelbooru_v02.py64
-rw-r--r--gallery_dl/extractor/gofile.py4
-rw-r--r--gallery_dl/extractor/hentaicosplays.py45
-rw-r--r--gallery_dl/extractor/inkbunny.py2
-rw-r--r--gallery_dl/extractor/instagram.py4
-rw-r--r--gallery_dl/extractor/kemonoparty.py3
-rw-r--r--gallery_dl/extractor/nhentai.py16
-rw-r--r--gallery_dl/extractor/patreon.py27
-rw-r--r--gallery_dl/extractor/pixiv.py16
-rw-r--r--gallery_dl/extractor/readcomiconline.py41
-rw-r--r--gallery_dl/extractor/realbooru.py157
-rw-r--r--gallery_dl/extractor/zerochan.py21
-rw-r--r--gallery_dl/version.py2
25 files changed, 422 insertions, 230 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e3dec8c..b831cd4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,87 +1,26 @@
-## 1.28.0 - 2024-11-30
-### Changes
-- [common] disable using environment network settings by default (`HTTP_PROXY`, `.netrc`, …)
- - disable `trust_env` session attribute
- - disable `Authorization` header injection from `.netrc` auth ([#5780](https://github.com/mikf/gallery-dl/issues/5780), [#6134](https://github.com/mikf/gallery-dl/issues/6134), [#6455](https://github.com/mikf/gallery-dl/issues/6455))
- - add `proxy-env` option
-- [ytdl] change `forward-cookies` default value to `true` ([#6401](https://github.com/mikf/gallery-dl/issues/6401), [#6348](https://github.com/mikf/gallery-dl/issues/6348))
+## 1.28.1 - 2024-12-07
### Extractors
#### Additions
-- [bilibili] add support for `opus` articles ([#2824](https://github.com/mikf/gallery-dl/issues/2824), [#6443](https://github.com/mikf/gallery-dl/issues/6443))
-- [bluesky] add `hashtag` extractor ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
-- [danbooru] add `artist` and `artist-search` extractors ([#5348](https://github.com/mikf/gallery-dl/issues/5348))
-- [everia] add support ([#1067](https://github.com/mikf/gallery-dl/issues/1067), [#2472](https://github.com/mikf/gallery-dl/issues/2472), [#4091](https://github.com/mikf/gallery-dl/issues/4091), [#6227](https://github.com/mikf/gallery-dl/issues/6227))
-- [facebook] add support ([#470](https://github.com/mikf/gallery-dl/issues/470), [#2612](https://github.com/mikf/gallery-dl/issues/2612), [#5626](https://github.com/mikf/gallery-dl/issues/5626), [#6548](https://github.com/mikf/gallery-dl/issues/6548))
-- [hentaifoundry] add `tag` extractor ([#6465](https://github.com/mikf/gallery-dl/issues/6465))
-- [hitomi] add `index` and `search` extractors ([#2502](https://github.com/mikf/gallery-dl/issues/2502), [#6392](https://github.com/mikf/gallery-dl/issues/6392), [#3720](https://github.com/mikf/gallery-dl/issues/3720))
-- [motherless] add support ([#2074](https://github.com/mikf/gallery-dl/issues/2074), [#4413](https://github.com/mikf/gallery-dl/issues/4413), [#6221](https://github.com/mikf/gallery-dl/issues/6221))
-- [noop] add `noop` extractor
-- [rule34vault] add support ([#5708](https://github.com/mikf/gallery-dl/issues/5708), [#6240](https://github.com/mikf/gallery-dl/issues/6240))
-- [rule34xyz] add support ([#1078](https://github.com/mikf/gallery-dl/issues/1078), [#4960](https://github.com/mikf/gallery-dl/issues/4960))
-- [saint] add support ([#4405](https://github.com/mikf/gallery-dl/issues/4405), [#6324](https://github.com/mikf/gallery-dl/issues/6324))
-- [tumblr] add `search` extractor ([#6394](https://github.com/mikf/gallery-dl/issues/6394))
+- [bluesky] add `info` extractor
#### Fixes
-- [8chan] avoid performing network requests within `_init()` ([#6387](https://github.com/mikf/gallery-dl/issues/6387))
-- [bluesky] fix downloads from non-bsky PDSs ([#6406](https://github.com/mikf/gallery-dl/issues/6406))
-- [bunkr] fix album names containing `<>&` characters
-- [flickr] use `download` URLs ([#6360](https://github.com/mikf/gallery-dl/issues/6360), [#6464](https://github.com/mikf/gallery-dl/issues/6464))
-- [hiperdex] update domain to `hipertoon.com` ([#6420](https://github.com/mikf/gallery-dl/issues/6420))
-- [imagechest] fix extractors ([#6475](https://github.com/mikf/gallery-dl/issues/6475), [#6491](https://github.com/mikf/gallery-dl/issues/6491))
-- [instagram] fix using numeric cursor values ([#6414](https://github.com/mikf/gallery-dl/issues/6414))
-- [kemonoparty] update to new site layout ([#6415](https://github.com/mikf/gallery-dl/issues/6415), [#6503](https://github.com/mikf/gallery-dl/issues/6503), [#6528](https://github.com/mikf/gallery-dl/issues/6528), [#6530](https://github.com/mikf/gallery-dl/issues/6530), [#6536](https://github.com/mikf/gallery-dl/issues/6536), [#6542](https://github.com/mikf/gallery-dl/issues/6542), [#6554](https://github.com/mikf/gallery-dl/issues/6554))
-- [koharu] update domain to `niyaniya.moe` ([#6430](https://github.com/mikf/gallery-dl/issues/6430), [#6432](https://github.com/mikf/gallery-dl/issues/6432))
-- [mangadex] apply `lang` option only to chapter results ([#6372](https://github.com/mikf/gallery-dl/issues/6372))
-- [newgrounds] fix metadata extraction ([#6463](https://github.com/mikf/gallery-dl/issues/6463), [#6533](https://github.com/mikf/gallery-dl/issues/6533))
-- [nhentai] support `.webp` files ([#6442](https://github.com/mikf/gallery-dl/issues/6442), [#6479](https://github.com/mikf/gallery-dl/issues/6479))
-- [patreon] use legacy mobile UA when no `session_id` is set
-- [pinterest] update API headers ([#6513](https://github.com/mikf/gallery-dl/issues/6513))
-- [pinterest] detect video/audio by block content ([#6421](https://github.com/mikf/gallery-dl/issues/6421))
-- [scrolller] prevent exception for posts without `mediaSources` ([#5051](https://github.com/mikf/gallery-dl/issues/5051))
-- [tumblrgallery] fix file downloads ([#6391](https://github.com/mikf/gallery-dl/issues/6391))
-- [twitter] make `source` metadata extraction non-fatal ([#6472](https://github.com/mikf/gallery-dl/issues/6472))
-- [weibo] fix livephoto `filename` & `extension` ([#6471](https://github.com/mikf/gallery-dl/issues/6471))
+- [bluesky] fix exception when encountering non-quote embeds ([#6577](https://github.com/mikf/gallery-dl/issues/6577))
+- [bluesky] unescape search queries ([#6579](https://github.com/mikf/gallery-dl/issues/6579))
+- [common] restore using environment proxy settings by default ([#6553](https://github.com/mikf/gallery-dl/issues/6553), [#6609](https://github.com/mikf/gallery-dl/issues/6609))
+- [common] improve handling of `user-agent` settings ([#6594](https://github.com/mikf/gallery-dl/issues/6594))
+- [e621] fix `TypeError` when `metadata` is enabled ([#6587](https://github.com/mikf/gallery-dl/issues/6587))
+- [gofile] fix website token extraction ([#6596](https://github.com/mikf/gallery-dl/issues/6596))
+- [inkbunny] fix re-login loop ([#6618](https://github.com/mikf/gallery-dl/issues/6618))
+- [instagram] handle empty `carousel_media` entries ([#6595](https://github.com/mikf/gallery-dl/issues/6595))
+- [kemonoparty] fix `o` query parameter handling ([#6597](https://github.com/mikf/gallery-dl/issues/6597))
+- [nhentai] fix download URLs ([#6620](https://github.com/mikf/gallery-dl/issues/6620))
+- [readcomiconline] fix `chapter` extraction ([#6070](https://github.com/mikf/gallery-dl/issues/6070), [#6335](https://github.com/mikf/gallery-dl/issues/6335))
+- [realbooru] fix extraction ([#6543](https://github.com/mikf/gallery-dl/issues/6543))
+- [rule34] fix `favorite` extraction ([#6573](https://github.com/mikf/gallery-dl/issues/6573))
+- [zerochan] download `.webp` and `.gif` files ([#6576](https://github.com/mikf/gallery-dl/issues/6576))
#### Improvements
-- [bluesky] support `main.bsky.dev` URLs ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
-- [bluesky] match common embed fixes ([#6410](https://github.com/mikf/gallery-dl/issues/6410), [#6411](https://github.com/mikf/gallery-dl/issues/6411))
-- [boosty] update default video format list ([#2387](https://github.com/mikf/gallery-dl/issues/2387))
-- [bunkr] support `bunkr.cr` URLs
-- [common] allow passing cookies to OAuth extractors
-- [common] allow overriding more default `User-Agent` headers ([#6496](https://github.com/mikf/gallery-dl/issues/6496))
-- [philomena] switch default `ponybooru` filter ([#6437](https://github.com/mikf/gallery-dl/issues/6437))
-- [pinterest] support `story_pin_music` blocks ([#6421](https://github.com/mikf/gallery-dl/issues/6421))
-- [pixiv] get ugoira frame extension from `meta_single_page` values ([#6056](https://github.com/mikf/gallery-dl/issues/6056))
-- [reddit] support user profile share links ([#6389](https://github.com/mikf/gallery-dl/issues/6389))
-- [steamgriddb] disable `adjust-extensions` for `fake-png` files ([#5274](https://github.com/mikf/gallery-dl/issues/5274))
-- [twitter] remove cookies migration workaround
-#### Metadata
-- [bluesky] provide `author[instance]` metadata ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
-- [instagram] fix `extension` of apparent `.webp` files ([#6541](https://github.com/mikf/gallery-dl/issues/6541))
-- [pillowfort] provide `count` metadata ([#6478](https://github.com/mikf/gallery-dl/issues/6478))
-- [pixiv:ranking] add `rank` metadata field ([#6531](https://github.com/mikf/gallery-dl/issues/6531))
-- [poipiku] return `count` as proper number ([#6445](https://github.com/mikf/gallery-dl/issues/6445))
-- [webtoons] extract `episode_no` for comic results ([#6439](https://github.com/mikf/gallery-dl/issues/6439))
+- [hentaicosplays] update domains ([#6578](https://github.com/mikf/gallery-dl/issues/6578))
+- [pixiv:ranking] implement filtering results by `content` ([#6574](https://github.com/mikf/gallery-dl/issues/6574))
+- [pixiv] include user ID in failed AJAX request warnings ([#6581](https://github.com/mikf/gallery-dl/issues/6581))
#### Options
-- [civitai] add `metadata` option - support fetching `generation` data ([#6383](https://github.com/mikf/gallery-dl/issues/6383))
-- [exhentai] implement `tags` option ([#2117](https://github.com/mikf/gallery-dl/issues/2117))
-- [koharu] implement `tags` option
-- [rule34xyz] add `format` option ([#1078](https://github.com/mikf/gallery-dl/issues/1078))
-### Downloaders
-- [ytdl] fix `AttributeError` caused by `decodeOption()` removal ([#6552](https://github.com/mikf/gallery-dl/issues/6552))
-### Post Processors
-- [classify] rewrite - fix skipping existing files ([#5213](https://github.com/mikf/gallery-dl/issues/5213))
-- enable inheriting options from global `postprocessor` objects
-- allow `postprocessors` values to be a single post processor object
-### Cookies
-- support Chromium table version 24 ([#6162](https://github.com/mikf/gallery-dl/issues/6162))
-- fix GCM pad length calculation ([#6162](https://github.com/mikf/gallery-dl/issues/6162))
-- try decryption with empty password as fallback
-### Documentation
-- update recommended `pip` command for installing `dev` version ([#6493](https://github.com/mikf/gallery-dl/issues/6493))
-- update `gallery-dl.conf` ([#6501](https://github.com/mikf/gallery-dl/issues/6501))
-### Options
-- add `-4/--force-ipv4` and `-6/--force-ipv6` command-line options
-- fix passing negative numbers as arguments ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
-### Miscellaneous
-- [output] use default ANSI colors only when stream is a TTY
-- [util] implement `defaultdict` filters-environment
-- [util] enable using multiple statements for all `filter` options ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
+- [patreon] add `format-images` option ([#6569](https://github.com/mikf/gallery-dl/issues/6569))
+- [zerochan] add `extensions` option ([#6576](https://github.com/mikf/gallery-dl/issues/6576))
diff --git a/PKG-INFO b/PKG-INFO
index 842dead..f82026d 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.28.0
+Version: 1.28.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -117,9 +117,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 8b8b74f..63d400f 100644
--- a/README.rst
+++ b/README.rst
@@ -76,9 +76,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 29568cf..96c01a0 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-11-30" "1.28.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-12-07" "1.28.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index c441095..e2c1e14 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-11-30" "1.28.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-12-07" "1.28.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -747,7 +747,7 @@ Note: If a proxy URL does not include a scheme,
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]false\f[]
+\f[I]true\f[]
.IP "Description:" 4
Collect proxy configuration information from environment variables
@@ -4040,10 +4040,54 @@ Note: This requires 1 additional HTTP request per post.
\f[I]["images", "image_large", "attachments", "postfile", "content"]\f[]
.IP "Description:" 4
-Determines the type and order of files to be downloaded.
+Determines types and order of files to download.
-Available types are
-\f[I]postfile\f[], \f[I]images\f[], \f[I]image_large\f[], \f[I]attachments\f[], and \f[I]content\f[].
+Available types:
+
+.br
+* \f[I]postfile\f[]
+.br
+* \f[I]images\f[]
+.br
+* \f[I]image_large\f[]
+.br
+* \f[I]attachments\f[]
+.br
+* \f[I]content\f[]
+
+
+.SS extractor.patreon.format-images
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"download_url"\f[]
+
+.IP "Description:" 4
+Selects the format of \f[I]images\f[] \f[I]files\f[].
+
+Possible formats:
+
+.br
+* \f[I]original\f[]
+.br
+* \f[I]default\f[]
+.br
+* \f[I]default_small\f[]
+.br
+* \f[I]default_blurred\f[]
+.br
+* \f[I]default_blurred_small\f[]
+.br
+* \f[I]thumbnail\f[]
+.br
+* \f[I]thumbnail_large\f[]
+.br
+* \f[I]thumbnail_small\f[]
+.br
+* \f[I]url\f[]
+.br
+* \f[I]download_url\f[]
.SS extractor.pillowfort.external
@@ -6040,6 +6084,28 @@ Available options can be found in
\f[I]youtube-dl's docstrings\f[]
+.SS extractor.zerochan.extensions
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["jpg", "png", "webp", "gif"]\f[]
+
+.IP "Example:" 4
+.br
+* "gif"
+.br
+* ["webp", "gif", "jpg"]
+
+.IP "Description:" 4
+List of filename extensions to try when dynamically building download URLs
+(\f[I]"pagination": "api"\f[] +
+\f[I]"metadata": false\f[])
+
+
.SS extractor.zerochan.metadata
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index c7382f3..4dc2e14 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -22,7 +22,7 @@
"tls12" : true,
"browser" : null,
"proxy" : null,
- "proxy-env" : false,
+ "proxy-env" : true,
"source-address": null,
"retries" : 4,
"retry-codes" : [],
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 842dead..f82026d 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.28.0
+Version: 1.28.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -117,9 +117,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.0/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.28.1/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index a98e9da..398c9f7 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -187,6 +187,7 @@ gallery_dl/extractor/pornpics.py
gallery_dl/extractor/postmill.py
gallery_dl/extractor/reactor.py
gallery_dl/extractor/readcomiconline.py
+gallery_dl/extractor/realbooru.py
gallery_dl/extractor/recursive.py
gallery_dl/extractor/reddit.py
gallery_dl/extractor/redgifs.py
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 594ce41..8d5f3d0 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -140,6 +140,7 @@ modules = [
"postmill",
"reactor",
"readcomiconline",
+ "realbooru",
"reddit",
"redgifs",
"rule34us",
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index bbff17c..f60ea15 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -75,10 +75,13 @@ class BlueskyExtractor(Extractor):
quote = embed["record"]
if "record" in quote:
quote = quote["record"]
+ value = quote.pop("value", None)
+ if value is None:
+ break
quote["quote_id"] = self._pid(post)
quote["quote_by"] = post["author"]
embed = quote.get("embed")
- quote.update(quote.pop("value"))
+ quote.update(value)
post = quote
def posts(self):
@@ -202,6 +205,7 @@ class BlueskyUserExtractor(BlueskyExtractor):
def items(self):
base = "{}/profile/{}/".format(self.root, self.user)
return self._dispatch_extractors((
+ (BlueskyInfoExtractor , base + "info"),
(BlueskyAvatarExtractor , base + "avatar"),
(BlueskyBackgroundExtractor, base + "banner"),
(BlueskyPostsExtractor , base + "posts"),
@@ -298,6 +302,17 @@ class BlueskyPostExtractor(BlueskyExtractor):
return self.api.get_post_thread(self.user, self.post_id)
+class BlueskyInfoExtractor(BlueskyExtractor):
+ subcategory = "info"
+ pattern = USER_PATTERN + r"/info"
+ example = "https://bsky.app/profile/HANDLE/info"
+
+ def items(self):
+ self._metadata_user = True
+ self.api._did_from_actor(self.user)
+ return iter(((Message.Directory, self._user),))
+
+
class BlueskyAvatarExtractor(BlueskyExtractor):
subcategory = "avatar"
filename_fmt = "avatar_{post_id}.{extension}"
@@ -324,7 +339,8 @@ class BlueskySearchExtractor(BlueskyExtractor):
example = "https://bsky.app/search?q=QUERY"
def posts(self):
- return self.api.search_posts(self.user)
+ query = text.unquote(self.user.replace("+", " "))
+ return self.api.search_posts(query)
class BlueskyHashtagExtractor(BlueskyExtractor):
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index f364124..5f9d355 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -348,7 +348,7 @@ class Extractor():
ssl_options = ssl_ciphers = 0
# .netrc Authorization headers are always disabled
- session.trust_env = True if self.config("proxy-env", False) else False
+ session.trust_env = True if self.config("proxy-env", True) else False
browser = self.config("browser")
if browser is None:
@@ -387,8 +387,8 @@ class Extractor():
useragent = self.useragent
elif useragent == "browser":
useragent = _browser_useragent()
- elif useragent is config.get(("extractor",), "user-agent") and \
- useragent == Extractor.useragent:
+ elif self.useragent is not Extractor.useragent and \
+ useragent is config.get(("extractor",), "user-agent"):
useragent = self.useragent
headers["User-Agent"] = useragent
headers["Accept"] = "*/*"
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index c3dfd91..37b6747 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -26,16 +26,7 @@ class DanbooruExtractor(BaseExtractor):
def _init(self):
self.ugoira = self.config("ugoira", False)
self.external = self.config("external", False)
-
- includes = self.config("metadata")
- if includes:
- if isinstance(includes, (list, tuple)):
- includes = ",".join(includes)
- elif not isinstance(includes, str):
- includes = "artist_commentary,children,notes,parent,uploader"
- self.includes = includes + ",id"
- else:
- self.includes = False
+ self.includes = False
threshold = self.config("threshold")
if isinstance(threshold, int):
@@ -56,6 +47,16 @@ class DanbooruExtractor(BaseExtractor):
return pages * self.per_page
def items(self):
+ # 'includes' initialization must be done here and not in '_init()'
+ # or it'll cause an exception with e621 when 'metadata' is enabled
+ includes = self.config("metadata")
+ if includes:
+ if isinstance(includes, (list, tuple)):
+ includes = ",".join(includes)
+ elif not isinstance(includes, str):
+ includes = "artist_commentary,children,notes,parent,uploader"
+ self.includes = includes + ",id"
+
data = self.metadata()
for post in self.posts():
@@ -223,7 +224,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
else:
prefix = None
elif tag.startswith(
- ("id:", "md5", "ordfav:", "ordfavgroup:", "ordpool:")):
+ ("id:", "md5:", "ordfav:", "ordfavgroup:", "ordpool:")):
prefix = None
break
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index aad5752..2c1174a 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -24,10 +24,6 @@ class GelbooruV02Extractor(booru.BooruExtractor):
self.user_id = self.config("user-id")
self.root_api = self.config_instance("root-api") or self.root
- if self.category == "realbooru":
- self.items = self._items_realbooru
- self._tags = self._tags_realbooru
-
def _api_request(self, params):
url = self.root_api + "/index.php?page=dapi&s=post&q=index"
return ElementTree.fromstring(self.request(url, params=params).text)
@@ -82,16 +78,17 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start * self.per_page
data = {}
+ find_ids = re.compile(r"\sid=\"p(\d+)").findall
+
while True:
- num_ids = 0
page = self.request(url, params=params).text
+ pids = find_ids(page)
- for data["id"] in text.extract_iter(page, '" id="p', '"'):
- num_ids += 1
+ for data["id"] in pids:
for post in self._api_request(data):
yield post.attrib
- if num_ids < self.per_page:
+ if len(pids) < self.per_page:
return
params["pid"] += self.per_page
@@ -136,59 +133,8 @@ class GelbooruV02Extractor(booru.BooruExtractor):
"body" : text.unescape(text.remove_html(extr(">", "</div>"))),
})
- def _file_url_realbooru(self, post):
- url = post["file_url"]
- md5 = post["md5"]
- if md5 not in post["preview_url"] or url.count("/") == 5:
- url = "{}/images/{}/{}/{}.{}".format(
- self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2])
- return url
-
- def _items_realbooru(self):
- from .common import Message
- data = self.metadata()
-
- for post in self.posts():
- try:
- html = self._html(post)
- fallback = post["file_url"]
- url = post["file_url"] = text.rextract(
- html, 'href="', '"', html.index(">Original<"))[0]
- except Exception:
- self.log.debug("Unable to fetch download URL for post %s "
- "(md5: %s)", post.get("id"), post.get("md5"))
- continue
-
- text.nameext_from_url(url, post)
- post.update(data)
- self._prepare(post)
- self._tags(post, html)
-
- path = url.rpartition("/")[0]
- post["_fallback"] = (
- "{}/{}.{}".format(path, post["md5"], post["extension"]),
- fallback,
- )
-
- yield Message.Directory, post
- yield Message.Url, url, post
-
- def _tags_realbooru(self, post, page):
- tag_container = text.extr(page, 'id="tagLink"', '</div>')
- tags = collections.defaultdict(list)
- pattern = re.compile(
- r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
- for tag_type, tag_name in pattern.findall(tag_container):
- tags[tag_type].append(text.unescape(text.unquote(tag_name)))
- for key, value in tags.items():
- post["tags_" + key] = " ".join(value)
-
BASE_PATTERN = GelbooruV02Extractor.update({
- "realbooru": {
- "root": "https://realbooru.com",
- "pattern": r"realbooru\.com",
- },
"rule34": {
"root": "https://rule34.xxx",
"root-api": "https://api.rule34.xxx",
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index 52b4ae6..ef9ea60 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -75,8 +75,8 @@ class GofileFolderExtractor(Extractor):
@cache(maxage=86400)
def _get_website_token(self):
self.log.debug("Fetching website token")
- page = self.request(self.root + "/dist/js/alljs.js").text
- return text.extr(page, 'wt: "', '"')
+ page = self.request(self.root + "/dist/js/global.js").text
+ return text.extr(page, '.wt = "', '"')
def _get_content(self, content_id, password=None):
headers = {"Authorization": "Bearer " + self.api_token}
diff --git a/gallery_dl/extractor/hentaicosplays.py b/gallery_dl/extractor/hentaicosplays.py
index fbbae16..4992b7b 100644
--- a/gallery_dl/extractor/hentaicosplays.py
+++ b/gallery_dl/extractor/hentaicosplays.py
@@ -5,31 +5,46 @@
# published by the Free Software Foundation.
"""Extractors for https://hentai-cosplay-xxx.com/
-(also works for hentai-img.com and porn-images-xxx.com)"""
+(also works for hentai-img-xxx.com and porn-image.com)"""
-from .common import GalleryExtractor
+from .common import BaseExtractor, GalleryExtractor
from .. import text
-class HentaicosplaysGalleryExtractor(GalleryExtractor):
+class HentaicosplaysExtractor(BaseExtractor):
+ basecategory = "hentaicosplays"
+
+
+BASE_PATTERN = HentaicosplaysExtractor.update({
+ "hentaicosplay": {
+ "root": "https://hentai-cosplay-xxx.com",
+ "pattern": r"(?:\w\w\.)?hentai-cosplays?(?:-xxx)?\.com",
+ },
+ "hentaiimg": {
+ "root": "https://hentai-img-xxx.com",
+ "pattern": r"(?:\w\w\.)?hentai-img(?:-xxx)?\.com",
+ },
+ "pornimage": {
+ "root": "https://porn-image.com",
+ "pattern": r"(?:\w\w\.)?porn-images?(?:-xxx)?\.com",
+ },
+})
+
+
+class HentaicosplaysGalleryExtractor(
+ HentaicosplaysExtractor, GalleryExtractor):
"""Extractor for image galleries from
- hentai-cosplay-xxx.com, hentai-img.com, and porn-images-xxx.com"""
- category = "hentaicosplays"
+ hentai-cosplay-xxx.com, hentai-img-xxx.com, and porn-image.com"""
directory_fmt = ("{site}", "{title}")
filename_fmt = "{filename}.{extension}"
archive_fmt = "{title}_{filename}"
- pattern = r"((?:https?://)?(?:\w{2}\.)?" \
- r"(hentai-cosplay(?:s|-xxx)|hentai-img|porn-images-xxx)\.com)/" \
- r"(?:image|story)/([\w-]+)"
+ pattern = BASE_PATTERN + r"/(?:image|story)/([\w-]+)"
example = "https://hentai-cosplay-xxx.com/image/TITLE/"
def __init__(self, match):
- root, self.site, self.slug = match.groups()
- self.root = text.ensure_http_scheme(root)
- if self.root == "https://hentai-cosplays.com":
- self.root = "https://hentai-cosplay-xxx.com"
- url = "{}/story/{}/".format(self.root, self.slug)
- GalleryExtractor.__init__(self, match, url)
+ BaseExtractor.__init__(self, match)
+ self.slug = self.groups[-1]
+ self.gallery_url = "{}/story/{}/".format(self.root, self.slug)
def _init(self):
self.session.headers["Referer"] = self.gallery_url
@@ -39,7 +54,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
return {
"title": text.unescape(title.rpartition(" Story Viewer - ")[0]),
"slug" : self.slug,
- "site" : self.site,
+ "site" : self.root.partition("://")[2].rpartition(".")[0],
}
def images(self, page):
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index bff3156..47e071a 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -338,9 +338,9 @@ class InkbunnyAPI():
def _call(self, endpoint, params):
url = "https://inkbunny.net/api_" + endpoint + ".php"
- params["sid"] = self.session_id
while True:
+ params["sid"] = self.session_id
data = self.extractor.request(url, params=params).json()
if "error_code" not in data:
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index a866f45..e6b6b14 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -207,8 +207,8 @@ class InstagramExtractor(Extractor):
for user in coauthors
]
- if "carousel_media" in post:
- items = post["carousel_media"]
+ items = post.get("carousel_media")
+ if items:
data["sidecar_media_id"] = data["post_id"]
data["sidecar_shortcode"] = data["post_shortcode"]
else:
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 3d04f75..16c5b99 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -552,7 +552,8 @@ class KemonoAPI():
return response.json()
def _pagination(self, endpoint, params, batch=50, key=False):
- params["o"] = text.parse_int(params.get("o")) % 50
+ offset = text.parse_int(params.get("o"))
+ params["o"] = offset - offset % batch
while True:
data = self._call(endpoint, params)
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 90c5420..0d656d0 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -11,6 +11,7 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
import collections
+import random
class NhentaiGalleryExtractor(GalleryExtractor):
@@ -59,15 +60,18 @@ class NhentaiGalleryExtractor(GalleryExtractor):
}
def images(self, _):
- ufmt = ("https://i.nhentai.net/galleries/" +
- self.data["media_id"] + "/{}.{}")
- extdict = {"j": "jpg", "p": "png", "g": "gif", "w": "webp"}
+ exts = {"j": "jpg", "p": "png", "g": "gif", "w": "webp", "a": "avif"}
+
+ data = self.data
+ ufmt = ("https://i{}.nhentai.net/galleries/" +
+ data["media_id"] + "/{}.{}").format
return [
- (ufmt.format(num, extdict.get(img["t"], "jpg")), {
- "width": img["w"], "height": img["h"],
+ (ufmt(random.randint(1, 4), num, exts.get(img["t"], "jpg")), {
+ "width" : img["w"],
+ "height": img["h"],
})
- for num, img in enumerate(self.data["images"]["pages"], 1)
+ for num, img in enumerate(data["images"]["pages"], 1)
]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 3eacf1a..e4a5985 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -23,18 +23,22 @@ class PatreonExtractor(Extractor):
directory_fmt = ("{category}", "{creator[full_name]}")
filename_fmt = "{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
+ useragent = "Patreon/72.2.28 (Android; Android 14; Scale/2.10)"
_warning = True
def _init(self):
- if self.cookies_check(("session_id",)):
- self.session.headers["User-Agent"] = \
- "Patreon/72.2.28 (Android; Android 14; Scale/2.10)"
- else:
+ if not self.cookies_check(("session_id",)):
if self._warning:
PatreonExtractor._warning = False
self.log.warning("no 'session_id' cookie set")
- self.session.headers["User-Agent"] = \
- "Patreon/7.6.28 (Android; Android 11; Scale/2.10)"
+ if self.session.headers["User-Agent"] is self.useragent:
+ self.session.headers["User-Agent"] = \
+ "Patreon/7.6.28 (Android; Android 11; Scale/2.10)"
+
+ format_images = self.config("format-images")
+ if format_images:
+ self._images_fmt = format_images
+ self._images_url = self._images_url_fmt
def items(self):
generators = self._build_file_generators(self.config("files"))
@@ -80,11 +84,20 @@ class PatreonExtractor(Extractor):
def _images(self, post):
for image in post.get("images") or ():
- url = image.get("download_url")
+ url = self._images_url(image)
if url:
name = image.get("file_name") or self._filename(url) or url
yield "image", url, name
+ def _images_url(self, image):
+ return image.get("download_url")
+
+ def _images_url_fmt(self, image):
+ try:
+ return image["image_urls"][self._images_fmt]
+ except Exception:
+ return image.get("download_url")
+
def _image_large(self, post):
image = post.get("image")
if image:
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8ad061d..6207bf7 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -380,8 +380,9 @@ class PixivArtworksExtractor(PixivExtractor):
ajax_ids.extend(map(int, body["manga"]))
ajax_ids.sort()
except Exception as exc:
- self.log.warning("Unable to collect artwork IDs using AJAX "
- "API (%s: %s)", exc.__class__.__name__, exc)
+ self.log.warning("u%s: Failed to collect artwork IDs "
+ "using AJAX API (%s: %s)",
+ self.user_id, exc.__class__.__name__, exc)
else:
works = self._extend_sanity(works, ajax_ids)
@@ -607,8 +608,12 @@ class PixivRankingExtractor(PixivExtractor):
def works(self):
ranking = self.ranking
- for ranking["rank"], work in enumerate(
- self.api.illust_ranking(self.mode, self.date), 1):
+
+ works = self.api.illust_ranking(self.mode, self.date)
+ if self.type:
+ works = filter(lambda work, t=self.type: work["type"] == t, works)
+
+ for ranking["rank"], work in enumerate(works, 1):
yield work
def metadata(self):
@@ -648,10 +653,13 @@ class PixivRankingExtractor(PixivExtractor):
date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
+ self.type = type = query.get("content")
+
self.ranking = ranking = {
"mode": mode,
"date": self.date,
"rank": 0,
+ "type": type or "all",
}
return {"ranking": ranking}
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 271fa50..c0374eb 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -79,13 +79,22 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
def images(self, page):
results = []
+ referer = {"_http_headers": {"Referer": self.gallery_url}}
+ root = text.extr(page, "return baeu(l, '", "'")
+
+ replacements = re.findall(
+ r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)", page)
for block in page.split(" pth = '")[1:]:
pth = text.extr(block, "", "'")
+
for needle, repl in re.findall(
r"pth = pth\.replace\(/([^/]+)/g, [\"']([^\"']*)", block):
pth = pth.replace(needle, repl)
- results.append((beau(pth), None))
+ for needle, repl in replacements:
+ pth = pth.replace(needle, repl)
+
+ results.append((baeu(pth, root), referer))
return results
@@ -119,20 +128,24 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
return results
-def beau(url):
- """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.1"""
+def baeu(url, root="", root_blogspot="https://2.bp.blogspot.com"):
+ """https://readcomiconline.li/Scripts/rguard.min.js"""
+ if not root:
+ root = root_blogspot
+
url = url.replace("pw_.g28x", "b")
url = url.replace("d2pr.x_27", "h")
if url.startswith("https"):
- return url
-
- url, sep, rest = url.partition("?")
- containsS0 = "=s0" in url
- url = url[:-3 if containsS0 else -6]
- url = url[15:33] + url[50:]
- url = url[0:-11] + url[-2:]
- url = binascii.a2b_base64(url).decode()
- url = url[0:13] + url[17:]
- url = url[0:-2] + ("=s0" if containsS0 else "=s1600")
- return "https://2.bp.blogspot.com/" + url + sep + rest
+ return url.replace(root_blogspot, root, 1)
+
+ path, sep, query = url.partition("?")
+
+ contains_s0 = "=s0" in path
+ path = path[:-3 if contains_s0 else -6]
+ path = path[15:33] + path[50:] # step1()
+ path = path[0:-11] + path[-2:] # step2()
+ path = binascii.a2b_base64(path).decode() # atob()
+ path = path[0:13] + path[17:]
+ path = path[0:-2] + ("=s0" if contains_s0 else "=s1600")
+ return root + "/" + path + sep + query
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
new file mode 100644
index 0000000..ab8a9b1
--- /dev/null
+++ b/gallery_dl/extractor/realbooru.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://realbooru.com/"""
+
+from . import booru
+from .. import text, util
+import collections
+import re
+
+BASE_PATTERN = r"(?:https?://)?realbooru\.com"
+
+
+class RealbooruExtractor(booru.BooruExtractor):
+ basecategory = "booru"
+ category = "realbooru"
+ root = "https://realbooru.com"
+
+ def _parse_post(self, post_id):
+ url = "{}/index.php?page=post&s=view&id={}".format(
+ self.root, post_id)
+ page = self.request(url).text
+ extr = text.extract_from(page)
+ rating = extr('name="rating" content="', '"')
+ extr('class="container"', '>')
+
+ post = {
+ "_html" : page,
+ "id" : post_id,
+ "rating" : "e" if rating == "adult" else (rating or "?")[0],
+ "tags" : text.unescape(extr(' alt="', '"')),
+ "file_url" : extr('src="', '"'),
+ "created_at": extr(">Posted at ", " by "),
+ "uploader" : extr(">", "<"),
+ "score" : extr('">', "<"),
+ "title" : extr('id="title" style="width: 100%;" value="', '"'),
+ "source" : extr('d="source" style="width: 100%;" value="', '"'),
+ }
+
+ post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
+ return post
+
+ def skip(self, num):
+ self.page_start += num
+ return num
+
+ def _prepare(self, post):
+ post["date"] = text.parse_datetime(post["created_at"], "%b, %d %Y")
+
+ def _pagination(self, params, begin, end):
+ url = self.root + "/index.php"
+ params["pid"] = self.page_start
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for post_id in text.extract_iter(page, begin, end):
+ cnt += 1
+ yield self._parse_post(post_id)
+
+ if cnt < self.per_page:
+ return
+ params["pid"] += self.per_page
+
+ def _tags(self, post, _):
+ page = post["_html"]
+ tag_container = text.extr(page, 'id="tagLink"', '</div>')
+ tags = collections.defaultdict(list)
+ pattern = re.compile(
+ r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)')
+ for tag_type, tag_name in pattern.findall(tag_container):
+ tags[tag_type].append(text.unescape(text.unquote(tag_name)))
+ for key, value in tags.items():
+ post["tags_" + key] = " ".join(value)
+
+
+class RealbooruTagExtractor(RealbooruExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{id}"
+ per_page = 42
+ pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)"
+ example = "https://realbooru.com/index.php?page=post&s=list&tags=TAG"
+
+ def metadata(self):
+ self.tags = text.unquote(self.groups[0].replace("+", " "))
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ return self._pagination({
+ "page": "post",
+ "s" : "list",
+ "tags": self.tags,
+ }, '<a id="p', '"')
+
+
+class RealbooruFavoriteExtractor(RealbooruExtractor):
+ subcategory = "favorite"
+ directory_fmt = ("{category}", "favorites", "{favorite_id}")
+ archive_fmt = "f_{favorite_id}_{id}"
+ per_page = 50
+ pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
+ example = "https://realbooru.com/index.php?page=favorites&s=view&id=12345"
+
+ def metadata(self):
+ return {"favorite_id": text.parse_int(self.groups[0])}
+
+ def posts(self):
+ return self._pagination({
+ "page": "favorites",
+ "s" : "view",
+ "id" : self.groups[0],
+ }, '" id="p', '"')
+
+
+class RealbooruPoolExtractor(RealbooruExtractor):
+ subcategory = "pool"
+ directory_fmt = ("{category}", "pool", "{pool} {pool_name}")
+ archive_fmt = "p_{pool}_{id}"
+ pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
+ example = "https://realbooru.com/index.php?page=pool&s=show&id=12345"
+
+ def metadata(self):
+ pool_id = self.groups[0]
+ url = "{}/index.php?page=pool&s=show&id={}".format(self.root, pool_id)
+ page = self.request(url).text
+
+ name, pos = text.extract(page, "<h4>Pool: ", "</h4>")
+ self.post_ids = text.extract_iter(
+ page, 'class="thumb" id="p', '"', pos)
+
+ return {
+ "pool": text.parse_int(pool_id),
+ "pool_name": text.unescape(name),
+ }
+
+ def posts(self):
+ return map(
+ self._parse_post,
+ util.advance(self.post_ids, self.page_start)
+ )
+
+
+class RealbooruPostExtractor(RealbooruExtractor):
+ subcategory = "post"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
+ example = "https://realbooru.com/index.php?page=post&s=view&id=12345"
+
+ def posts(self):
+ return (self._parse_post(self.groups[0]),)
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index f9b1a7f..4c4fb3a 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -145,6 +145,14 @@ class ZerochanTagExtractor(ZerochanExtractor):
self.posts = self.posts_api
self.session.headers["User-Agent"] = util.USERAGENT
+ exts = self.config("extensions")
+ if exts:
+ if isinstance(exts, str):
+ exts = exts.split(",")
+ self.exts = exts
+ else:
+ self.exts = ("jpg", "png", "webp", "gif")
+
def metadata(self):
return {"search_tags": text.unquote(
self.search_tag.replace("+", " "))}
@@ -194,8 +202,6 @@ class ZerochanTagExtractor(ZerochanExtractor):
"p" : self.page_start,
}
- static = "https://static.zerochan.net/.full."
-
while True:
response = self.request(url, params=params, allow_redirects=False)
@@ -221,15 +227,20 @@ class ZerochanTagExtractor(ZerochanExtractor):
yield post
else:
for post in posts:
- base = static + str(post["id"])
- post["file_url"] = base + ".jpg"
- post["_fallback"] = (base + ".png",)
+ urls = self._urls(post)
+ post["file_url"] = next(urls)
+ post["_fallback"] = urls
yield post
if not data.get("next"):
return
params["p"] += 1
+ def _urls(self, post, static="https://static.zerochan.net/.full."):
+ base = static + str(post["id"]) + "."
+ for ext in self.exts:
+ yield base + ext
+
class ZerochanImageExtractor(ZerochanExtractor):
subcategory = "image"
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 2bf03f4..2dab0d6 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.28.0"
+__version__ = "1.28.1"
__variant__ = None