author    Unit 193 <unit193@unit193.net>  2025-09-23 07:44:37 -0400
committer Unit 193 <unit193@unit193.net>  2025-09-23 07:44:37 -0400
commit    42b62671fabfdcf983a9575221420d85f7fbcac1 (patch)
tree      fa6b2af249a7216aae5c70a926c6d08be1ac55a6
parent    3b7f8716690b7aa1994a9cb387bbc7215e01a4ed (diff)
New upstream version 1.30.8 (upstream/1.30.8)
-rw-r--r--  CHANGELOG.md                             66
-rw-r--r--  PKG-INFO                                  6
-rw-r--r--  README.rst                                4
-rw-r--r--  data/man/gallery-dl.1                     2
-rw-r--r--  data/man/gallery-dl.conf.5              342
-rw-r--r--  docs/gallery-dl.conf                     40
-rw-r--r--  gallery_dl.egg-info/PKG-INFO              6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt           4
-rw-r--r--  gallery_dl/extractor/2ch.py              31
-rw-r--r--  gallery_dl/extractor/4archive.py          6
-rw-r--r--  gallery_dl/extractor/__init__.py          4
-rw-r--r--  gallery_dl/extractor/bellazon.py         85
-rw-r--r--  gallery_dl/extractor/bunkr.py             2
-rw-r--r--  gallery_dl/extractor/chevereto.py        16
-rw-r--r--  gallery_dl/extractor/danbooru.py         17
-rw-r--r--  gallery_dl/extractor/facebook.py         10
-rw-r--r--  gallery_dl/extractor/hdoujin.py          42
-rw-r--r--  gallery_dl/extractor/imgpile.py         119
-rw-r--r--  gallery_dl/extractor/instagram.py        12
-rw-r--r--  gallery_dl/extractor/iwara.py            50
-rw-r--r--  gallery_dl/extractor/kemono.py           28
-rw-r--r--  gallery_dl/extractor/lensdump.py          3
-rw-r--r--  gallery_dl/extractor/mangadex.py         69
-rw-r--r--  gallery_dl/extractor/mangataro.py       105
-rw-r--r--  gallery_dl/extractor/pinterest.py       104
-rw-r--r--  gallery_dl/extractor/reddit.py           36
-rw-r--r--  gallery_dl/extractor/schalenetwork.py   149
-rw-r--r--  gallery_dl/extractor/simpcity.py         56
-rw-r--r--  gallery_dl/extractor/thehentaiworld.py  139
-rw-r--r--  gallery_dl/extractor/twitter.py           2
-rw-r--r--  gallery_dl/extractor/vipergirls.py       12
-rw-r--r--  gallery_dl/job.py                         6
-rw-r--r--  gallery_dl/postprocessor/common.py        4
-rw-r--r--  gallery_dl/postprocessor/metadata.py      9
-rw-r--r--  gallery_dl/postprocessor/python.py       20
-rw-r--r--  gallery_dl/util.py                       13
-rw-r--r--  gallery_dl/version.py                     2
-rw-r--r--  gallery_dl/ytdl.py                       18
-rw-r--r--  test/test_extractor.py                    3
-rw-r--r--  test/test_postprocessor.py               25
-rw-r--r--  test/test_util.py                         3
-rw-r--r--  test/test_ytdl.py                         9
42 files changed, 1344 insertions, 335 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5eaa4d..5aa64b9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,28 +1,48 @@
-## 1.30.7 - 2025-09-14
+## 1.30.8 - 2025-09-23
### Extractors
#### Additions
-- [bellazon] add support ([#7480](https://github.com/mikf/gallery-dl/issues/7480))
-- [cyberfile] add support ([#5015](https://github.com/mikf/gallery-dl/issues/5015))
-- [fansly] add `creator-media` extractor ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [simpcity] add support ([#3127](https://github.com/mikf/gallery-dl/issues/3127) [#5145](https://github.com/mikf/gallery-dl/issues/5145) [#5879](https://github.com/mikf/gallery-dl/issues/5879) [#8187](https://github.com/mikf/gallery-dl/issues/8187))
+- [chevereto] support `imglike.com` ([#5179](https://github.com/mikf/gallery-dl/issues/5179))
+- [chevereto] add `category` extractor ([#5179](https://github.com/mikf/gallery-dl/issues/5179))
+- [Danbooru] add `random` extractor ([#8270](https://github.com/mikf/gallery-dl/issues/8270))
+- [hdoujin] add support ([#6810](https://github.com/mikf/gallery-dl/issues/6810))
+- [imgpile] add support ([#5044](https://github.com/mikf/gallery-dl/issues/5044))
+- [mangadex] add `covers` extractor ([#4994](https://github.com/mikf/gallery-dl/issues/4994))
+- [mangataro] add support ([#8237](https://github.com/mikf/gallery-dl/issues/8237))
+- [thehentaiworld] add support ([#274](https://github.com/mikf/gallery-dl/issues/274) [#8237](https://github.com/mikf/gallery-dl/issues/8237))
#### Fixes
-- [aibooru] fix download URLs ([#8212](https://github.com/mikf/gallery-dl/issues/8212))
-- [ao3] fix pagination ([#8206](https://github.com/mikf/gallery-dl/issues/8206))
-- [boosty] fix extracting `accessToken` from cookies ([#8203](https://github.com/mikf/gallery-dl/issues/8203))
-- [comick] update `buildId` on `404` errors ([#8157](https://github.com/mikf/gallery-dl/issues/8157))
-- [facebook] fix `/photo/?fbid=…&set=…` URLs being handled as a set ([#8181](https://github.com/mikf/gallery-dl/issues/8181))
-- [fansly] fix & improve format selection ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [fansly] fix posts with more than 5 files ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [imgbb] fix & update ([#7936](https://github.com/mikf/gallery-dl/issues/7936))
-- [tiktok] fix `KeyError: 'author'` ([#8189](https://github.com/mikf/gallery-dl/issues/8189))
+- [4archive] fix `TypeError` ([#8217](https://github.com/mikf/gallery-dl/issues/8217))
+- [bellazon] fix video attachments ([#8239](https://github.com/mikf/gallery-dl/issues/8239))
+- [bunkr] fix `JSONDecodeError` for files with URL slugs containing apostrophes `'` ([#8150](https://github.com/mikf/gallery-dl/issues/8150))
+- [instagram] ensure manifest data exists before attempting a DASH download ([#8267](https://github.com/mikf/gallery-dl/issues/8267))
+- [schalenetwork] fix extraction ([#6948](https://github.com/mikf/gallery-dl/issues/6948) [#7391](https://github.com/mikf/gallery-dl/issues/7391) [#7728](https://github.com/mikf/gallery-dl/issues/7728))
+- [twitter] fix quoted Tweets being marked as `deleted` ([#8225](https://github.com/mikf/gallery-dl/issues/8225))
#### Improvements
-- [comick] handle redirects
-- [fansly] provide fallback URL for manifest downloads ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [fansly:creator] support custom wall IDs ([#4401](https://github.com/mikf/gallery-dl/issues/4401))
-- [tungsten:user] support filtering results by tag ([#8061](https://github.com/mikf/gallery-dl/issues/8061))
-- [twitter] continue searches on empty response ([#8173](https://github.com/mikf/gallery-dl/issues/8173))
-- [twitter] implement various `search-…` options ([#8173](https://github.com/mikf/gallery-dl/issues/8173))
+- [2ch] update domain to `2ch.su`, support `2ch.life` URLs ([#8216](https://github.com/mikf/gallery-dl/issues/8216))
+- [bellazon][simpcity][vipergirls] process threads in descending order ([#8248](https://github.com/mikf/gallery-dl/issues/8248))
+- [bellazon] extract `inline` images ([#8247](https://github.com/mikf/gallery-dl/issues/8247))
+- [bellazon] support video embeds ([#8239](https://github.com/mikf/gallery-dl/issues/8239))
+- [bellazon] support `#comment-12345` post links ([#8239](https://github.com/mikf/gallery-dl/issues/8239))
+- [lensdump] support new direct file URL pattern ([#8251](https://github.com/mikf/gallery-dl/issues/8251))
+- [simpcity] extract URLs of `<iframe>` embeds ([#8214](https://github.com/mikf/gallery-dl/issues/8214) [#8256](https://github.com/mikf/gallery-dl/issues/8256))
+- [simpcity] improve post content extraction ([#8214](https://github.com/mikf/gallery-dl/issues/8214))
+#### Metadata
+- [facebook] extract `biography` metadata ([#8233](https://github.com/mikf/gallery-dl/issues/8233))
+- [instagram:tagged] provide full `tagged_…` metadata when using `id:…` URLs ([#8263](https://github.com/mikf/gallery-dl/issues/8263))
+- [iwara] extract more metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582))
+- [iwara] make `type` available for directories ([#8245](https://github.com/mikf/gallery-dl/issues/8245))
+- [reddit] provide `comment` metadata for all media files ([#8228](https://github.com/mikf/gallery-dl/issues/8228))
+#### Options
+- [bellazon] add `quoted` option ([#8247](https://github.com/mikf/gallery-dl/issues/8247))
+- [bellazon] implement `order-posts` option ([#8248](https://github.com/mikf/gallery-dl/issues/8248))
+- [kemono:discord] implement `order-posts` option ([#8241](https://github.com/mikf/gallery-dl/issues/8241))
+- [simpcity] implement `order-posts` option ([#8248](https://github.com/mikf/gallery-dl/issues/8248))
+- [vipergirls] implement `order-posts` option ([#8248](https://github.com/mikf/gallery-dl/issues/8248))
+### Downloaders
+- [ytdl] fix errors caused by deprecated options removal
+### Post Processors
+- [metadata] add `"mode": "print"` ([#2691](https://github.com/mikf/gallery-dl/issues/2691))
+- [python] add `"mode": "eval"`
+- close archive database connections ([#8243](https://github.com/mikf/gallery-dl/issues/8243))
### Miscellaneous
-- [formatter] exclude `<>\` characters from `!R` results ([#8180](https://github.com/mikf/gallery-dl/issues/8180))
-- [formatter] support negative indices
-- [util] emit debug `Proxy Map` logging message ([#8195](https://github.com/mikf/gallery-dl/issues/8195))
+- [util] define `__enter__` & `__exit__` methods for `NullResponse` objects ([#8227](https://github.com/mikf/gallery-dl/issues/8227))
+- [util] extend list of ISO 639 language codes
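The new `"mode": "print"` for the metadata post processor writes formatted metadata to standard output instead of a file. A minimal configuration sketch (the `event` and `content-format` values here are illustrative placeholders, not taken from this commit):

    {
        "extractor": {
            "postprocessors": [
                {
                    "name": "metadata",
                    "mode": "print",
                    "event": "post",
                    "content-format": "{category} {title}\n"
                }
            ]
        }
    }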
diff --git a/PKG-INFO b/PKG-INFO
index 6abd758..a339b24 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.7
+Version: 1.30.8
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -141,9 +141,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.7/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.8/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.7/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.8/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 71cf809..554fe51 100644
--- a/README.rst
+++ b/README.rst
@@ -79,9 +79,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.7/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.8/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.7/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.8/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 36ca314..6560c3b 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-09-14" "1.30.7" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-09-23" "1.30.8" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 8d2f806..fbf32bc 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-09-14" "1.30.7" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-09-23" "1.30.8" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -501,6 +501,7 @@ response before \f[I]retrying\f[] the request.
\f[I][Danbooru]\f[],
\f[I][E621]\f[],
\f[I][foolfuuka]:search\f[],
+\f[I]hdoujin\f[],
\f[I]itaku\f[],
\f[I]newgrounds\f[],
\f[I][philomena]\f[],
@@ -512,6 +513,7 @@ response before \f[I]retrying\f[] the request.
\f[I]scrolller\f[],
\f[I]sizebooru\f[],
\f[I]soundgasm\f[],
+\f[I]thehentaiworld\f[],
\f[I]urlgalleries\f[],
\f[I]vk\f[],
\f[I]webtoons\f[],
@@ -1883,6 +1885,34 @@ Supported module types are
\f[I]image\f[], \f[I]video\f[], \f[I]mediacollection\f[], \f[I]embed\f[], \f[I]text\f[].
+.SS extractor.bellazon.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+posts of a \f[I]thread\f[] are processed.
+
+\f[I]"asc"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[] | \f[I]"reverse"\f[]
+Descending order (newest first)
+
+
+.SS extractor.bellazon.quoted
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract files from quoted content.
+
+
.SS extractor.[blogger].api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -3656,6 +3686,87 @@ to attempt to fetch the current value used by gofile.
Recursively download files from subfolders.
+.SS extractor.hdoujin.crt
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+.br
+* "0542daa9-352c-4fd5-a497-6c6d5cf07423"
+.br
+* "/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"
+
+.IP "Description:" 4
+The \f[I]crt\f[] query parameter value
+sent when fetching gallery data.
+
+To get this value:
+
+.br
+* Open your browser's Developer Tools (F12)
+.br
+* Select Network -> XHR
+.br
+* Open a gallery page
+.br
+* Select the last Network entry and copy its \f[I]crt\f[] value
+
+Note: You will also need your browser's
+\f[I]user-agent\f[]
+
+
+.SS extractor.hdoujin.format
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["0", "1600", "1280", "980", "780"]\f[]
+
+.IP "Description:" 4
+Name(s) of the image format to download.
+
+When more than one format is given, the first available one is selected.
+
+Possible formats are
+.br
+\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[] (original)
+.br
+
+
+.SS extractor.hdoujin.tags
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Group \f[I]tags\f[] by type and
+provide them as \f[I]tags_<type>\f[] metadata fields,
+for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
+
+
+.SS extractor.hdoujin.token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+.br
+* "3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"
+.br
+* "Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"
+.br
+* "Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"
+
+.IP "Description:" 4
+\f[I]Authorization\f[] header value
+used for requests to \f[I]https://api.hdoujin.org\f[]
+to access \f[I]favorite\f[] galleries.
+
+
.SS extractor.hentaifoundry.descriptions
.IP "Type:" 6
\f[I]string\f[]
@@ -4209,12 +4320,28 @@ Controls the order in which
\f[I]revisions\f[]
are returned.
-.br
-* \f[I]"asc"\f[]: Ascending order (oldest first)
-.br
-* \f[I]"desc"\f[]: Descending order (newest first)
-.br
-* \f[I]"reverse"\f[]: Same as \f[I]"asc"\f[]
+\f[I]"asc"\f[] | \f[I]"reverse"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[]
+Descending order (newest first)
+
+
+.SS extractor.kemono.discord.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"asc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+\f[I]discord\f[] posts
+are returned.
+
+\f[I]"asc"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[] | \f[I]"reverse"\f[]
+Descending order (newest first)
.SS extractor.khinsider.covers
@@ -4245,54 +4372,6 @@ If the selected format is not available,
the first in the list gets chosen (usually mp3).
-.SS extractor.schalenetwork.cbz
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]true\f[]
-
-.IP "Description:" 4
-Download each gallery as a single \f[I].cbz\f[] file.
-
-Disabling this option causes a gallery
-to be downloaded as individual image files.
-
-
-.SS extractor.schalenetwork.format
-.IP "Type:" 6
-.br
-* \f[I]string\f[]
-.br
-* \f[I]list\f[] of \f[I]strings\f[]
-
-.IP "Default:" 9
-\f[I]["0", "1600", "1280", "980", "780"]\f[]
-
-.IP "Description:" 4
-Name(s) of the image format to download.
-
-When more than one format is given, the first available one is selected.
-
-Possible formats are
-.br
-\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[] (original)
-.br
-
-
-.SS extractor.schalenetwork.tags
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Group \f[I]tags\f[] by type and
-provide them as \f[I]tags_<type>\f[] metadata fields,
-for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
-
-
.SS extractor.lolisafe.domain
.IP "Type:" 6
\f[I]string\f[]
@@ -5706,6 +5785,87 @@ Download video embeds from external sites.
Download videos.
+.SS extractor.schalenetwork.crt
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+.br
+* "0542daa9-352c-4fd5-a497-6c6d5cf07423"
+.br
+* "/12345/a1b2c3d4e5f6?crt=0542daa9-352c-4fd5-a497-6c6d5cf07423"
+
+.IP "Description:" 4
+The \f[I]crt\f[] query parameter value
+sent when fetching gallery data.
+
+To get this value:
+
+.br
+* Open your browser's Developer Tools (F12)
+.br
+* Select Network -> XHR
+.br
+* Open a gallery page
+.br
+* Select the last Network entry and copy its \f[I]crt\f[] value
+
+Note: You will also need your browser's
+\f[I]user-agent\f[]
+
+
+.SS extractor.schalenetwork.format
+.IP "Type:" 6
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["0", "1600", "1280", "980", "780"]\f[]
+
+.IP "Description:" 4
+Name(s) of the image format to download.
+
+When more than one format is given, the first available one is selected.
+
+Possible formats are
+.br
+\f[I]"780"\f[], \f[I]"980"\f[], \f[I]"1280"\f[], \f[I]"1600"\f[], \f[I]"0"\f[] (original)
+.br
+
+
+.SS extractor.schalenetwork.tags
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Group \f[I]tags\f[] by type and
+provide them as \f[I]tags_<type>\f[] metadata fields,
+for example \f[I]tags_artist\f[] or \f[I]tags_character\f[].
+
+
+.SS extractor.schalenetwork.token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+.br
+* "3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"
+.br
+* "Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"
+.br
+* "Authorization: Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893"
+
+.IP "Description:" 4
+\f[I]Authorization\f[] header value
+used for requests to \f[I]https://api.schale.network\f[]
+to access \f[I]favorite\f[] galleries.
+
+
.SS extractor.sexcom.gifs
.IP "Type:" 6
\f[I]bool\f[]
@@ -5717,6 +5877,23 @@ Download videos.
Download animated images as \f[I].gif\f[] instead of \f[I].webp\f[]
+.SS extractor.simpcity.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+posts of a \f[I]thread\f[] are processed.
+
+\f[I]"asc"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[] | \f[I]"reverse"\f[]
+Descending order (newest first)
+
+
.SS extractor.sizebooru.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -7003,6 +7180,23 @@ Note: Requires \f[I]login\f[]
or \f[I]cookies\f[]
+.SS extractor.vipergirls.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+posts of a \f[I]thread\f[] are processed.
+
+\f[I]"asc"\f[]
+Ascending order (oldest first)
+\f[I]"desc"\f[] | \f[I]"reverse"\f[]
+Descending order (newest first)
+
+
.SS extractor.vk.offset
.IP "Type:" 6
\f[I]integer\f[]
@@ -9226,6 +9420,26 @@ The event(s) for which \f[I]python.function\f[] gets called.
See \f[I]metadata.event\f[] for a list of available events.
+.SS python.expression
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+.br
+* "print('Foo Bar')"
+.br
+* "terminate()"
+
+.IP "Description:" 4
+A
+\f[I]Python expression\f[]
+to
+\f[I]evaluate\f[].
+
+Note: Only used with
+\f[I]"mode": "eval"\f[]
+
+
.SS python.function
.IP "Type:" 6
\f[I]string\f[]
@@ -9248,6 +9462,24 @@ This function is specified as \f[I]<module>:<function name>\f[], where
It gets called with the current metadata dict as argument.
+.SS python.mode
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"function"\f[]
+
+.IP "Description:" 4
+Selects what Python code to run.
+
+\f[I]"eval"\f[]
+Evaluate an
+\f[I]expression\f[]
+\f[I]"function"\f[]
+Call a
+\f[I]function\f[]
+
+
.SS rename.from
.IP "Type:" 6
\f[I]string\f[]
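The `python.mode`/`python.expression` pair documented above allows running a bare expression instead of importing a function. A configuration sketch (the expression is the man page's own example; the `event` value is an illustrative assumption):

    {
        "extractor": {
            "postprocessors": [
                {
                    "name": "python",
                    "mode": "eval",
                    "expression": "print('Foo Bar')",
                    "event": "file"
                }
            ]
        }
    }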
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 823fcc0..5a1b604 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -154,6 +154,11 @@
"modules": ["image", "video", "mediacollection", "embed"]
},
+ "bellazon":
+ {
+ "order-posts": "desc",
+ "quoted" : false
+ },
"bilibili":
{
"sleep-request": "3.0-6.0"
@@ -386,6 +391,15 @@
"website-token": null,
"recursive": false
},
+ "hdoujin":
+ {
+ "crt" : "",
+ "token": "",
+ "sleep-request": "0.5-1.5",
+
+ "format": ["0", "1600", "1280", "980", "780"],
+ "tags" : false
+ },
"hentaifoundry":
{
"descriptions": "text",
@@ -475,7 +489,11 @@
"max-posts" : null,
"metadata" : true,
"revisions" : false,
- "order-revisions": "desc"
+ "order-revisions": "desc",
+
+ "discord": {
+ "order-posts": "asc"
+ }
},
"khinsider":
{
@@ -680,11 +698,10 @@
},
"schalenetwork":
{
- "username": "",
- "password": "",
+ "crt" : "",
+ "token": "",
"sleep-request": "0.5-1.5",
- "cbz" : true,
"format": ["0", "1600", "1280", "980", "780"],
"tags" : false
},
@@ -698,6 +715,12 @@
{
"gifs": true
},
+ "simpcity":
+ {
+ "cookies": null,
+
+ "order-posts": "desc"
+ },
"sizebooru":
{
"sleep-request": "0.5-1.5",
@@ -761,6 +784,10 @@
{
"format": ["gif", "mp4", "webm", "webp"]
},
+ "thehentaiworld":
+ {
+ "sleep-request": "0.5-1.5"
+ },
"tiktok":
{
"audio" : true,
@@ -856,8 +883,9 @@
"password": "",
"sleep-request": "0.5",
- "domain" : "viper.click",
- "like" : false
+ "domain" : "viper.click",
+ "like" : false,
+ "order-posts": "desc"
},
"vk":
{
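Filled in, the new `hdoujin` block above would look roughly like this (the `crt` and `token` values are the man page's own example placeholders; `user-agent` is the general extractor option that the `crt` description says must match the browser the value was copied from):

    {
        "extractor": {
            "hdoujin": {
                "crt"  : "0542daa9-352c-4fd5-a497-6c6d5cf07423",
                "token": "Bearer 3f1a9b72-4e4d-4f4e-9e5d-4a2b99f7c893",
                "user-agent": "<your browser's User-Agent string>"
            }
        }
    }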
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 6abd758..a339b24 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: gallery_dl
-Version: 1.30.7
+Version: 1.30.8
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -141,9 +141,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.7/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.8/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.7/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.8/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index de3a9ed..93a6880 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -114,6 +114,7 @@ gallery_dl/extractor/girlsreleased.py
gallery_dl/extractor/girlswithmuscle.py
gallery_dl/extractor/gofile.py
gallery_dl/extractor/hatenablog.py
+gallery_dl/extractor/hdoujin.py
gallery_dl/extractor/hentai2read.py
gallery_dl/extractor/hentaicosplays.py
gallery_dl/extractor/hentaifoundry.py
@@ -130,6 +131,7 @@ gallery_dl/extractor/imagefap.py
gallery_dl/extractor/imagehosts.py
gallery_dl/extractor/imgbb.py
gallery_dl/extractor/imgbox.py
+gallery_dl/extractor/imgpile.py
gallery_dl/extractor/imgth.py
gallery_dl/extractor/imgur.py
gallery_dl/extractor/imhentai.py
@@ -161,6 +163,7 @@ gallery_dl/extractor/mangahere.py
gallery_dl/extractor/manganelo.py
gallery_dl/extractor/mangapark.py
gallery_dl/extractor/mangaread.py
+gallery_dl/extractor/mangataro.py
gallery_dl/extractor/mangoxo.py
gallery_dl/extractor/mastodon.py
gallery_dl/extractor/message.py
@@ -238,6 +241,7 @@ gallery_dl/extractor/tapas.py
gallery_dl/extractor/tcbscans.py
gallery_dl/extractor/telegraph.py
gallery_dl/extractor/tenor.py
+gallery_dl/extractor/thehentaiworld.py
gallery_dl/extractor/tiktok.py
gallery_dl/extractor/tmohentai.py
gallery_dl/extractor/toyhouse.py
diff --git a/gallery_dl/extractor/2ch.py b/gallery_dl/extractor/2ch.py
index f5bb7b7..912a251 100644
--- a/gallery_dl/extractor/2ch.py
+++ b/gallery_dl/extractor/2ch.py
@@ -4,37 +4,41 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://2ch.hk/"""
+"""Extractors for https://2ch.su/"""
from .common import Extractor, Message
from .. import text, util
+BASE_PATTERN = r"(?:https?://)?2ch\.(su|life|hk)"
+
class _2chThreadExtractor(Extractor):
"""Extractor for 2ch threads"""
category = "2ch"
subcategory = "thread"
- root = "https://2ch.hk"
+ root = "https://2ch.su"
directory_fmt = ("{category}", "{board}", "{thread} {title}")
filename_fmt = "{tim}{filename:? //}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
- pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
- example = "https://2ch.hk/a/res/12345.html"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/res/(\d+)"
+ example = "https://2ch.su/a/res/12345.html"
def __init__(self, match):
+ tld = match[1]
+ self.root = f"https://2ch.{'su' if tld == 'hk' else tld}"
Extractor.__init__(self, match)
- self.board, self.thread = match.groups()
def items(self):
- url = f"{self.root}/{self.board}/res/{self.thread}.json"
+ _, board, thread = self.groups
+ url = f"{self.root}/{board}/res/{thread}.json"
posts = self.request_json(url)["threads"][0]["posts"]
op = posts[0]
title = op.get("subject") or text.remove_html(op["comment"])
thread = {
- "board" : self.board,
- "thread": self.thread,
+ "board" : board,
+ "thread": thread,
"title" : text.unescape(title)[:50],
}
@@ -61,16 +65,17 @@ class _2chBoardExtractor(Extractor):
"""Extractor for 2ch boards"""
category = "2ch"
subcategory = "board"
- root = "https://2ch.hk"
- pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
- example = "https://2ch.hk/a/"
+ root = "https://2ch.su"
+ pattern = rf"{BASE_PATTERN}/([^/?#]+)/?$"
+ example = "https://2ch.su/a/"
def __init__(self, match):
+ tld = match[1]
+ self.root = f"https://2ch.{'su' if tld == 'hk' else tld}"
Extractor.__init__(self, match)
- self.board = match[1]
def items(self):
- base = f"{self.root}/{self.board}"
+ base = f"{self.root}/{self.groups[1]}"
# index page
url = f"{base}/index.json"
diff --git a/gallery_dl/extractor/4archive.py b/gallery_dl/extractor/4archive.py
index c9be2a4..4c43464 100644
--- a/gallery_dl/extractor/4archive.py
+++ b/gallery_dl/extractor/4archive.py
@@ -62,7 +62,8 @@ class _4archiveThreadExtractor(Extractor):
        data = {
            "name": extr('class="name">', "</span>"),
            "date": text.parse_datetime(
-                extr('class="dateTime postNum" >', "<").strip(),
+                (extr('class="dateTime">', "<") or
+                 extr('class="dateTime postNum" >', "<")).strip(),
                "%Y-%m-%d %H:%M:%S"),
            "no"  : text.parse_int(extr(">Post No.", "<")),
        }
@@ -70,8 +71,7 @@ class _4archiveThreadExtractor(Extractor):
        extr('class="fileText"', ">File: <a")
        data.update({
            "url"     : extr('href="', '"'),
-            "filename": extr(
-                'rel="noreferrer noopener"', "</a>").strip()[1:],
+            "filename": extr('alt="Image: ', '"'),
            "size"    : text.parse_bytes(extr(" (", ", ")[:-1]),
            "width"   : text.parse_int(extr("", "x")),
            "height"  : text.parse_int(extr("", "px")),
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index b32fcd1..abdb6cc 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -73,6 +73,7 @@ modules = [
"girlswithmuscle",
"gofile",
"hatenablog",
+ "hdoujin",
"hentai2read",
"hentaicosplays",
"hentaifoundry",
@@ -88,6 +89,7 @@ modules = [
"imagefap",
"imgbb",
"imgbox",
+ "imgpile",
"imgth",
"imgur",
"imhentai",
@@ -118,6 +120,7 @@ modules = [
"manganelo",
"mangapark",
"mangaread",
+ "mangataro",
"mangoxo",
"misskey",
"motherless",
@@ -188,6 +191,7 @@ modules = [
"tcbscans",
"telegraph",
"tenor",
+ "thehentaiworld",
"tiktok",
"tmohentai",
"toyhouse",
diff --git a/gallery_dl/extractor/bellazon.py b/gallery_dl/extractor/bellazon.py
index 5c9b9cd..5dcb6a5 100644
--- a/gallery_dl/extractor/bellazon.py
+++ b/gallery_dl/extractor/bellazon.py
@@ -20,32 +20,61 @@ class BellazonExtractor(Extractor):
root = "https://www.bellazon.com/main"
directory_fmt = ("{category}", "{thread[section]}",
"{thread[title]} ({thread[id]})")
- filename_fmt = "{post[id]}_{num:>02}_{id}.{extension}"
- archive_fmt = "{post[id]}/{filename}"
+ filename_fmt = "{post[id]}_{num:>02}_{id}_{filename}.{extension}"
+ archive_fmt = "{post[id]}/{id}_{filename}"
def items(self):
- extract_urls = text.re(r'<a ([^>]*?href="([^"]+)".*?)</a>').findall
- native = f"{self.root}/"
+ native = (f"{self.root}/", f"{self.root[6:]}/")
+ extract_urls = text.re(
+ r'(?s)<('
+ r'(?:video .*?<source src|a [^>]*?href)="([^"]+).*?</a>'
+ r'|img [^>]*?src="([^"]+)"[^>]*>'
+ r')'
+ ).findall
+
+ if self.config("quoted", False):
+ strip_quoted = None
+ else:
+ strip_quoted = text.re(r"(?s)<blockquote .*?</blockquote>").sub
for post in self.posts():
- urls = extract_urls(post["content"])
+ if strip_quoted is None:
+ urls = extract_urls(post["content"])
+ else:
+ urls = extract_urls(strip_quoted("", post["content"]))
+
data = {"post": post}
post["count"] = data["count"] = len(urls)
yield Message.Directory, data
- for data["num"], (info, url) in enumerate(urls, 1):
- url = text.unescape(url)
+ data["num"] = 0
+ for info, url, url_img in urls:
+ url = text.unescape(url or url_img)
+
if url.startswith(native):
+ if "/uploads/emoticons/" in url or "/profile/" in url:
+ continue
+ data["num"] += 1
if not (alt := text.extr(info, ' alt="', '"')) or (
alt.startswith("post-") and "_thumb." in alt):
name = url
else:
name = text.unescape(alt)
+
dc = text.nameext_from_url(name, data.copy())
dc["id"] = text.extr(info, 'data-fileid="', '"')
if ext := text.extr(info, 'data-fileext="', '"'):
dc["extension"] = ext
+ elif "/core/interface/file/attachment.php" in url:
+ if not dc["id"]:
+ dc["id"] = url.rpartition("?id=")[2]
+ if name := text.extr(info, ">", "<").strip():
+ text.nameext_from_url(name, dc)
+
+ if url[0] == "/":
+ url = f"https:{url}"
yield Message.Url, url, dc
+
else:
yield Message.Queue, url, data
@@ -70,6 +99,28 @@ class BellazonExtractor(Extractor):
pnum += 1
url = f"{base}/page/{pnum}/"
+ def _pagination_reverse(self, base, pnum=None):
+ base = f"{self.root}{base}"
+
+ url = f"{base}/page/9999/" # force redirect to highest page number
+ with self.request(url) as response:
+ parts = response.url.rsplit("/", 3)
+ pnum = text.parse_int(parts[2]) if parts[1] == "page" else 1
+ page = response.text
+
+ while True:
+ yield page
+
+ pnum -= 1
+ if pnum > 1:
+ url = f"{base}/page/{pnum}/"
+ elif pnum == 1:
+ url = f"{base}/"
+ else:
+ return
+
+ page = self.request(url).text
+
def _parse_thread(self, page):
schema = self._extract_jsonld(page)
author = schema["author"]
@@ -88,7 +139,7 @@ class BellazonExtractor(Extractor):
"posts": stats[1]["userInteractionCount"],
"date" : text.parse_datetime(schema["datePublished"]),
"date_updated": text.parse_datetime(schema["dateModified"]),
- "description" : text.unescape(schema["text"]),
+ "description" : text.unescape(schema["text"]).strip(),
"section" : path[-2],
"author" : author["name"],
"author_url" : url_a,
@@ -123,7 +174,7 @@ class BellazonExtractor(Extractor):
class BellazonPostExtractor(BellazonExtractor):
subcategory = "post"
pattern = (rf"{BASE_PATTERN}(/topic/\d+-[\w-]+(?:/page/\d+)?)"
- rf"/?#findComment-(\d+)")
+ rf"/?#(?:findC|c)omment-(\d+)")
example = "https://www.bellazon.com/main/topic/123-SLUG/#findComment-12345"
def posts(self):
@@ -145,10 +196,22 @@ class BellazonThreadExtractor(BellazonExtractor):
example = "https://www.bellazon.com/main/topic/123-SLUG/"
def posts(self):
- for page in self._pagination(*self.groups):
+ if (order := self.config("order-posts")) and \
+ order[0] not in ("d", "r"):
+ pages = self._pagination(*self.groups)
+ reverse = False
+ else:
+ pages = self._pagination_reverse(*self.groups)
+ reverse = True
+
+ for page in pages:
if "thread" not in self.kwdict:
self.kwdict["thread"] = self._parse_thread(page)
- for html in text.extract_iter(page, "<article ", "</article>"):
+ posts = text.extract_iter(page, "<article ", "</article>")
+ if reverse:
+ posts = list(posts)
+ posts.reverse()
+ for html in posts:
yield self._parse_post(html)
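A note on `_pagination_reverse` above: rather than fetching page 1 first to learn the page count, it requests `/page/9999/` and reads the page number the forum redirects to; e.g. a thread whose last page is `/page/12/` yields `pnum = 12`, and the extractor then walks pages 12, 11, ..., 1, reversing the posts within each page so the whole thread is processed newest-first.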
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index cf5bce1..14ebc48 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -162,7 +162,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
file["name"] = util.json_loads(text.extr(
item, 'original:', ',\n').replace("\\'", "'"))
file["slug"] = util.json_loads(text.extr(
- item, 'slug: ', ',\n'))
+ item, 'slug: ', ',\n').replace("\\'", "'"))
file["uuid"] = text.extr(
item, 'name: "', ".")
file["size"] = text.parse_int(text.extr(
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 6ba4d08..67fdb39 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -50,6 +50,10 @@ BASE_PATTERN = CheveretoExtractor.update({
"root": "https://imagepond.net",
"pattern": r"imagepond\.net",
},
+ "imglike": {
+ "root": "https://imglike.com",
+ "pattern": r"imglike\.com",
+ },
})
@@ -152,6 +156,18 @@ class CheveretoAlbumExtractor(CheveretoExtractor):
yield Message.Queue, image, data
+class CheveretoCategoryExtractor(CheveretoExtractor):
+ """Extractor for chevereto galleries"""
+ subcategory = "category"
+ pattern = BASE_PATTERN + r"(/category/[^/?#]+)"
+ example = "https://imglike.com/category/TITLE"
+
+ def items(self):
+ data = {"_extractor": CheveretoImageExtractor}
+ for image in self._pagination(self.root + self.path):
+ yield Message.Queue, image, data
+
+
class CheveretoUserExtractor(CheveretoExtractor):
"""Extractor for chevereto users"""
subcategory = "user"
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index f8ad07a..29c7763 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -278,6 +278,23 @@ class DanbooruTagExtractor(DanbooruExtractor):
return self._pagination("/posts.json", {"tags": self.tags}, prefix)
+class DanbooruRandomExtractor(DanbooruTagExtractor):
+ """Extractor for a random danbooru post"""
+ subcategory = "random"
+ pattern = BASE_PATTERN + r"/posts/random(?:\?(?:[^&#]*&)*tags=([^&#]*))?"
+ example = "https://danbooru.donmai.us/posts/random?tags=TAG"
+
+ def metadata(self):
+ tags = self.groups[-1] or ""
+ self.tags = text.unquote(tags.replace("+", " "))
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ posts = self.request_json(self.root + "/posts/random.json",
+ params={"tags": self.tags or None})
+ return (posts,) if isinstance(posts, dict) else posts
+
+
class DanbooruPoolExtractor(DanbooruExtractor):
"""Extractor for Danbooru pools"""
subcategory = "pool"
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index bf24941..6061737 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -369,6 +369,16 @@ class FacebookExtractor(Extractor):
for edge in (user["profile_tabs"]["profile_user"]
["timeline_nav_app_sections"]["edges"])
]
+
+ if bio := text.extr(page, '"best_description":{"text":"', '"'):
+ user["biography"] = self.decode_all(bio)
+ elif (pos := page.find(
+ '"__module_operation_ProfileCometTileView_profileT')) >= 0:
+ user["biography"] = self.decode_all(text.rextr(
+ page, '"text":"', '"', pos))
+ else:
+ user["biography"] = text.unescape(text.remove_html(text.extr(
+ page, "</span></span></h2>", "<ul>")))
except Exception:
if user is None:
self.log.debug("Failed to extract user data: %s", data)
diff --git a/gallery_dl/extractor/hdoujin.py b/gallery_dl/extractor/hdoujin.py
new file mode 100644
index 0000000..080b899
--- /dev/null
+++ b/gallery_dl/extractor/hdoujin.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hdoujin.org/"""
+
+from . import schalenetwork
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?(hdoujin\.(?:org|net))"
+
+
+class HdoujinBase():
+    """Base class for hdoujin extractors"""
+    category = "hdoujin"
+    root = "https://hdoujin.org"
+    root_api = "https://api.hdoujin.org"
+    root_auth = "https://auth.hdoujin.org"
+
+
+class HdoujinGalleryExtractor(
+        HdoujinBase, schalenetwork.SchalenetworkGalleryExtractor):
+    pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
+    example = "https://hdoujin.org/g/12345/67890abcdef/"
+
+
+class HdoujinSearchExtractor(
+        HdoujinBase, schalenetwork.SchalenetworkSearchExtractor):
+    pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
+    example = "https://hdoujin.org/browse?s=QUERY"
+
+
+class HdoujinFavoriteExtractor(
+        HdoujinBase, schalenetwork.SchalenetworkFavoriteExtractor):
+    pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
+    example = "https://hdoujin.org/favorites"
+
+
+HdoujinBase.extr_class = HdoujinGalleryExtractor
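hdoujin.org runs the same backend as schale.network, so the new module above is almost entirely subclassing: only `category`, the three root URLs, and the URL patterns are overridden, and the `extr_class` hook set on the last line makes search and favorite results queue `HdoujinGalleryExtractor` URLs instead of the schalenetwork gallery extractor (see the new `extr_class` usage in `_pagination` in the schalenetwork.py diff further down).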
diff --git a/gallery_dl/extractor/imgpile.py b/gallery_dl/extractor/imgpile.py
new file mode 100644
index 0000000..9fc3a9c
--- /dev/null
+++ b/gallery_dl/extractor/imgpile.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://imgpile.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?imgpile\.com"
+
+
+class ImgpileExtractor(Extractor):
+    """Base class for imgpile extractors"""
+    category = "imgpile"
+    root = "https://imgpile.com"
+    directory_fmt = ("{category}", "{post[author]}",
+                     "{post[title]} ({post[id_slug]})")
+    archive_fmt = "{post[id_slug]}_{id}"
+
+    def items(self):
+        pass
+
+
+class ImgpilePostExtractor(ImgpileExtractor):
+    subcategory = "post"
+    pattern = rf"{BASE_PATTERN}/p/(\w+)"
+    example = "https://imgpile.com/p/AbCdEfG"
+
+    def items(self):
+        post_id = self.groups[0]
+        url = f"{self.root}/p/{post_id}"
+        page = self.request(url).text
+        extr = text.extract_from(page)
+
+        post = {
+            "id_slug": post_id,
+            "title"  : text.unescape(extr("<title>", " - imgpile<")),
+            "id"     : text.parse_int(extr('data-post-id="', '"')),
+            "author" : extr('/u/', '"'),
+            "score"  : text.parse_int(text.remove_html(extr(
+                'class="post-score">', "</"))),
+            "views"  : text.parse_int(extr(
+                'class="meta-value">', "<").replace(",", "")),
+            "tags"   : text.split_html(extr(
+                " <!-- Tags -->", '<!-- "')),
+        }
+
+        files = self._extract_files(extr)
+        data = {"post": post}
+        data["count"] = post["count"] = len(files)
+
+        yield Message.Directory, data
+        for data["num"], file in enumerate(files, 1):
+            data.update(file)
+            url = file["url"]
+            yield Message.Url, url, text.nameext_from_url(url, data)
+
+    def _extract_files(self, extr):
+        files = []
+
+        while True:
+            media = extr('lass="post-media', '</div>')
+            if not media:
+                break
+            files.append({
+                "id_slug": text.extr(media, 'data-id="', '"'),
+                "id"     : text.parse_int(text.extr(
+                    media, 'data-media-id="', '"')),
+                "url": f"""http{text.extr(media, '<a href="http', '"')}""",
+            })
+        return files
+
+
+class ImgpileUserExtractor(ImgpileExtractor):
+    subcategory = "user"
+    pattern = rf"{BASE_PATTERN}/u/([^/?#]+)"
+    example = "https://imgpile.com/u/USER"
+
+    def items(self):
+        url = f"{self.root}/api/v1/posts"
+        params = {
+            "limit"     : "100",
+            "sort"      : "latest",
+            "period"    : "all",
+            "visibility": "public",
+            # "moderation_status": "approved",
+            "username"  : self.groups[0],
+        }
+        headers = {
+            "Accept"        : "application/json",
+            # "Referer"     : "https://imgpile.com/u/USER",
+            "Content-Type"  : "application/json",
+            # "X-CSRF-TOKEN": "",
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-origin",
+        }
+
+        base = f"{self.root}/p/"
+        while True:
+            data = self.request_json(url, params=params, headers=headers)
+
+            if params is not None:
+                params = None
+                self.kwdict["total"] = data["meta"]["total"]
+
+            for item in data["data"]:
+                item["_extractor"] = ImgpilePostExtractor
+                url = f"{base}{item['slug']}"
+                yield Message.Queue, url, item
+
+            url = data["links"].get("next")
+            if not url:
+                return
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 9b8f8c9..00e06b5 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -95,7 +95,7 @@ class InstagramExtractor(Extractor):
            if videos:
                file["_http_headers"] = videos_headers
            text.nameext_from_url(url, file)
-            if videos_dash:
+            if videos_dash and "_ytdl_manifest_data" in post:
                file["_fallback"] = (url,)
                file["_ytdl_manifest"] = "dash"
                url = f"ytdl:{post['post_url']}{file['num']}.mp4"
@@ -505,10 +505,12 @@
    def metadata(self):
        if self.item.startswith("id:"):
            self.user_id = self.item[3:]
-            return {"tagged_owner_id": self.user_id}
-
-        self.user_id = self.api.user_id(self.item)
-        user = self.api.user_by_name(self.item)
+            if not self.config("metadata"):
+                return {"tagged_owner_id": self.user_id}
+            user = self.api.user_by_id(self.user_id)
+        else:
+            self.user_id = self.api.user_id(self.item)
+            user = self.api.user_by_name(self.item)
        return {
            "tagged_owner_id" : user["id"],
diff --git a/gallery_dl/extractor/iwara.py b/gallery_dl/extractor/iwara.py
index 179909b..8af2f42 100644
--- a/gallery_dl/extractor/iwara.py
+++ b/gallery_dl/extractor/iwara.py
@@ -45,6 +45,7 @@ class IwaraExtractor(Extractor):
image["id"], exc.__class__.__name__, exc)
continue
+ group_info["type"] = "image"
group_info["count"] = len(files)
yield Message.Directory, group_info
for num, file in enumerate(files, 1):
@@ -102,34 +103,37 @@ class IwaraExtractor(Extractor):
raise exception.AbortExtraction(f"Unsupported result type '{type}'")
def extract_media_info(self, item, key, include_file_info=True):
- title = t.strip() if (t := item.get("title")) else ""
+ info = {
+ "id" : item["id"],
+ "slug" : item.get("slug"),
+ "rating" : item.get("rating"),
+ "likes" : item.get("numLikes"),
+ "views" : item.get("numViews"),
+ "comments": item.get("numComments"),
+ "tags" : [t["id"] for t in item.get("tags") or ()],
+ "title" : t.strip() if (t := item.get("title")) else "",
+ "description": t.strip() if (t := item.get("body")) else "",
+ }
if include_file_info:
file_info = item if key is None else item.get(key) or {}
filename, _, extension = file_info.get("name", "").rpartition(".")
- return {
- "id" : item["id"],
- "file_id" : file_info.get("id"),
- "title" : title,
- "filename" : filename,
- "extension": extension,
- "date" : text.parse_datetime(
- file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ"),
- "date_updated": text.parse_datetime(
- file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ"),
- "mime" : file_info.get("mime"),
- "size" : file_info.get("size"),
- "width" : file_info.get("width"),
- "height" : file_info.get("height"),
- "duration" : file_info.get("duration"),
- "type" : file_info.get("type"),
- }
- else:
- return {
- "id" : item["id"],
- "title": title,
- }
+ info["file_id"] = file_info.get("id")
+ info["filename"] = filename
+ info["extension"] = extension
+ info["date"] = text.parse_datetime(
+ file_info.get("createdAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
+ info["date_updated"] = text.parse_datetime(
+ file_info.get("updatedAt"), "%Y-%m-%dT%H:%M:%S.%fZ")
+ info["mime"] = file_info.get("mime")
+ info["size"] = file_info.get("size")
+ info["width"] = file_info.get("width")
+ info["height"] = file_info.get("height")
+ info["duration"] = file_info.get("duration")
+ info["type"] = file_info.get("type")
+
+ return info
def extract_user_info(self, profile):
user = profile.get("user") or {}
diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py
index fc5972c..1f70031 100644
--- a/gallery_dl/extractor/kemono.py
+++ b/gallery_dl/extractor/kemono.py
@@ -407,7 +407,11 @@ class KemonoDiscordExtractor(KemonoExtractor):
r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
find_hash = util.re(HASH_PATTERN).match
- posts = self.api.discord_channel(channel_id)
+ if (order := self.config("order-posts")) and order[0] in ("r", "d"):
+ posts = self.api.discord_channel(channel_id, channel["post_count"])
+ else:
+ posts = self.api.discord_channel(channel_id)
+
if max_posts := self.config("max-posts"):
posts = itertools.islice(posts, max_posts)
@@ -627,9 +631,12 @@ class KemonoAPI():
endpoint = f"/{service}/user/{creator_id}/tags"
return self._call(endpoint)
- def discord_channel(self, channel_id):
+ def discord_channel(self, channel_id, post_count=None):
endpoint = f"/discord/channel/{channel_id}"
- return self._pagination(endpoint, {}, 150)
+ if post_count is None:
+ return self._pagination(endpoint, {}, 150)
+ else:
+ return self._pagination_reverse(endpoint, {}, 150, post_count)
def discord_channel_lookup(self, server_id):
endpoint = f"/discord/channel/lookup/{server_id}"
@@ -670,3 +677,18 @@ class KemonoAPI():
if len(data) < batch:
return
params["o"] += batch
+
+ def _pagination_reverse(self, endpoint, params, batch, count):
+ params["o"] = count // batch * batch
+
+ while True:
+ data = self._call(endpoint, params)
+
+ if not data:
+ return
+ data.reverse()
+ yield from data
+
+ if not params["o"]:
+ return
+ params["o"] -= batch
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index c700a29..b0198d5 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -100,7 +100,8 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
directory_fmt = ("{category}",)
archive_fmt = "{id}"
- pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
+ pattern = (r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)"
+ r"/(?:i/)?(\w+)")
example = "https://lensdump.com/i/ID"
def items(self):
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 225560d..fbed328 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -96,6 +96,57 @@ class MangadexExtractor(Extractor):
        return data
+class MangadexCoversExtractor(MangadexExtractor):
+    """Extractor for mangadex manga covers"""
+    subcategory = "covers"
+    directory_fmt = ("{category}", "{manga}", "Covers")
+    filename_fmt = "{volume:>02}_{lang}.{extension}"
+    archive_fmt = "c_{cover_id}"
+    pattern = (rf"{BASE_PATTERN}/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
+               r"(?:/[^/?#]+)?\?tab=art")
+    example = ("https://mangadex.org/title"
+               "/01234567-89ab-cdef-0123-456789abcdef?tab=art")
+
+    def items(self):
+        base = f"{self.root}/covers/{self.uuid}/"
+        for cover in self.api.covers_manga(self.uuid):
+            data = self._transform_cover(cover)
+            name = data["cover"]
+            text.nameext_from_url(name, data)
+            data["cover_id"] = data["filename"]
+            yield Message.Directory, data
+            yield Message.Url, f"{base}{name}", data
+
+    def _transform_cover(self, cover):
+        relationships = defaultdict(list)
+        for item in cover["relationships"]:
+            relationships[item["type"]].append(item)
+        manga = self.api.manga(relationships["manga"][0]["id"])
+        for item in manga["relationships"]:
+            relationships[item["type"]].append(item)
+
+        cattributes = cover["attributes"]
+        mattributes = manga["attributes"]
+
+        return {
+            "manga"   : (mattributes["title"].get("en") or
+                         next(iter(mattributes["title"].values()))),
+            "manga_id": manga["id"],
+            "status"  : mattributes["status"],
+            "author"  : [author["attributes"]["name"]
+                         for author in relationships["author"]],
+            "artist"  : [artist["attributes"]["name"]
+                         for artist in relationships["artist"]],
+            "tags"    : [tag["attributes"]["name"]["en"]
+                         for tag in mattributes["tags"]],
+            "cover"   : cattributes["fileName"],
+            "lang"    : cattributes.get("locale"),
+            "volume"  : text.parse_int(cattributes["volume"]),
+            "date"    : text.parse_datetime(cattributes["createdAt"]),
+            "date_updated": text.parse_datetime(cattributes["updatedAt"]),
+        }
+
+
class MangadexChapterExtractor(MangadexExtractor):
    """Extractor for manga-chapters from mangadex.org"""
    subcategory = "chapter"
@@ -239,6 +290,10 @@ class MangadexAPI():
        params = {"includes[]": ("scanlation_group",)}
        return self._call("/chapter/" + uuid, params)["data"]
+    def covers_manga(self, uuid):
+        params = {"manga[]": uuid}
+        return self._pagination_covers("/cover", params)
+
    def list(self, uuid):
        return self._call("/list/" + uuid, None, True)["data"]
@@ -374,6 +429,20 @@
        return self._pagination(endpoint, params, auth)
+    def _pagination_covers(self, endpoint, params=None, auth=False):
+        if params is None:
+            params = {}
+
+        lang = self.extractor.config("lang")
+        if isinstance(lang, str) and "," in lang:
+            lang = lang.split(",")
+        params["locales"] = lang
+        params["contentRating"] = None
+        params["order[volume]"] = \
+            "desc" if self.extractor.config("chapter-reverse") else "asc"
+
+        return self._pagination(endpoint, params, auth)
+
    def _pagination(self, endpoint, params, auth=False):
        config = self.extractor.config
diff --git a/gallery_dl/extractor/mangataro.py b/gallery_dl/extractor/mangataro.py
new file mode 100644
index 0000000..f4cc058
--- /dev/null
+++ b/gallery_dl/extractor/mangataro.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangataro.org/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+
+BASE_PATTERN = r"(?:https?://)?mangataro\.org"
+
+
+class MangataroBase():
+    """Base class for mangataro extractors"""
+    category = "mangataro"
+    root = "https://mangataro.org"
+
+
+class MangataroChapterExtractor(MangataroBase, ChapterExtractor):
+    """Extractor for mangataro manga chapters"""
+    pattern = rf"{BASE_PATTERN}(/read/([^/?#]+)/(?:[^/?#]*-)?(\d+))"
+    example = "https://mangataro.org/read/MANGA/ch123-12345"
+
+    def metadata(self, page):
+        _, slug, chapter_id = self.groups
+        comic = self._extract_jsonld(page)["@graph"][0]
+        chapter = comic["position"]
+        minor = chapter - int(chapter)
+        desc = comic["description"].split(" - ", 3)
+
+        return {
+            **_manga_info(self, slug),
+            "title"        : desc[1] if len(desc) > 3 else "",
+            "chapter"      : int(chapter),
+            "chapter_minor": str(round(minor, 5))[1:] if minor else "",
+            "chapter_id"   : text.parse_int(chapter_id),
+            "chapter_url"  : comic["url"],
+            "date"         : text.parse_datetime(
+                comic["datePublished"], "%Y-%m-%dT%H:%M:%S%z"),
+            "date_updated" : text.parse_datetime(
+                comic["dateModified"], "%Y-%m-%dT%H:%M:%S%z"),
+        }
+
+    def images(self, page):
+        pos = page.find('class="comic-image-container')
+        img, pos = text.extract(page, ' src="', '"', pos)
+
+        images = [(img, None)]
+        images.extend(
+            (url, None)
+            for url in text.extract_iter(page, 'data-src="', '"', pos)
+        )
+        return images
+
+
+class MangataroMangaExtractor(MangataroBase, MangaExtractor):
+    """Extractor for mangataro manga"""
+    chapterclass = MangataroChapterExtractor
+    pattern = rf"{BASE_PATTERN}(/manga/([^/?#]+))"
+    example = "https://mangataro.org/manga/MANGA"
+
+    def chapters(self, page):
+        slug = self.groups[1]
+        manga = _manga_info(self, slug)
+
+        results = []
+        for url in text.extract_iter(text.extr(
+                page, '<div class="chapter-list', '<div id="tab-gallery"'),
+                '<a href="', '"'):
+            chapter, _, chapter_id = url[url.rfind("/")+3:].rpartition("-")
+            chapter, sep, minor = chapter.partition("-")
+            results.append((url, {
+                **manga,
+                "chapter"      : text.parse_int(chapter),
+                "chapter_minor": f".{minor}" if sep else "",
+                "chapter_id"   : text.parse_int(chapter_id),
+            }))
+        return results
+
+
+@memcache(keyarg=1)
+def _manga_info(self, slug):
+    url = f"{self.root}/manga/{slug}"
+    page = self.request(url).text
+    manga = self._extract_jsonld(page)
+
+    return {
+        "manga"      : manga["name"].rpartition(" | ")[0].rpartition(" ")[0],
+        "manga_url"  : manga["url"],
+        "cover"      : manga["image"],
+        "author"     : manga["author"]["name"].split(", "),
+        "genre"      : manga["genre"],
+        "status"     : manga["status"],
+        "description": text.unescape(text.extr(
+            page, 'id="description-content-tab">', "</div></div>")),
+        "tags"       : text.split_html(text.extr(
+            page, ">Genres</h4>", "</div>")),
+        "publisher"  : text.remove_html(text.extr(
+            page, '>Serialization</h4>', "</div>")),
+    }
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 9c335ad..ff771fb 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -204,58 +204,6 @@ class PinterestExtractor(Extractor):
        return media
-class PinterestPinExtractor(PinterestExtractor):
-    """Extractor for images from a single pin from pinterest.com"""
-    subcategory = "pin"
-    pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
-    example = "https://www.pinterest.com/pin/12345/"
-
-    def __init__(self, match):
-        PinterestExtractor.__init__(self, match)
-        self.pin_id = match[1]
-        self.pin = None
-
-    def metadata(self):
-        self.pin = self.api.pin(self.pin_id)
-        return self.pin
-
-    def pins(self):
-        return (self.pin,)
-
-
-class PinterestBoardExtractor(PinterestExtractor):
-    """Extractor for images from a board from pinterest.com"""
-    subcategory = "board"
-    directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
-    archive_fmt = "{board[id]}_{id}"
-    pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
-               r"/(?!_saved|_created|pins/)([^/?#]+)/?(?:$|\?|#)")
-    example = "https://www.pinterest.com/USER/BOARD/"
-
-    def __init__(self, match):
-        PinterestExtractor.__init__(self, match)
-        self.user = text.unquote(match[1])
-        self.board_name = text.unquote(match[2])
-        self.board = None
-
-    def metadata(self):
-        self.board = self.api.board(self.user, self.board_name)
-        return {"board": self.board}
-
-    def pins(self):
-        board = self.board
-        pins = self.api.board_pins(board["id"])
-
-        if board["section_count"] and self.config("sections", True):
-            base = f"{self.root}{board['url']}id:"
-            data = {"_extractor": PinterestSectionExtractor}
-            sections = [(base + section["id"], data)
-                        for section in self.api.board_sections(board["id"])]
-            pins = itertools.chain(pins, sections)
-
-        return pins
-
-
class PinterestUserExtractor(PinterestExtractor):
    """Extractor for a user's boards"""
    subcategory = "user"
@@ -357,6 +305,58 @@ class PinterestSearchExtractor(PinterestExtractor):
        return self.api.search(self.search)
+class PinterestPinExtractor(PinterestExtractor):
+    """Extractor for images from a single pin from pinterest.com"""
+    subcategory = "pin"
+    pattern = BASE_PATTERN + r"/pin/([^/?#]+)(?!.*#related$)"
+    example = "https://www.pinterest.com/pin/12345/"
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        self.pin_id = match[1]
+        self.pin = None
+
+    def metadata(self):
+        self.pin = self.api.pin(self.pin_id)
+        return self.pin
+
+    def pins(self):
+        return (self.pin,)
+
+
+class PinterestBoardExtractor(PinterestExtractor):
+    """Extractor for images from a board from pinterest.com"""
+    subcategory = "board"
+    directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
+    archive_fmt = "{board[id]}_{id}"
+    pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#]+)"
+               r"/([^/?#]+)/?(?!.*#related$)")
+    example = "https://www.pinterest.com/USER/BOARD/"
+
+    def __init__(self, match):
+        PinterestExtractor.__init__(self, match)
+        self.user = text.unquote(match[1])
+        self.board_name = text.unquote(match[2])
+        self.board = None
+
+    def metadata(self):
+        self.board = self.api.board(self.user, self.board_name)
+        return {"board": self.board}
+
+    def pins(self):
+        board = self.board
+        pins = self.api.board_pins(board["id"])
+
+        if board["section_count"] and self.config("sections", True):
+            base = f"{self.root}{board['url']}id:"
+            data = {"_extractor": PinterestSectionExtractor}
+            sections = [(base + section["id"], data)
+                        for section in self.api.board_sections(board["id"])]
+            pins = itertools.chain(pins, sections)
+
+        return pins
+
+
class PinterestRelatedPinExtractor(PinterestPinExtractor):
    """Extractor for related pins of another pin from pinterest.com"""
    subcategory = "related-pin"
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 9febda9..e20d80e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -56,6 +56,7 @@ class RedditExtractor(Extractor):
urls = []
if submission:
+ submission["comment"] = None
submission["date"] = text.parse_timestamp(
submission["created_utc"])
yield Message.Directory, submission
@@ -99,14 +100,13 @@ class RedditExtractor(Extractor):
elif not submission["is_self"]:
urls.append((url, submission))
+ if selftext and (txt := submission["selftext_html"]):
+ for url in text.extract_iter(txt, ' href="', '"'):
+ urls.append((url, submission))
+
elif parentdir:
yield Message.Directory, comments[0]
- if selftext and submission:
- for url in text.extract_iter(
- submission["selftext_html"] or "", ' href="', '"'):
- urls.append((url, submission))
-
if self.api.comments:
if comments and not submission:
submission = comments[0]
@@ -115,24 +115,24 @@ class RedditExtractor(Extractor):
yield Message.Directory, submission
for comment in comments:
+ media = (embeds and "media_metadata" in comment)
html = comment["body_html"] or ""
href = (' href="' in html)
- media = (embeds and "media_metadata" in comment)
- if media or href:
- comment["date"] = text.parse_timestamp(
- comment["created_utc"])
- if submission:
- data = submission.copy()
- data["comment"] = comment
- else:
- data = comment
+ if not media and not href:
+ continue
+
+ data = submission.copy()
+ data["comment"] = comment
+ comment["date"] = text.parse_timestamp(
+ comment["created_utc"])
if media:
- for embed in self._extract_embed(comment):
- submission["num"] += 1
- text.nameext_from_url(embed, submission)
- yield Message.Url, embed, submission
+ for url in self._extract_embed(comment):
+ data["num"] += 1
+ text.nameext_from_url(url, data)
+ yield Message.Url, url, data
+ submission["num"] = data["num"]
if href:
for url in text.extract_iter(html, ' href="', '"'):
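Note: the reworked comment loop copies the submission per comment and writes the running file counter back afterwards, so numbering stays continuous across comments. A minimal sketch of that bookkeeping (dummy data, not the real kwdicts):

    submission = {"id": "abc", "num": 0}
    comments = [
        {"id": "c1", "media": ["u1", "u2"]},
        {"id": "c2", "media": ["u3"]},
    ]

    for comment in comments:
        data = submission.copy()         # per-comment metadata copy
        data["comment"] = comment
        for url in comment["media"]:
            data["num"] += 1             # number files across all comments
            print(data["num"], url)
        submission["num"] = data["num"]  # carry the counter to the next copy
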
diff --git a/gallery_dl/extractor/schalenetwork.py b/gallery_dl/extractor/schalenetwork.py
index d517287..dc42417 100644
--- a/gallery_dl/extractor/schalenetwork.py
+++ b/gallery_dl/extractor/schalenetwork.py
@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, exception
-from ..cache import cache
import collections
BASE_PATTERN = (
@@ -27,6 +26,8 @@ class SchalenetworkExtractor(Extractor):
category = "schalenetwork"
root = "https://niyaniya.moe"
root_api = "https://api.schale.network"
+ root_auth = "https://auth.schale.network"
+ extr_class = None
request_interval = (0.5, 1.5)
def _init(self):
@@ -38,6 +39,7 @@ class SchalenetworkExtractor(Extractor):
def _pagination(self, endpoint, params):
url_api = self.root_api + endpoint
+ cls = self.extr_class
while True:
data = self.request_json(
@@ -49,8 +51,8 @@ class SchalenetworkExtractor(Extractor):
return
for entry in entries:
- url = f"{self.root}/g/{entry['id']}/{entry['public_key']}"
- entry["_extractor"] = SchalenetworkGalleryExtractor
+ url = f"{self.root}/g/{entry['id']}/{entry['key']}"
+ entry["_extractor"] = cls
yield Message.Queue, url, entry
try:
@@ -60,6 +62,34 @@ class SchalenetworkExtractor(Extractor):
pass
params["page"] += 1
+ def _token(self):
+ if token := self.config("token"):
+ return f"Bearer {token.rpartition(' ')[2]}"
+ raise exception.AuthRequired("'token'", "your favorites")
+
+ def _crt(self):
+ crt = self.config("crt")
+ if not crt:
+ self._require_auth()
+
+ if not text.re(r"^[0-9a-f-]+$").match(crt):
+ path, _, qs = crt.partition("?")
+ if not qs:
+ qs = path
+ crt = text.parse_query(qs).get("crt")
+ if not crt:
+ self._require_auth()
+
+ return crt
+
+ def _require_auth(self, exc=None):
+ if exc is None:
+ msg = None
+ else:
+ msg = f"{exc.status} {exc.response.reason}"
+ raise exception.AuthRequired(
+ "'crt' query parameter & matching '--user-agent'", None, msg)
+
class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
"""Extractor for schale.network galleries"""
@@ -67,7 +97,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
directory_fmt = ("{category}", "{id} {title}")
archive_fmt = "{id}_{num}"
request_interval = 0.0
- pattern = BASE_PATTERN + r"/(?:g|reader)/(\d+)/(\w+)"
+ pattern = rf"{BASE_PATTERN}/(?:g|reader)/(\d+)/(\w+)"
example = "https://niyaniya.moe/g/12345/67890abcde/"
TAG_TYPES = {
@@ -86,27 +116,10 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
12: "other",
}
- def __init__(self, match):
- GalleryExtractor.__init__(self, match)
- self.page_url = None
-
- def _init(self):
- self.headers = {
- "Accept" : "*/*",
- "Referer": self.root + "/",
- "Origin" : self.root,
- }
-
- self.fmt = self.config("format")
- self.cbz = self.config("cbz", True)
-
- if self.cbz:
- self.filename_fmt = "{id} {title}.{extension}"
- self.directory_fmt = ("{category}",)
-
def metadata(self, _):
- url = f"{self.root_api}/books/detail/{self.groups[1]}/{self.groups[2]}"
- self.data = data = self.request_json(url, headers=self.headers)
+ _, gid, gkey = self.groups
+ url = f"{self.root_api}/books/detail/{gid}/{gkey}"
+ data = self.request_json(url, headers=self.headers)
data["date"] = text.parse_timestamp(data["created_at"] // 1000)
tags = []
@@ -127,53 +140,42 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
data["tags_" + types[type]] = values
try:
- if self.cbz:
- data["count"] = len(data["thumbnails"]["entries"])
+ data["count"] = len(data["thumbnails"]["entries"])
del data["thumbnails"]
- del data["rels"]
except Exception:
pass
return data
def images(self, _):
- data = self.data
- fmt = self._select_format(data["data"])
+ crt = self._crt()
+ _, gid, gkey = self.groups
+ url = f"{self.root_api}/books/detail/{gid}/{gkey}?crt={crt}"
+ try:
+ data = self.request_json(url, method="POST", headers=self.headers)
+ except exception.HttpError as exc:
+ self._require_auth(exc)
- url = (f"{self.root_api}/books/data/{data['id']}/"
- f"{data['public_key']}/{fmt['id']}/{fmt['public_key']}")
- params = {
- "v": data["updated_at"],
- "w": fmt["w"],
- }
+ fmt = self._select_format(data["data"])
- if self.cbz:
- params["action"] = "dl"
- base = self.request_json(
- url, method="POST", params=params, headers=self.headers,
- )["base"]
- url = f"{base}?v={data['updated_at']}&w={fmt['w']}"
- info = text.nameext_from_url(base)
- if not info["extension"]:
- info["extension"] = "cbz"
- return ((url, info),)
-
- data = self.request_json(url, params=params, headers=self.headers)
+ url = (f"{self.root_api}/books/data/{gid}/{gkey}"
+ f"/{fmt['id']}/{fmt['key']}/{fmt['w']}?crt={crt}")
+ data = self.request_json(url, headers=self.headers)
base = data["base"]
results = []
for entry in data["entries"]:
dimensions = entry["dimensions"]
info = {
- "w": dimensions[0],
- "h": dimensions[1],
+ "width" : dimensions[0],
+ "height": dimensions[1],
"_http_headers": self.headers,
}
results.append((base + entry["path"], info))
return results
def _select_format(self, formats):
- fmt = self.fmt
+ fmt = self.config("format")
if not fmt or fmt == "best":
fmtids = ("0", "1600", "1280", "980", "780")
@@ -182,7 +184,7 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
elif isinstance(fmt, list):
fmtids = fmt
else:
- fmtids = (str(self.fmt),)
+ fmtids = (str(fmt),)
for fmtid in fmtids:
try:
@@ -203,44 +205,39 @@ class SchalenetworkGalleryExtractor(SchalenetworkExtractor, GalleryExtractor):
class SchalenetworkSearchExtractor(SchalenetworkExtractor):
"""Extractor for schale.network search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/\?([^#]*)"
- example = "https://niyaniya.moe/?s=QUERY"
+ pattern = rf"{BASE_PATTERN}/(?:tag/([^/?#]+)|browse)?(?:/?\?([^#]*))?$"
+ example = "https://niyaniya.moe/browse?s=QUERY"
def items(self):
- params = text.parse_query(self.groups[1])
+ _, tag, qs = self.groups
+
+ params = text.parse_query(qs)
params["page"] = text.parse_int(params.get("page"), 1)
+
+ if tag is not None:
+ ns, sep, tag = text.unquote(tag).partition(":")
+ if "+" in tag:
+ tag = tag.replace("+", " ")
+ q = '"'
+ else:
+ q = ""
+ q = '"' if " " in tag else ""
+ params["s"] = f"{ns}{sep}{q}^{tag}${q}"
+
return self._pagination("/books", params)
class SchalenetworkFavoriteExtractor(SchalenetworkExtractor):
"""Extractor for schale.network favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+ pattern = rf"{BASE_PATTERN}/favorites(?:\?([^#]*))?"
example = "https://niyaniya.moe/favorites"
def items(self):
- self.login()
-
params = text.parse_query(self.groups[1])
params["page"] = text.parse_int(params.get("page"), 1)
- return self._pagination("/favorites", params)
-
- def login(self):
- username, password = self._get_auth_info()
- if username:
- self.headers["Authorization"] = \
- "Bearer " + self._login_impl(username, password)
- return
-
- raise exception.AuthenticationError("Username and password required")
-
- @cache(maxage=86400, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
+ self.headers["Authorization"] = self._token()
+ return self._pagination(f"/books/favorites?crt={self._crt()}", params)
- url = "https://auth.schale.network/login"
- data = {"uname": username, "passwd": password}
- response = self.request(
- url, method="POST", headers=self.headers, data=data)
- return response.json()["session"]
+SchalenetworkExtractor.extr_class = SchalenetworkGalleryExtractor
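Note: the new `_crt` helper accepts either a bare token or a pasted URL/query string and extracts the `crt` parameter from the latter. A roughly equivalent stdlib-only sketch (the function name and the `rpartition` shortcut are illustrative, not gallery-dl code):

    import re
    from urllib.parse import parse_qs

    def normalize_crt(value):
        if re.fullmatch(r"[0-9a-f-]+", value):
            return value                        # already a bare token
        _, _, qs = value.rpartition("?")        # drop an optional URL prefix
        return parse_qs(qs).get("crt", [None])[0]

    print(normalize_crt("a1b2-c3"))                            # a1b2-c3
    print(normalize_crt("https://niyaniya.moe/?crt=a1b2-c3"))  # a1b2-c3
    print(normalize_crt("crt=a1b2-c3"))                        # a1b2-c3
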
diff --git a/gallery_dl/extractor/simpcity.py b/gallery_dl/extractor/simpcity.py
index 8cc7e38..3354289 100644
--- a/gallery_dl/extractor/simpcity.py
+++ b/gallery_dl/extractor/simpcity.py
@@ -20,18 +20,20 @@ class SimpcityExtractor(Extractor):
root = "https://simpcity.cr"
def items(self):
- extract_urls = text.re(r' href="([^"]+)').findall
+ extract_urls = text.re(
+ r'<(?:a [^>]*?href|iframe [^>]*?src)="([^"]+)').findall
for post in self.posts():
urls = extract_urls(post["content"])
data = {"post": post}
post["count"] = data["count"] = len(urls)
+ yield Message.Directory, data
for data["num"], url in enumerate(urls, 1):
yield Message.Queue, url, data
def request_page(self, url):
try:
- return self.request(url).text
+ return self.request(url)
except exception.HttpError as exc:
if exc.status == 403 and b">Log in<" in exc.response.content:
msg = text.extr(exc.response.text, "blockMessage--error", "</")
@@ -44,14 +46,14 @@ class SimpcityExtractor(Extractor):
base = f"{self.root}{base}"
if pnum is None:
- url = base
+ url = f"{base}/"
pnum = 1
else:
url = f"{base}/page-{pnum}"
pnum = None
while True:
- page = self.request_page(url)
+ page = self.request_page(url).text
yield page
@@ -60,6 +62,31 @@ class SimpcityExtractor(Extractor):
pnum += 1
url = f"{base}/page-{pnum}"
+ def _pagination_reverse(self, base, pnum=None):
+ base = f"{self.root}{base}"
+
+ url = f"{base}/page-9999" # force redirect to last page
+ with self.request_page(url) as response:
+ url = response.url
+ if url[-1] == "/":
+ pnum = 1
+ else:
+ pnum = text.parse_int(url[url.rfind("-")+1:], 1)
+ page = response.text
+
+ while True:
+ yield page
+
+ pnum -= 1
+ if pnum > 1:
+ url = f"{base}/page-{pnum}"
+ elif pnum == 1:
+ url = f"{base}/"
+ else:
+ return
+
+ page = self.request_page(url).text
+
def _parse_thread(self, page):
schema = self._extract_jsonld(page)["mainEntity"]
author = schema["author"]
@@ -92,7 +119,8 @@ class SimpcityExtractor(Extractor):
"id": extr('data-content="post-', '"'),
"author_url": extr('itemprop="url" content="', '"'),
"date": text.parse_datetime(extr('datetime="', '"')),
- "content": extr('<div itemprop="text">', "\t\t</div>").strip(),
+ "content": extr('<div itemprop="text">',
+ '<div class="js-selectToQuote').strip(),
}
url_a = post["author_url"]
@@ -109,7 +137,7 @@ class SimpcityPostExtractor(SimpcityExtractor):
def posts(self):
post_id = self.groups[0]
url = f"{self.root}/posts/{post_id}/"
- page = self.request_page(url)
+ page = self.request_page(url).text
pos = page.find(f'data-content="post-{post_id}"')
if pos < 0:
@@ -126,10 +154,22 @@ class SimpcityThreadExtractor(SimpcityExtractor):
example = "https://simpcity.cr/threads/TITLE.12345/"
def posts(self):
- for page in self._pagination(*self.groups):
+ if (order := self.config("order-posts")) and \
+ order[0] not in ("d", "r"):
+ pages = self._pagination(*self.groups)
+ reverse = False
+ else:
+ pages = self._pagination_reverse(*self.groups)
+ reverse = True
+
+ for page in pages:
if "thread" not in self.kwdict:
self.kwdict["thread"] = self._parse_thread(page)
- for html in text.extract_iter(page, "<article ", "</article>"):
+ posts = text.extract_iter(page, "<article ", "</article>")
+ if reverse:
+ posts = list(posts)
+ posts.reverse()
+ for html in posts:
yield self._parse_post(html)
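Note: `_pagination_reverse` exploits the forum's redirect behavior: requesting an out-of-range page number lands on the last page, and the final URL reveals its number. A standalone sketch of the idea (requests-based; names are illustrative):

    import requests

    def pages_reversed(base):
        response = requests.get(f"{base}/page-9999")  # redirects to last page
        url = response.url
        pnum = 1 if url.endswith("/") else int(url.rsplit("-", 1)[1])
        yield response.text
        for n in range(pnum - 1, 0, -1):
            url = f"{base}/" if n == 1 else f"{base}/page-{n}"
            yield requests.get(url).text
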
diff --git a/gallery_dl/extractor/thehentaiworld.py b/gallery_dl/extractor/thehentaiworld.py
new file mode 100644
index 0000000..055d7d8
--- /dev/null
+++ b/gallery_dl/extractor/thehentaiworld.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2025 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://thehentaiworld.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+import collections
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?thehentaiworld\.com"
+
+
+class ThehentaiworldExtractor(Extractor):
+ """Base class for thehentaiworld extractors"""
+ category = "thehentaiworld"
+ root = "https://thehentaiworld.com"
+ filename_fmt = "{title} ({id}{num:?-//}).{extension}"
+ archive_fmt = "{id}_{num}"
+ request_interval = (0.5, 1.5)
+
+ def items(self):
+ for url in self.posts():
+ try:
+ post = self._extract_post(url)
+ except Exception as exc:
+ self.status |= 1
+ self.log.warning("Failed to extract post %s (%s: %s)",
+ url, exc.__class__.__name__, exc)
+ continue
+
+ if "file_urls" in post:
+ urls = post["file_urls"]
+ post["count"] = len(urls)
+ yield Message.Directory, post
+ for post["num"], url in enumerate(urls, 1):
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+ else:
+ yield Message.Directory, post
+ url = post["file_url"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def _extract_post(self, url):
+ extr = text.extract_from(self.request(url).text)
+
+ post = {
+ "num" : 0,
+ "count" : 1,
+ "title" : text.unescape(extr("<title>", "<").strip()),
+ "id" : text.parse_int(extr(" postid-", " ")),
+ "slug" : extr(" post-", '"'),
+ "tags" : extr('id="tagsHead">', "</ul>"),
+ "date" : text.parse_datetime(extr(
+ "<li>Posted: ", "<"), "%Y-%m-%d"),
+ }
+
+ if "/videos/" in url:
+ post["type"] = "video"
+ post["width"] = post["height"] = 0
+ post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
+ post["score"] = text.parse_float(extr("<strong>", "<"))
+ post["file_url"] = extr('<source src="', '"')
+ else:
+ post["type"] = "image"
+ post["width"] = text.parse_int(extr("<li>Size: ", " "))
+ post["height"] = text.parse_int(extr("x ", "<"))
+ post["file_url"] = extr('a href="', '"')
+ post["votes"] = text.parse_int(extr("(<strong>", "</strong>"))
+ post["score"] = text.parse_float(extr("<strong>", "<"))
+
+ if doujin := extr('<a id="prev-page"', "</div></div><"):
+ repl = text.re(r"-220x\d+\.").sub
+ post["file_urls"] = [
+ repl(".", url)
+ for url in text.extract_iter(
+ doujin, 'class="border" src="', '"')
+ ]
+
+ tags = collections.defaultdict(list)
+ pattern = text.re(r'<li><a class="([^"]*)" href="[^"]*">([^<]+)')
+ for tag_type, tag_name in pattern.findall(post["tags"]):
+ tags[tag_type].append(tag_name)
+ post["tags"] = tags_list = []
+ for key, value in tags.items():
+ tags_list.extend(value)
+ post[f"tags_{key}" if key else "tags_general"] = value
+
+ return post
+
+ def _pagination(self, endpoint):
+ base = f"{self.root}{endpoint}"
+ pnum = self.page_start
+
+ while True:
+ url = base if pnum < 2 else f"{base}page/{pnum}/"
+ page = self.request(url).text
+
+ yield from text.extract_iter(text.extr(
+ page, 'id="thumbContainer"', "<script"), ' href="', '"')
+
+ if 'class="next"' not in page:
+ return
+ pnum += 1
+
+
+class ThehentaiworldPostExtractor(ThehentaiworldExtractor):
+ subcategory = "post"
+ pattern = (rf"{BASE_PATTERN}"
+ rf"(/(?:(?:3d-cgi-)?hentai-image|video)s/([^/?#]+))")
+ example = "https://thehentaiworld.com/hentai-images/SLUG/"
+
+ def posts(self):
+ return (f"{self.root}{self.groups[0]}/",)
+
+
+class ThehentaiworldTagExtractor(ThehentaiworldExtractor):
+ subcategory = "tag"
+ per_page = 24
+ page_start = 1
+ post_start = 0
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = rf"{BASE_PATTERN}/tag/([^/?#]+)"
+ example = "https://thehentaiworld.com/tag/TAG/"
+
+ def posts(self):
+ self.kwdict["search_tags"] = tag = self.groups[0]
+ return util.advance(self._pagination(f"/tag/{tag}/"), self.post_start)
+
+ def skip(self, num):
+ pages, posts = divmod(num, self.per_page)
+ self.page_start += pages
+ self.post_start += posts
+ return num
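Note: `skip()` splits the requested offset into whole pages plus a remainder; skipping 60 posts at 24 per page advances two pages and drops 12 posts from the next one:

    per_page = 24
    pages, posts = divmod(60, per_page)
    print(pages, posts)  # 2 12 -> page_start += 2, post_start += 12
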
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ed3cfae..e6c84d1 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -2070,7 +2070,7 @@ class TwitterAPI():
quoted = tweet["quoted_status_result"]["result"]
quoted["legacy"]["quoted_by"] = (
tweet["core"]["user_results"]["result"]
- ["legacy"]["screen_name"])
+ ["core"]["screen_name"])
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
quoted["sortIndex"] = entry.get("sortIndex")
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index e53ecf4..294fc57 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -51,8 +51,16 @@ class VipergirlsExtractor(Extractor):
like = False
posts = root.iter("post")
- if self.page:
- util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
+ if (order := self.config("order-posts")) and \
+ order[0] not in ("d", "r"):
+ if self.page:
+ util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
+ else:
+ posts = list(posts)
+ if self.page:
+ offset = text.parse_int(self.page[5:]) * 15
+ posts = posts[:offset]
+ posts.reverse()
for post in posts:
images = list(post)
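Note: when reversing, the post list is cut off at the requested page's end before being reversed, so iteration starts from that page's last post. Worked example with dummy numbers (15 posts per page):

    posts = list(range(1, 46))  # 45 posts, 3 pages of 15
    offset = 2 * 15             # "page2" -> keep pages 1-2
    posts = posts[:offset]
    posts.reverse()
    print(posts[0], posts[-1])  # 30 1
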
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 9d98e68..9369e5d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -489,9 +489,6 @@ class DownloadJob(Job):
self.extractor.cookies_store()
- if "finalize" in hooks:
- for callback in hooks["finalize"]:
- callback(pathfmt)
if self.status:
if "finalize-error" in hooks:
for callback in hooks["finalize-error"]:
@@ -500,6 +497,9 @@ class DownloadJob(Job):
if "finalize-success" in hooks:
for callback in hooks["finalize-success"]:
callback(pathfmt)
+ if "finalize" in hooks:
+ for callback in hooks["finalize"]:
+ callback(pathfmt)
def handle_skip(self):
pathfmt = self.pathfmt
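Note: running the generic "finalize" hooks after "finalize-error"/"finalize-success" lets cleanup callbacks (such as the archive close registered below) observe the final outcome. Simplified sketch of the resulting dispatch order, not the actual DownloadJob code:

    def run_finalize_hooks(hooks, pathfmt, status):
        key = "finalize-error" if status else "finalize-success"
        for callback in hooks.get(key, ()):
            callback(pathfmt)
        for callback in hooks.get("finalize", ()):  # cleanup runs last
            callback(pathfmt)
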
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 8da8417..9992c56 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -54,7 +54,11 @@ class PostProcessor():
else:
self.log.debug(
"Using %s archive '%s'", self.name, archive_path)
+ job.register_hooks({"finalize": self._close_archive})
return True
self.archive = None
return False
+
+ def _close_archive(self, _):
+ self.archive.close()
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index c74f92f..a6d2b7f 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -45,6 +45,15 @@ class MetadataPP(PostProcessor):
cfmt = "\n".join(cfmt) + "\n"
self._content_fmt = formatter.parse(cfmt).format_map
ext = "txt"
+ elif mode == "print":
+            if isinstance(cfmt, list):
+                cfmt = "\n".join(cfmt) + "\n"
+            if cfmt[-1] != "\n" and (cfmt[0] != "\f" or cfmt[1] == "F"):
+                cfmt += "\n"
+ self.write = self._write_custom
+ self._content_fmt = formatter.parse(cfmt).format_map
+ filename = "-"
elif mode == "jsonl":
self.write = self._write_json
self._json_encode = self._make_encoder(options).encode
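Note: with the new "print" mode, the formatted metadata is written to stdout instead of a file. A minimal config sketch as a Python dict (gallery-dl configs are normally JSON; the format string is illustrative):

    metadata_pp = {
        "name"  : "metadata",
        "mode"  : "print",
        "format": "{category} {id} {title}",  # trailing newline added if missing
    }
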
diff --git a/gallery_dl/postprocessor/python.py b/gallery_dl/postprocessor/python.py
index db71da2..66d9343 100644
--- a/gallery_dl/postprocessor/python.py
+++ b/gallery_dl/postprocessor/python.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2023 Mike Fährmann
+# Copyright 2023-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,13 +17,14 @@ class PythonPP(PostProcessor):
def __init__(self, job, options):
PostProcessor.__init__(self, job)
- spec = options["function"]
- module_name, _, function_name = spec.rpartition(":")
- module = util.import_file(module_name)
- self.function = getattr(module, function_name)
-
- if self._init_archive(job, options):
- self.run = self.run_archive
+ mode = options.get("mode")
+ if mode == "eval" or not mode and options.get("expression"):
+ self.function = util.compile_expression(options["expression"])
+ else:
+ spec = options["function"]
+ module_name, _, function_name = spec.rpartition(":")
+ module = util.import_file(module_name)
+ self.function = getattr(module, function_name)
events = options.get("event")
if events is None:
@@ -32,6 +33,9 @@ class PythonPP(PostProcessor):
events = events.split(",")
job.register_hooks({event: self.run for event in events}, options)
+ if self._init_archive(job, options):
+ self.run = self.run_archive
+
def run(self, pathfmt):
self.function(pathfmt.kwdict)
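Note: the python postprocessor now takes an inline expression instead of a module:function spec, either via mode "eval" or implicitly when only "expression" is set. A hedged config sketch (the expression itself is illustrative; kwdict fields and abort() are in scope):

    python_pp = {
        "name"      : "python",
        "mode"      : "eval",
        "expression": "abort() if extension == 'gif' else None",
    }
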
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 7b9ce99..49c1ba8 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -542,6 +542,7 @@ def language_to_code(lang, default=None):
CODES = {
"ar": "Arabic",
"bg": "Bulgarian",
+ "bn": "Bengali",
"ca": "Catalan",
"cs": "Czech",
"da": "Danish",
@@ -549,9 +550,11 @@ CODES = {
"el": "Greek",
"en": "English",
"es": "Spanish",
+ "fa": "Persian",
"fi": "Finnish",
"fr": "French",
"he": "Hebrew",
+ "hi": "Hindi",
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
@@ -564,9 +567,13 @@ CODES = {
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
+ "sk": "Slovak",
+ "sl": "Slovenian",
+ "sr": "Serbian",
"sv": "Swedish",
"th": "Thai",
"tr": "Turkish",
+ "uk": "Ukrainian",
"vi": "Vietnamese",
"zh": "Chinese",
}
@@ -634,6 +641,12 @@ class NullResponse():
self.url = url
self.reason = str(reason)
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
+
def __str__(self):
return "900 " + self.reason
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 277d679..4861a9d 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.30.7"
+__version__ = "1.30.8"
__variant__ = None
diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py
index cfc6b50..0296498 100644
--- a/gallery_dl/ytdl.py
+++ b/gallery_dl/ytdl.py
@@ -80,7 +80,10 @@ def parse_command_line(module, argv):
parser, opts, args = module.parseOpts(argv)
ytdlp = hasattr(module, "cookies")
- std_headers = module.std_headers
+ try:
+ std_headers = module.utils.networking.std_headers
+ except AttributeError:
+ std_headers = module.std_headers
try:
parse_bytes = module.parse_bytes
@@ -345,7 +348,7 @@ def parse_command_line(module, argv):
"nopart": opts.nopart,
"updatetime": opts.updatetime,
"writedescription": opts.writedescription,
- "writeannotations": opts.writeannotations,
+ "writeannotations": getattr(opts, "writeannotations", None),
"writeinfojson": opts.writeinfojson,
"allow_playlist_files": opts.allow_playlist_files,
"clean_infojson": opts.clean_infojson,
@@ -378,7 +381,8 @@ def parse_command_line(module, argv):
"max_views": opts.max_views,
"daterange": date,
"cachedir": opts.cachedir,
- "youtube_print_sig_code": opts.youtube_print_sig_code,
+ "youtube_print_sig_code": getattr(
+ opts, "youtube_print_sig_code", None),
"age_limit": opts.age_limit,
"download_archive": download_archive_fn,
"break_on_existing": getattr(opts, "break_on_existing", None),
@@ -394,8 +398,8 @@ def parse_command_line(module, argv):
"socket_timeout": opts.socket_timeout,
"bidi_workaround": opts.bidi_workaround,
"debug_printtraffic": opts.debug_printtraffic,
- "prefer_ffmpeg": opts.prefer_ffmpeg,
- "include_ads": opts.include_ads,
+ "prefer_ffmpeg": getattr(opts, "prefer_ffmpeg", None),
+ "include_ads": getattr(opts, "include_ads", None),
"default_search": opts.default_search,
"dynamic_mpd": getattr(opts, "dynamic_mpd", None),
"extractor_args": getattr(opts, "extractor_args", None),
@@ -420,7 +424,7 @@ def parse_command_line(module, argv):
opts, "sleep_interval_subtitles", None),
"external_downloader": opts.external_downloader,
"playlist_items": opts.playlist_items,
- "xattr_set_filesize": opts.xattr_set_filesize,
+ "xattr_set_filesize": getattr(opts, "xattr_set_filesize", None),
"match_filter": match_filter,
"no_color": getattr(opts, "no_color", None),
"ffmpeg_location": opts.ffmpeg_location,
@@ -430,7 +434,7 @@ def parse_command_line(module, argv):
opts, "hls_split_discontinuity", None),
"external_downloader_args": opts.external_downloader_args,
"postprocessor_args": opts.postprocessor_args,
- "cn_verification_proxy": opts.cn_verification_proxy,
+ "cn_verification_proxy": getattr(opts, "cn_verification_proxy", None),
"geo_verification_proxy": opts.geo_verification_proxy,
"geo_bypass": getattr(
opts, "geo_bypass", "default"),
diff --git a/test/test_extractor.py b/test/test_extractor.py
index f8b8f09..a623e1d 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -156,6 +156,9 @@ class TestExtractorModule(unittest.TestCase):
self.fail(f"{cls.__name__} pattern does not match "
f"example URL '{cls.example}'")
+ self.assertEqual(cls, extr.__class__)
+ self.assertEqual(cls, extractor.find(cls.example).__class__)
+
extr.request = fail_request
extr.initialize()
extr.finalize()
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 07bd348..2902fea 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -20,7 +20,7 @@ import collections
from datetime import datetime
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from gallery_dl import extractor, output, path, util # noqa E402
+from gallery_dl import extractor, output, path, util, exception # noqa E402
from gallery_dl import postprocessor, config # noqa E402
from gallery_dl.postprocessor.common import PostProcessor # noqa E402
@@ -555,6 +555,17 @@ class MetadataTest(BasePostprocessorTest):
test({"mode": "custom", "format": "{foo}\n{missing}\n"})
test({"format": "{foo}\n{missing}\n"})
+ def test_metadata_mode_print(self):
+ self._create(
+ {"mode": "print", "format": "{foo}\n{missing}"},
+ {"foo": "bar"},
+ )
+
+ with patch("sys.stdout", Mock()) as m:
+ self._trigger()
+
+ self.assertEqual(self._output(m), "bar\nNone\n")
+
def test_metadata_extfmt(self):
pp = self._create({
"extension" : "ignored",
@@ -867,6 +878,18 @@ class PythonTest(BasePostprocessorTest):
self._trigger()
self.assertEqual(self.pathfmt.kwdict["_result"], 24)
+ def test_eval(self):
+ self._create({"mode": "eval", "expression": "abort()"})
+
+ with self.assertRaises(exception.StopExtraction):
+ self._trigger()
+
+ def test_eval_auto(self):
+ self._create({"expression": "abort()"})
+
+ with self.assertRaises(exception.StopExtraction):
+ self._trigger()
+
def _write_module(self, path):
with open(path, "w") as fp:
fp.write("""
diff --git a/test/test_util.py b/test/test_util.py
index 4a76769..bfaab01 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1041,6 +1041,9 @@ value = 123
self.assertEqual(response.links.get("next"), None)
self.assertEqual(response.close(), None)
+ with response as ctx:
+ self.assertIs(response, ctx)
+
class TestExtractor():
category = "test_category"
diff --git a/test/test_ytdl.py b/test/test_ytdl.py
index 88933e4..1f28c9a 100644
--- a/test/test_ytdl.py
+++ b/test/test_ytdl.py
@@ -42,8 +42,6 @@ class Test_CommandlineArguments(unittest.TestCase):
def test_proxy(self):
self._(["--proxy", "socks5://127.0.0.1:1080/"],
"proxy", "socks5://127.0.0.1:1080/")
- self._(["--cn-verification-proxy", "https://127.0.0.1"],
- "cn_verification_proxy", "https://127.0.0.1")
self._(["--geo-verification-proxy", "127.0.0.1"],
"geo_verification_proxy", "127.0.0.1")
@@ -105,7 +103,10 @@ class Test_CommandlineArguments(unittest.TestCase):
"geo_bypass_ip_block", "198.51.100.14/24")
def test_headers(self):
- headers = self.module.std_headers
+ try:
+ headers = self.module.utils.networking.std_headers
+ except AttributeError:
+ headers = self.module.std_headers
self.assertNotEqual(headers["User-Agent"], "Foo/1.0")
self._(["--user-agent", "Foo/1.0"])
@@ -194,8 +195,6 @@ class Test_CommandlineArguments(unittest.TestCase):
})
def test_xattr(self):
- self._("--xattr-set-filesize", "xattr_set_filesize", True)
-
opts = self._("--xattrs")
self.assertEqual(opts["postprocessors"][0], {"key": "XAttrMetadata"})