author     Unit 193 <unit193@unit193.net>  2024-10-25 17:27:30 -0400
committer  Unit 193 <unit193@unit193.net>  2024-10-25 17:27:30 -0400
commit     fc004701f923bb954a22c7fec2ae8d607e78cb2b (patch)
tree       a5bea4ed6447ea43c099131430e3bd6182ee87d7
parent     0db541f524e1774865efebcbe5653e9ad76ea2e8 (diff)
New upstream version 1.27.7. (upstream/1.27.7)
-rw-r--r--  CHANGELOG.md | 82
-rw-r--r--  PKG-INFO | 7
-rw-r--r--  README.rst | 4
-rw-r--r--  data/completion/_gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl | 2
-rw-r--r--  data/completion/gallery-dl.fish | 2
-rw-r--r--  data/man/gallery-dl.1 | 8
-rw-r--r--  data/man/gallery-dl.conf.5 | 78
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 7
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 1
-rw-r--r--  gallery_dl/__init__.py | 2
-rw-r--r--  gallery_dl/config.py | 2
-rw-r--r--  gallery_dl/cookies.py | 192
-rw-r--r--  gallery_dl/downloader/ytdl.py | 46
-rw-r--r--  gallery_dl/exception.py | 12
-rw-r--r--  gallery_dl/extractor/8chan.py | 53
-rw-r--r--  gallery_dl/extractor/__init__.py | 1
-rw-r--r--  gallery_dl/extractor/behance.py | 1
-rw-r--r--  gallery_dl/extractor/bluesky.py | 101
-rw-r--r--  gallery_dl/extractor/bunkr.py | 143
-rw-r--r--  gallery_dl/extractor/civitai.py | 115
-rw-r--r--  gallery_dl/extractor/cohost.py | 2
-rw-r--r--  gallery_dl/extractor/common.py | 39
-rw-r--r--  gallery_dl/extractor/deviantart.py | 4
-rw-r--r--  gallery_dl/extractor/exhentai.py | 6
-rw-r--r--  gallery_dl/extractor/foolfuuka.py | 2
-rw-r--r--  gallery_dl/extractor/lensdump.py | 109
-rw-r--r--  gallery_dl/extractor/lolisafe.py | 10
-rw-r--r--  gallery_dl/extractor/mangadex.py | 22
-rw-r--r--  gallery_dl/extractor/mangakakalot.py | 6
-rw-r--r--  gallery_dl/extractor/newgrounds.py | 60
-rw-r--r--  gallery_dl/extractor/nozomi.py | 3
-rw-r--r--  gallery_dl/extractor/patreon.py | 7
-rw-r--r--  gallery_dl/extractor/pinterest.py | 171
-rw-r--r--  gallery_dl/extractor/pixiv.py | 77
-rw-r--r--  gallery_dl/extractor/postmill.py | 2
-rw-r--r--  gallery_dl/extractor/reddit.py | 8
-rw-r--r--  gallery_dl/extractor/scrolller.py | 227
-rw-r--r--  gallery_dl/extractor/telegraph.py | 2
-rw-r--r--  gallery_dl/extractor/tsumino.py | 6
-rw-r--r--  gallery_dl/extractor/urlgalleries.py | 30
-rw-r--r--  gallery_dl/extractor/vk.py | 9
-rw-r--r--  gallery_dl/extractor/wikimedia.py | 5
-rw-r--r--  gallery_dl/job.py | 6
-rw-r--r--  gallery_dl/option.py | 59
-rw-r--r--  gallery_dl/util.py | 37
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  setup.py | 1
-rw-r--r--  test/test_results.py | 2
-rw-r--r--  test/test_util.py | 36
50 files changed, 1306 insertions(+), 505 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bc6a301..f4bb546 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,57 +1,39 @@
-## 1.27.6 - 2024-10-11
+## 1.27.7 - 2024-10-25
### Extractors
#### Additions
-- [ao3] add `subscriptions` extractor ([#6247](https://github.com/mikf/gallery-dl/issues/6247))
-- [boosty] add support ([#2387](https://github.com/mikf/gallery-dl/issues/2387))
-- [civitai] add `post` extractors ([#6279](https://github.com/mikf/gallery-dl/issues/6279))
-- [pixiv] support unlisted artworks ([#5162](https://github.com/mikf/gallery-dl/issues/5162))
+- [civitai] add extractors for global `models` and `images` ([#6310](https://github.com/mikf/gallery-dl/issues/6310))
+- [mangadex] add `author` extractor ([#6372](https://github.com/mikf/gallery-dl/issues/6372))
+- [scrolller] add support ([#295](https://github.com/mikf/gallery-dl/issues/295), [#3418](https://github.com/mikf/gallery-dl/issues/3418), [#5051](https://github.com/mikf/gallery-dl/issues/5051))
#### Fixes
-- [cohost] sanitize default filenames ([#6262](https://github.com/mikf/gallery-dl/issues/6262))
- - limit `headline` length
- - remove `plainTextBody`
-- [deviantart] fix & improve journal/literature extraction ([#6254](https://github.com/mikf/gallery-dl/issues/6254), [#6207](https://github.com/mikf/gallery-dl/issues/6207), [#6196](https://github.com/mikf/gallery-dl/issues/6196))
- - extract journal HTML from webpage if possible
- - support converting `tiptap` markup to HTML
-- [deviantart] fix `stash` folder extraction
-- [flickr] update default API credentials ([#6300](https://github.com/mikf/gallery-dl/issues/6300))
-- [flickr] fix `ZeroDivisionError` ([#6252](https://github.com/mikf/gallery-dl/issues/6252))
-- [imagefap] fix `{num}` in single image default filenames
-- [myhentaigallery] fix `tags` extraction
-- [patreon] extract `attachments_media` files ([#6241](https://github.com/mikf/gallery-dl/issues/6241), [#6268](https://github.com/mikf/gallery-dl/issues/6268))
-- [pixiv] implement workaround for `limit_sanity_level` works ([#4327](https://github.com/mikf/gallery-dl/issues/4327), [#4747](https://github.com/mikf/gallery-dl/issues/4747), [#5054](https://github.com/mikf/gallery-dl/issues/5054), [#5435](https://github.com/mikf/gallery-dl/issues/5435), [#5651](https://github.com/mikf/gallery-dl/issues/5651), [#5655](https://github.com/mikf/gallery-dl/issues/5655))
-- [pornhub] fix `KeyError` when album images are missing ([#6299](https://github.com/mikf/gallery-dl/issues/6299))
-- [rule34us] fix extraction ([#6289](https://github.com/mikf/gallery-dl/issues/6289))
-- [8chan] set TOS cookie for current and previous day
+- [8chan] automatically detect `TOS` cookie name ([#6318](https://github.com/mikf/gallery-dl/issues/6318))
+- [bunkr] update to new site layout ([#6344](https://github.com/mikf/gallery-dl/issues/6344), [#6352](https://github.com/mikf/gallery-dl/issues/6352), [#6368](https://github.com/mikf/gallery-dl/issues/6368))
+- [bunkr] send proper `Referer` headers for file downloads ([#6319](https://github.com/mikf/gallery-dl/issues/6319))
+- [civitai] add `uuid` metadata field & use it as default archive format ([#6326](https://github.com/mikf/gallery-dl/issues/6326))
+- [civitai] fix "My Reactions" results ([#6263](https://github.com/mikf/gallery-dl/issues/6263))
+- [civitai] fix `model` file download URLs for tRPC API
+- [lensdump] fix extraction ([#6313](https://github.com/mikf/gallery-dl/issues/6313))
+- [pixiv] make retrieving ugoira metadata non-fatal ([#6297](https://github.com/mikf/gallery-dl/issues/6297))
+- [pixiv] fix exception when processing deleted `sanity_level` works ([#6339](https://github.com/mikf/gallery-dl/issues/6339))
+- [urlgalleries] fix extraction
+- [wikimedia] fix non-English Fandom/wiki.gg articles ([#6370](https://github.com/mikf/gallery-dl/issues/6370))
#### Improvements
-- [bunkr] support `bunkr.pk` URLs ([#6272](https://github.com/mikf/gallery-dl/issues/6272))
-- [civitai] use tRPC API by default ([#6279](https://github.com/mikf/gallery-dl/issues/6279))
-- [civitai] improve default archive format ([#6302](https://github.com/mikf/gallery-dl/issues/6302))
-- [komikcast] update domain to `komikcast.cz`
-- [newgrounds] detect more comment embeds ([#6253](https://github.com/mikf/gallery-dl/issues/6253))
-- [newgrounds] add more fallback URL formats for `art-images` files
-- [oauth] prevent empty browser names
-- [patreon] use mobile UA ([#6241](https://github.com/mikf/gallery-dl/issues/6241), [#6239](https://github.com/mikf/gallery-dl/issues/6239), [#6140](https://github.com/mikf/gallery-dl/issues/6140))
-- [patreon] handle suspended accounts
-- [pixiv] detect works requiring `My pixiv` access
-#### Metadata
-- [civitai] ensure image files have an `id` ([#6251](https://github.com/mikf/gallery-dl/issues/6251))
-- [gelbooru_v02] unescape HTML entities in categorized tags
-- [generic] ensure `path` metadata is always defined
-- [pixiv] retrieve `caption` from AJAX API when empty ([#4327](https://github.com/mikf/gallery-dl/issues/4327), [#5191](https://github.com/mikf/gallery-dl/issues/5191))
+- [8chan] support `/last/` thread URLs ([#6318](https://github.com/mikf/gallery-dl/issues/6318))
+- [bunkr] support `bunkr.ph` and `bunkr.ps` URLs
+- [newgrounds] support page numbers in URLs ([#6320](https://github.com/mikf/gallery-dl/issues/6320))
+- [patreon] support `/c/` prefix in creator URLs ([#6348](https://github.com/mikf/gallery-dl/issues/6348))
+- [pinterest] support `story` pins ([#6188](https://github.com/mikf/gallery-dl/issues/6188), [#6078](https://github.com/mikf/gallery-dl/issues/6078), [#4229](https://github.com/mikf/gallery-dl/issues/4229))
+- [pixiv] implement `sanity_level` workaround for user artworks results ([#4327](https://github.com/mikf/gallery-dl/issues/4327), [#5435](https://github.com/mikf/gallery-dl/issues/5435), [#6339](https://github.com/mikf/gallery-dl/issues/6339))
#### Options
-- [fanbox] add `comments` option, extend `metadata` option ([#6287](https://github.com/mikf/gallery-dl/issues/6287))
-- [pixiv] add `comments` option ([#6287](https://github.com/mikf/gallery-dl/issues/6287))
-#### Removals
-- [blogger] remove `micmicidol.club`
-- [chevereto] remove `deltaporno.com`
-- [lolisafe] remove `xbunkr.com`
-- [pururin] remove module
-- [shimmie2] remove `loudbooru.com`
+- [bluesky] add `quoted` option ([#6323](https://github.com/mikf/gallery-dl/issues/6323))
+- [pixiv] add `captions` option ([#4327](https://github.com/mikf/gallery-dl/issues/4327))
+- [reddit] add `embeds` option ([#6357](https://github.com/mikf/gallery-dl/issues/6357))
+- [vk] add `offset` option ([#6328](https://github.com/mikf/gallery-dl/issues/6328))
+### Downloaders
+- [ytdl] implement explicit HLS/DASH handling
### Post Processors
-- [ugoira] fix `BadZipFile` exceptions ([#6285](https://github.com/mikf/gallery-dl/issues/6285))
-- [ugoira] catch all exceptions when extracting ZIP archives ([#6285](https://github.com/mikf/gallery-dl/issues/6285))
-- [ugoira] forward frame data as `_ugoira_frame_data` ([#6154](https://github.com/mikf/gallery-dl/issues/6154), [#6285](https://github.com/mikf/gallery-dl/issues/6285))
+- add `error` event
### Miscellaneous
-- [build] remove setuptools and requests version restrictions
-- [docker] build from `python:3.12-alpine`
-- [text] improve `parse_query()` performance
+- [cookies] convert Chromium `expires_utc` values to Unix timestamps
+- [util] add `std` object to global eval namespace ([#6330](https://github.com/mikf/gallery-dl/issues/6330))
+- add `--print` and `--print-to-file` command-line options ([#6343](https://github.com/mikf/gallery-dl/issues/6343))
+- use child extractor fallbacks only when a non-user error occurs ([#6329](https://github.com/mikf/gallery-dl/issues/6329))
diff --git a/PKG-INFO b/PKG-INFO
index 27d0dd4..abc0001 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.6
+Version: 1.27.7
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -27,6 +27,7 @@ Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
@@ -114,9 +115,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index fbb7fa5..335101c 100644
--- a/README.rst
+++ b/README.rst
@@ -74,9 +74,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 5e1b1e0..743808c 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -29,6 +29,8 @@ _arguments -s -S \
{-E,--extractor-info}'[Print extractor defaults and settings]' \
{-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \
{-e,--error-file}'[Add input URLs which returned an error to FILE]':'<file>':_files \
+{-N,--print}'[Write FORMAT during EVENT (default '\''prepare'\'') to standard output. Examples: '\''id'\'' or '\''post:{md5\[:8\]}'\'']':'<[event:]format>' \
+--print-to-file'[Append FORMAT during EVENT to FILE]':'<[event:]format file>' \
--list-modules'[Print a list of available extractor modules]' \
--list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]':'<categories>' \
--write-log'[Write logging output to FILE]':'<file>':_files \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 32d9705..fd5268f 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --filename --destination --directory --extractors --user-agent --clear-cache --update-check --input-file --input-file-comment --input-file-delete --no-input --quiet --warning --verbose --get-urls --resolve-urls --dump-json --resolve-json --simulate --extractor-info --list-keywords --error-file --print --print-to-file --list-modules --list-extractors --write-log --write-unsupported --write-pages --print-traffic --no-colors --retries --http-timeout --proxy --source-address --no-check-certificate --limit-rate --chunk-size --sleep --sleep-request --sleep-extractor --no-part --no-skip --no-mtime --no-download --option --config --config-yaml --config-toml --config-create --config-status --config-open --config-ignore --ignore-config --username --password --netrc --cookies --cookies-export --cookies-from-browser --abort --terminate --filesize-min --filesize-max --download-archive --range --chapter-range --filter --chapter-filter --postprocessor --no-postprocessors --postprocessor-option --write-metadata --write-info-json --write-infojson --write-tags --zip --cbz --mtime --mtime-from-date --rename --rename-to --ugoira --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --exec --exec-after" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 7734f40..a239c50 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -23,6 +23,8 @@ complete -c gallery-dl -s 's' -l 'simulate' -d 'Simulate data extraction; do not
complete -c gallery-dl -s 'E' -l 'extractor-info' -d 'Print extractor defaults and settings'
complete -c gallery-dl -s 'K' -l 'list-keywords' -d 'Print a list of available keywords and example values for the given URLs'
complete -c gallery-dl -r -F -s 'e' -l 'error-file' -d 'Add input URLs which returned an error to FILE'
+complete -c gallery-dl -x -s 'N' -l 'print' -d 'Write FORMAT during EVENT (default "prepare") to standard output. Examples: "id" or "post:{md5[:8]}"'
+complete -c gallery-dl -x -l 'print-to-file' -d 'Append FORMAT during EVENT to FILE'
complete -c gallery-dl -l 'list-modules' -d 'Print a list of available extractor modules'
complete -c gallery-dl -x -l 'list-extractors' -d 'Print a list of extractor classes with description, (sub)category and example URL'
complete -c gallery-dl -r -F -l 'write-log' -d 'Write logging output to FILE'
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 3fedff4..a56dbcd 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-10-11" "1.27.6" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-10-25" "1.27.7" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -89,6 +89,12 @@ Print a list of available keywords and example values for the given URLs
.B "\-e, \-\-error\-file" \f[I]FILE\f[]
Add input URLs which returned an error to FILE
.TP
+.B "\-N, \-\-print" \f[I][EVENT:]FORMAT\f[]
+Write FORMAT during EVENT (default 'prepare') to standard output. Examples: 'id' or 'post:{md5[:8]}'
+.TP
+.B "\-\-print\-to\-file" \f[I][EVENT:]FORMAT FILE\f[]
+Append FORMAT during EVENT to FILE
+.TP
.B "\-\-list\-modules"
Print a list of available extractor modules
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index ba4bb3e..0ae8c38 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-10-11" "1.27.6" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-10-25" "1.27.7" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -548,6 +548,8 @@ This is supported for
.br
* \f[I]sankaku\f[]
.br
+* \f[I]scrolller\f[]
+.br
* \f[I]seiga\f[]
.br
* \f[I]subscribestar\f[]
@@ -1739,6 +1741,17 @@ Sets the maximum depth of returned reply posts.
(See depth parameter of \f[I]app.bsky.feed.getPostThread\f[])
+.SS extractor.bluesky.quoted
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch media from quoted posts.
+
+
.SS extractor.bluesky.reposts
.IP "Type:" 6
\f[I]bool\f[]
@@ -3974,6 +3987,17 @@ uses the same domain as a given input URL.
Include pins from board sections.
+.SS extractor.pinterest.stories
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract files from story pins.
+
+
.SS extractor.pinterest.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -4095,6 +4119,18 @@ fetch bookmark tags as \f[I]tags_bookmark\f[] metadata.
Note: This requires 1 additional API request per bookmarked post.
+.SS extractor.pixiv.captions
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+For works with seemingly empty \f[I]caption\f[] metadata,
+try to grab the actual \f[I]caption\f[] value using the AJAX API.
+
+
.SS extractor.pixiv.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -4284,6 +4320,17 @@ stubs in the base comment tree.
Note: This requires 1 additional API call for every 100 extra comments.
+.SS extractor.reddit.embeds
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download embedded comments media.
+
+
.SS extractor.reddit.date-min & .date-max
.IP "Type:" 6
\f[I]Date\f[]
@@ -5200,7 +5247,21 @@ Any entries after the first one will be used for potential
\f[I]fallback\f[] URLs.
Known available sizes are
-\f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[].
+
+.br
+* \f[I]orig\f[]
+.br
+* \f[I]large\f[]
+.br
+* \f[I]medium\f[]
+.br
+* \f[I]small\f[]
+.br
+* \f[I]4096x4096\f[]
+.br
+* \f[I]900x900\f[]
+.br
+* \f[I]360x360\f[]
.SS extractor.twitter.logout
@@ -5487,6 +5548,17 @@ Note: Requires \f[I]login\f[]
or \f[I]cookies\f[]
+.SS extractor.vk.offset
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+Custom \f[I]offset\f[] starting value when paginating over image results.
+
+
.SS extractor.vsco.include
.IP "Type:" 6
.br
@@ -7080,6 +7152,8 @@ but before it gets moved to its target location
After a file got moved to its target location
\f[I]skip\f[]
When skipping a file download
+\f[I]error\f[]
+After a file download failed
\f[I]post\f[]
When starting to download all files of a post,
e.g. a Tweet on Twitter or a post on Patreon.
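
The gallery-dl.conf.5 additions above document the new options as ordinary configuration entries (`extractor.bluesky.quoted`, `extractor.pinterest.stories`, `extractor.pixiv.captions`, `extractor.reddit.embeds`, `extractor.vk.offset`). As a minimal sketch, they could also be set programmatically with the module-level `set(path, key, value)` helper that the config.py hunk further down relies on; the values below are only examples, and the same keys would normally live in a JSON config file under `{"extractor": {"<site>": {...}}}`:

```python
from gallery_dl import config

# Illustrative values only; equivalent to entries in gallery-dl.conf
config.set(("extractor", "bluesky"), "quoted", True)     # fetch media from quoted posts
config.set(("extractor", "pinterest"), "stories", True)  # extract files from story pins
config.set(("extractor", "pixiv"), "captions", True)     # fill empty captions via AJAX API
config.set(("extractor", "reddit"), "embeds", False)     # skip embedded comment media
config.set(("extractor", "vk"), "offset", 200)           # start pagination at offset 200
```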
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 27d0dd4..abc0001 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.27.6
+Version: 1.27.7
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -27,6 +27,7 @@ Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Internet :: WWW/HTTP
@@ -114,9 +115,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.6/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.6/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.27.7/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index df9217a..42a5df1 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -188,6 +188,7 @@ gallery_dl/extractor/redgifs.py
gallery_dl/extractor/rule34us.py
gallery_dl/extractor/sankaku.py
gallery_dl/extractor/sankakucomplex.py
+gallery_dl/extractor/scrolller.py
gallery_dl/extractor/seiga.py
gallery_dl/extractor/senmanga.py
gallery_dl/extractor/sexcom.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 7a9e0be..62e96ae 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -63,7 +63,7 @@ def main():
browser, _, profile = args.cookies_from_browser.partition(":")
browser, _, keyring = browser.partition("+")
browser, _, domain = browser.partition("/")
- if profile.startswith(":"):
+ if profile and profile[0] == ":":
container = profile[1:]
profile = None
else:
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index 0a187c1..855fb4f 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -315,7 +315,7 @@ class apply():
self.original.append((path, key, get(path, key, util.SENTINEL)))
set(path, key, value)
- def __exit__(self, etype, value, traceback):
+ def __exit__(self, exc_type, exc_value, traceback):
for path, key, value in self.original:
if value is util.SENTINEL:
unset(path, key)
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 0ffd29a..cec2ea0 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -31,59 +31,63 @@ SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"}
logger = logging.getLogger("cookies")
-def load_cookies(cookiejar, browser_specification):
+def load_cookies(browser_specification):
browser_name, profile, keyring, container, domain = \
_parse_browser_specification(*browser_specification)
if browser_name == "firefox":
- load_cookies_firefox(cookiejar, profile, container, domain)
+ return load_cookies_firefox(profile, container, domain)
elif browser_name == "safari":
- load_cookies_safari(cookiejar, profile, domain)
+ return load_cookies_safari(profile, domain)
elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
- load_cookies_chrome(cookiejar, browser_name, profile, keyring, domain)
+ return load_cookies_chromium(browser_name, profile, keyring, domain)
else:
raise ValueError("unknown browser '{}'".format(browser_name))
-def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None):
+def load_cookies_firefox(profile=None, container=None, domain=None):
path, container_id = _firefox_cookies_database(profile, container)
- with DatabaseConnection(path) as db:
- sql = ("SELECT name, value, host, path, isSecure, expiry "
- "FROM moz_cookies")
- conditions = []
- parameters = []
-
- if container_id is False:
- conditions.append("NOT INSTR(originAttributes,'userContextId=')")
- elif container_id:
- conditions.append(
- "originAttributes LIKE ? OR originAttributes LIKE ?")
- uid = "%userContextId={}".format(container_id)
- parameters += (uid, uid + "&%")
-
- if domain:
- if domain[0] == ".":
- conditions.append("host == ? OR host LIKE ?")
- parameters += (domain[1:], "%" + domain)
- else:
- conditions.append("host == ? OR host == ?")
- parameters += (domain, "." + domain)
+ sql = ("SELECT name, value, host, path, isSecure, expiry "
+ "FROM moz_cookies")
+ conditions = []
+ parameters = []
+
+ if container_id is False:
+ conditions.append("NOT INSTR(originAttributes,'userContextId=')")
+ elif container_id:
+ uid = "%userContextId={}".format(container_id)
+ conditions.append("originAttributes LIKE ? OR originAttributes LIKE ?")
+ parameters += (uid, uid + "&%")
+
+ if domain:
+ if domain[0] == ".":
+ conditions.append("host == ? OR host LIKE ?")
+ parameters += (domain[1:], "%" + domain)
+ else:
+ conditions.append("host == ? OR host == ?")
+ parameters += (domain, "." + domain)
- if conditions:
- sql = "{} WHERE ( {} )".format(sql, " ) AND ( ".join(conditions))
+ if conditions:
+ sql = "{} WHERE ( {} )".format(sql, " ) AND ( ".join(conditions))
- set_cookie = cookiejar.set_cookie
- for name, value, domain, path, secure, expires in db.execute(
- sql, parameters):
- set_cookie(Cookie(
+ with DatabaseConnection(path) as db:
+ cookies = [
+ Cookie(
0, name, value, None, False,
- domain, bool(domain), domain.startswith("."),
- path, bool(path), secure, expires, False, None, None, {},
- ))
- _log_info("Extracted %s cookies from Firefox", len(cookiejar))
+ domain, True if domain else False,
+ domain[0] == "." if domain else False,
+ path, True if path else False, secure, expires,
+ False, None, None, {},
+ )
+ for name, value, domain, path, secure, expires in db.execute(
+ sql, parameters)
+ ]
+
+ _log_info("Extracted %s cookies from Firefox", len(cookies))
+ return cookies
-def load_cookies_safari(cookiejar, profile=None, domain=None):
+def load_cookies_safari(profile=None, domain=None):
"""Ref.: https://github.com/libyal/dtformats/blob
/main/documentation/Safari%20Cookies.asciidoc
- This data appears to be out of date
@@ -95,31 +99,33 @@ def load_cookies_safari(cookiejar, profile=None, domain=None):
data = fp.read()
page_sizes, body_start = _safari_parse_cookies_header(data)
p = DataParser(data[body_start:])
+
+ cookies = []
for page_size in page_sizes:
- _safari_parse_cookies_page(p.read_bytes(page_size), cookiejar)
+ _safari_parse_cookies_page(p.read_bytes(page_size), cookies)
+ _log_info("Extracted %s cookies from Safari", len(cookies))
+ return cookies
-def load_cookies_chrome(cookiejar, browser_name, profile=None,
- keyring=None, domain=None):
- config = _get_chromium_based_browser_settings(browser_name)
- path = _chrome_cookies_database(profile, config)
+def load_cookies_chromium(browser_name, profile=None,
+ keyring=None, domain=None):
+ config = _chromium_browser_settings(browser_name)
+ path = _chromium_cookies_database(profile, config)
_log_debug("Extracting cookies from %s", path)
+ if domain:
+ if domain[0] == ".":
+ condition = " WHERE host_key == ? OR host_key LIKE ?"
+ parameters = (domain[1:], "%" + domain)
+ else:
+ condition = " WHERE host_key == ? OR host_key == ?"
+ parameters = (domain, "." + domain)
+ else:
+ condition = ""
+ parameters = ()
+
with DatabaseConnection(path) as db:
db.text_factory = bytes
- decryptor = get_cookie_decryptor(
- config["directory"], config["keyring"], keyring)
-
- if domain:
- if domain[0] == ".":
- condition = " WHERE host_key == ? OR host_key LIKE ?"
- parameters = (domain[1:], "%" + domain)
- else:
- condition = " WHERE host_key == ? OR host_key == ?"
- parameters = (domain, "." + domain)
- else:
- condition = ""
- parameters = ()
try:
rows = db.execute(
@@ -130,10 +136,12 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
"SELECT host_key, name, value, encrypted_value, path, "
"expires_utc, secure FROM cookies" + condition, parameters)
- set_cookie = cookiejar.set_cookie
failed_cookies = 0
unencrypted_cookies = 0
+ decryptor = _chromium_cookie_decryptor(
+ config["directory"], config["keyring"], keyring)
+ cookies = []
for domain, name, value, enc_value, path, expires, secure in rows:
if not value and enc_value: # encrypted
@@ -145,15 +153,22 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
value = value.decode()
unencrypted_cookies += 1
+ if expires:
+ # https://stackoverflow.com/a/43520042
+ expires = int(expires) // 1000000 - 11644473600
+ else:
+ expires = None
+
domain = domain.decode()
path = path.decode()
name = name.decode()
- set_cookie(Cookie(
+ cookies.append(Cookie(
0, name, value, None, False,
- domain, bool(domain), domain.startswith("."),
- path, bool(path), secure, expires or None, False,
- None, None, {},
+ domain, True if domain else False,
+ domain[0] == "." if domain else False,
+ path, True if path else False, secure, expires,
+ False, None, None, {},
))
if failed_cookies > 0:
@@ -162,10 +177,11 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
failed_message = ""
_log_info("Extracted %s cookies from %s%s",
- len(cookiejar), browser_name.capitalize(), failed_message)
+ len(cookies), browser_name.capitalize(), failed_message)
counts = decryptor.cookie_counts
counts["unencrypted"] = unencrypted_cookies
- _log_debug("Cookie version breakdown: %s", counts)
+ _log_debug("version breakdown: %s", counts)
+ return cookies
# --------------------------------------------------------------------
@@ -253,7 +269,7 @@ def _safari_parse_cookies_header(data):
return page_sizes, p.cursor
-def _safari_parse_cookies_page(data, cookiejar, domain=None):
+def _safari_parse_cookies_page(data, cookies, domain=None):
p = DataParser(data)
p.expect_bytes(b"\x00\x00\x01\x00", "page signature")
number_of_cookies = p.read_uint()
@@ -267,17 +283,17 @@ def _safari_parse_cookies_page(data, cookiejar, domain=None):
for i, record_offset in enumerate(record_offsets):
p.skip_to(record_offset, "space between records")
record_length = _safari_parse_cookies_record(
- data[record_offset:], cookiejar, domain)
+ data[record_offset:], cookies, domain)
p.read_bytes(record_length)
p.skip_to_end("space in between pages")
-def _safari_parse_cookies_record(data, cookiejar, host=None):
+def _safari_parse_cookies_record(data, cookies, host=None):
p = DataParser(data)
record_size = p.read_uint()
p.skip(4, "unknown record field 1")
flags = p.read_uint()
- is_secure = bool(flags & 0x0001)
+ is_secure = True if (flags & 0x0001) else False
p.skip(4, "unknown record field 2")
domain_offset = p.read_uint()
name_offset = p.read_uint()
@@ -313,20 +329,21 @@ def _safari_parse_cookies_record(data, cookiejar, host=None):
p.skip_to(record_size, "space at the end of the record")
- cookiejar.set_cookie(Cookie(
+ cookies.append(Cookie(
0, name, value, None, False,
- domain, bool(domain), domain.startswith("."),
- path, bool(path), is_secure, expiration_date, False,
- None, None, {},
+ domain, True if domain else False,
+ domain[0] == "." if domain else False,
+ path, True if path else False, is_secure, expiration_date,
+ False, None, None, {},
))
return record_size
# --------------------------------------------------------------------
-# chrome
+# chromium
-def _chrome_cookies_database(profile, config):
+def _chromium_cookies_database(profile, config):
if profile is None:
search_root = config["directory"]
elif _is_path(profile):
@@ -346,7 +363,7 @@ def _chrome_cookies_database(profile, config):
return path
-def _get_chromium_based_browser_settings(browser_name):
+def _chromium_browser_settings(browser_name):
# https://chromium.googlesource.com/chromium
# /src/+/HEAD/docs/user_data_dir.md
join = os.path.join
@@ -414,7 +431,17 @@ def _get_chromium_based_browser_settings(browser_name):
}
-class ChromeCookieDecryptor:
+def _chromium_cookie_decryptor(
+ browser_root, browser_keyring_name, keyring=None):
+ if sys.platform in ("win32", "cygwin"):
+ return WindowsChromiumCookieDecryptor(browser_root)
+ elif sys.platform == "darwin":
+ return MacChromiumCookieDecryptor(browser_keyring_name)
+ else:
+ return LinuxChromiumCookieDecryptor(browser_keyring_name, keyring)
+
+
+class ChromiumCookieDecryptor:
"""
Overview:
@@ -452,16 +479,7 @@ class ChromeCookieDecryptor:
raise NotImplementedError("Must be implemented by sub classes")
-def get_cookie_decryptor(browser_root, browser_keyring_name, keyring=None):
- if sys.platform in ("win32", "cygwin"):
- return WindowsChromeCookieDecryptor(browser_root)
- elif sys.platform == "darwin":
- return MacChromeCookieDecryptor(browser_keyring_name)
- else:
- return LinuxChromeCookieDecryptor(browser_keyring_name, keyring)
-
-
-class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
+class LinuxChromiumCookieDecryptor(ChromiumCookieDecryptor):
def __init__(self, browser_keyring_name, keyring=None):
self._v10_key = self.derive_key(b"peanuts")
password = _get_linux_keyring_password(browser_keyring_name, keyring)
@@ -500,7 +518,7 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
return None
-class MacChromeCookieDecryptor(ChromeCookieDecryptor):
+class MacChromiumCookieDecryptor(ChromiumCookieDecryptor):
def __init__(self, browser_keyring_name):
password = _get_mac_keyring_password(browser_keyring_name)
self._v10_key = None if password is None else self.derive_key(password)
@@ -539,7 +557,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
return encrypted_value
-class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
+class WindowsChromiumCookieDecryptor(ChromiumCookieDecryptor):
def __init__(self, browser_root):
self._v10_key = _get_windows_v10_key(browser_root)
self._cookie_counts = {"v10": 0, "other": 0}
@@ -864,7 +882,7 @@ class DatabaseConnection():
self.directory.cleanup()
raise
- def __exit__(self, exc, value, tb):
+ def __exit__(self, exc_type, exc_value, traceback):
self.database.close()
if self.directory:
self.directory.cleanup()
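
For the cookie expiry change noted in the changelog ("convert Chromium `expires_utc` values to Unix timestamps"): Chromium stores `expires_utc` as microseconds since 1601-01-01 UTC, so the hunk above divides by 1,000,000 and subtracts 11,644,473,600, the number of seconds between 1601-01-01 and 1970-01-01 (134,774 days). A small standalone sketch of the same arithmetic:

```python
from datetime import datetime, timezone

# Seconds between the Windows/Chromium epoch (1601-01-01) and the
# Unix epoch (1970-01-01): 134774 days * 86400 seconds per day.
EPOCH_OFFSET = 11644473600

def chromium_to_unix(expires_utc):
    """Convert Chromium's 'expires_utc' (microseconds since 1601-01-01 UTC)
    into a Unix timestamp in whole seconds."""
    return int(expires_utc) // 1000000 - EPOCH_OFFSET

# Sanity check: the Unix epoch expressed in Chromium units maps back to 0.
assert chromium_to_unix(11644473600 * 1000000) == 0
print(datetime.fromtimestamp(chromium_to_unix(13370000000 * 1000000),
                             timezone.utc))
```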
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 950a72f..26f328d 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -62,10 +62,18 @@ class YoutubeDLDownloader(DownloaderBase):
info_dict = kwdict.pop("_ytdl_info_dict", None)
if not info_dict:
+ url = url[5:]
try:
- info_dict = ytdl_instance.extract_info(url[5:], download=False)
+ manifest = kwdict.pop("_ytdl_manifest", None)
+ if manifest:
+ info_dict = self._extract_manifest(
+ ytdl_instance, url, manifest)
+ else:
+ info_dict = self._extract_info(ytdl_instance, url)
except Exception as exc:
self.log.debug("", exc_info=exc)
+ self.log.warning("%s: %s", exc.__class__.__name__, exc)
+
if not info_dict:
return False
@@ -134,6 +142,42 @@ class YoutubeDLDownloader(DownloaderBase):
ytdl_instance.process_info(entry)
return True
+ def _extract_info(self, ytdl, url):
+ return ytdl.extract_info(url, download=False)
+
+ def _extract_manifest(self, ytdl, url, manifest):
+ extr = ytdl.get_info_extractor("Generic")
+ video_id = extr._generic_id(url)
+
+ if manifest == "hls":
+ try:
+ formats, subtitles = extr._extract_m3u8_formats_and_subtitles(
+ url, video_id, "mp4")
+ except AttributeError:
+ formats = extr._extract_m3u8_formats(url, video_id, "mp4")
+ subtitles = None
+
+ elif manifest == "dash":
+ try:
+ formats, subtitles = extr._extract_mpd_formats_and_subtitles(
+ url, video_id)
+ except AttributeError:
+ formats = extr._extract_mpd_formats(url, video_id)
+ subtitles = None
+
+ else:
+ self.log.error("Unsupported manifest type '%s'", manifest)
+ return None
+
+ info_dict = {
+ "id" : video_id,
+ "title" : video_id,
+ "formats" : formats,
+ "subtitles": subtitles,
+ }
+ # extr._extra_manifest_info(info_dict, url)
+ return ytdl.process_ie_result(info_dict, download=False)
+
def _progress_hook(self, info):
if info["status"] == "downloading" and \
info["elapsed"] >= self.progress:
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index 08dcfdc..6b2ce3a 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -54,10 +54,16 @@ class HttpError(ExtractionError):
default = "HTTP request failed"
code = 4
- def __init__(self, message, response=None):
- ExtractionError.__init__(self, message)
+ def __init__(self, message="", response=None):
self.response = response
- self.status = 0 if response is None else response.status_code
+ if response is None:
+ self.status = 0
+ else:
+ self.status = response.status_code
+ if not message:
+ message = "'{} {}' for '{}'".format(
+ response.status_code, response.reason, response.url)
+ ExtractionError.__init__(self, message)
class NotFoundError(ExtractionError):
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index f81d2a1..ce1c52a 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -27,12 +27,22 @@ class _8chanExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
- now = util.datetime_utcnow()
- domain = self.root.rpartition("/")[2]
- self.cookies.set(
- now.strftime("TOS%Y%m%d"), "1", domain=domain)
- self.cookies.set(
- (now - timedelta(1)).strftime("TOS%Y%m%d"), "1", domain=domain)
+ tos = self.cookies_tos_name()
+ self.cookies.set(tos, "1", domain=self.root[8:])
+
+ @memcache()
+ def cookies_tos_name(self):
+ url = self.root + "/.static/pages/confirmed.html"
+ headers = {"Referer": self.root + "/.static/pages/disclaimer.html"}
+ response = self.request(url, headers=headers, allow_redirects=False)
+
+ for cookie in response.cookies:
+ if cookie.name.lower().startswith("tos"):
+ self.log.debug("TOS cookie name: %s", cookie.name)
+ return cookie.name
+
+ self.log.error("Unable to determin TOS cookie name")
+ return "TOS20241009"
@memcache()
def cookies_prepare(self):
@@ -64,16 +74,14 @@ class _8chanThreadExtractor(_8chanExtractor):
"{threadId} {subject[:50]}")
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
- pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/(?:res|last)/(\d+)"
example = "https://8chan.moe/a/res/12345.html"
- def __init__(self, match):
- _8chanExtractor.__init__(self, match)
- _, self.board, self.thread = match.groups()
-
def items(self):
+ _, board, thread = self.groups
+
# fetch thread data
- url = "{}/{}/res/{}.".format(self.root, self.board, self.thread)
+ url = "{}/{}/res/{}.".format(self.root, board, thread)
self.session.headers["Referer"] = url + "html"
thread = self.request(url + "json").json()
thread["postId"] = thread["threadId"]
@@ -106,25 +114,22 @@ class _8chanBoardExtractor(_8chanExtractor):
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
example = "https://8chan.moe/a/"
- def __init__(self, match):
- _8chanExtractor.__init__(self, match)
- _, self.board, self.page = match.groups()
-
def items(self):
- page = text.parse_int(self.page, 1)
- url = "{}/{}/{}.json".format(self.root, self.board, page)
- board = self.request(url).json()
- threads = board["threads"]
+ _, board, pnum = self.groups
+ pnum = text.parse_int(pnum, 1)
+ url = "{}/{}/{}.json".format(self.root, board, pnum)
+ data = self.request(url).json()
+ threads = data["threads"]
while True:
for thread in threads:
thread["_extractor"] = _8chanThreadExtractor
url = "{}/{}/res/{}.html".format(
- self.root, self.board, thread["threadId"])
+ self.root, board, thread["threadId"])
yield Message.Queue, url, thread
- page += 1
- if page > board["pageCount"]:
+ pnum += 1
+ if pnum > data["pageCount"]:
return
- url = "{}/{}/{}.json".format(self.root, self.board, page)
+ url = "{}/{}/{}.json".format(self.root, board, pnum)
threads = self.request(url).json()["threads"]
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9885195..4e9fa50 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -141,6 +141,7 @@ modules = [
"rule34us",
"sankaku",
"sankakucomplex",
+ "scrolller",
"seiga",
"senmanga",
"sexcom",
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 72f9195..14598b7 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -171,6 +171,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
url = text.extr(page, '<source src="', '"')
if text.ext_from_url(url) == "m3u8":
url = "ytdl:" + url
+ module["_ytdl_manifest"] = "hls"
module["extension"] = "mp4"
append((url, module))
continue
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 39c5635..a1a488e 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -42,62 +42,76 @@ class BlueskyExtractor(Extractor):
self._user = self._user_did = None
self.instance = self.root.partition("://")[2]
self.videos = self.config("videos", True)
+ self.quoted = self.config("quoted", False)
def items(self):
for post in self.posts():
if "post" in post:
post = post["post"]
-
- pid = post["uri"].rpartition("/")[2]
if self._user_did and post["author"]["did"] != self._user_did:
- self.log.debug("Skipping %s (repost)", pid)
- continue
-
- post.update(post["record"])
- del post["record"]
-
- if self._metadata_facets:
- if "facets" in post:
- post["hashtags"] = tags = []
- post["mentions"] = dids = []
- post["uris"] = uris = []
- for facet in post["facets"]:
- features = facet["features"][0]
- if "tag" in features:
- tags.append(features["tag"])
- elif "did" in features:
- dids.append(features["did"])
- elif "uri" in features:
- uris.append(features["uri"])
- else:
- post["hashtags"] = post["mentions"] = post["uris"] = ()
-
- if self._metadata_user:
- post["user"] = self._user or post["author"]
-
- files = self._extract_files(post)
- post["instance"] = self.instance
- post["post_id"] = pid
- post["count"] = len(files)
- post["date"] = text.parse_datetime(
- post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
-
- yield Message.Directory, post
-
- if not files:
+ self.log.debug("Skipping %s (repost)", self._pid(post))
continue
-
- base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
- "?did={}&cid=".format(post["author"]["did"]))
- for post["num"], file in enumerate(files, 1):
- post.update(file)
- yield Message.Url, base + file["filename"], post
+ embed = post.get("embed")
+ post.update(post.pop("record"))
+
+ while True:
+ self._prepare(post)
+ files = self._extract_files(post)
+
+ yield Message.Directory, post
+ if files:
+ base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
+ "?did={}&cid=".format(post["author"]["did"]))
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ yield Message.Url, base + file["filename"], post
+
+ if not self.quoted or not embed or "record" not in embed:
+ break
+
+ quote = embed["record"]
+ if "record" in quote:
+ quote = quote["record"]
+ quote["quote_id"] = self._pid(post)
+ quote["quote_by"] = post["author"]
+ embed = quote.get("embed")
+ quote.update(quote.pop("value"))
+ post = quote
def posts(self):
return ()
+ def _pid(self, post):
+ return post["uri"].rpartition("/")[2]
+
+ def _prepare(self, post):
+ if self._metadata_facets:
+ if "facets" in post:
+ post["hashtags"] = tags = []
+ post["mentions"] = dids = []
+ post["uris"] = uris = []
+ for facet in post["facets"]:
+ features = facet["features"][0]
+ if "tag" in features:
+ tags.append(features["tag"])
+ elif "did" in features:
+ dids.append(features["did"])
+ elif "uri" in features:
+ uris.append(features["uri"])
+ else:
+ post["hashtags"] = post["mentions"] = post["uris"] = ()
+
+ if self._metadata_user:
+ post["user"] = self._user or post["author"]
+
+ post["instance"] = self.instance
+ post["post_id"] = self._pid(post)
+ post["date"] = text.parse_datetime(
+ post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+
def _extract_files(self, post):
if "embed" not in post:
+ post["count"] = 0
return ()
files = []
@@ -111,6 +125,7 @@ class BlueskyExtractor(Extractor):
if "video" in media and self.videos:
files.append(self._extract_media(media, "video"))
+ post["count"] = len(files)
return files
def _extract_media(self, media, key):
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 9022ffc..6c79d0a 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -8,9 +8,10 @@
"""Extractors for https://bunkr.si/"""
+from .common import Extractor
from .lolisafe import LolisafeAlbumExtractor
-from .. import text, config
-
+from .. import text, config, exception
+import random
if config.get(("extractor", "bunkr"), "tlds"):
BASE_PATTERN = (
@@ -21,11 +22,28 @@ else:
BASE_PATTERN = (
r"(?:bunkr:(?:https?://)?([^/?#]+)|"
r"(?:https?://)?(?:app\.)?(bunkr+"
- r"\.(?:s[kiu]|[cf]i|pk|ru|la|is|to|a[cx]"
+ r"\.(?:s[kiu]|[cf]i|p[hks]|ru|la|is|to|a[cx]"
r"|black|cat|media|red|site|ws|org)))"
)
+DOMAINS = [
+ "bunkr.ac",
+ "bunkr.ci",
+ "bunkr.fi",
+ "bunkr.ph",
+ "bunkr.pk",
+ "bunkr.ps",
+ "bunkr.si",
+ "bunkr.sk",
+ "bunkr.ws",
+ "bunkr.black",
+ "bunkr.red",
+ "bunkr.media",
+ "bunkr.site",
+]
LEGACY_DOMAINS = {
+ "bunkr.ax",
+ "bunkr.cat",
"bunkr.ru",
"bunkrr.ru",
"bunkr.su",
@@ -34,6 +52,7 @@ LEGACY_DOMAINS = {
"bunkr.is",
"bunkr.to",
}
+CF_DOMAINS = set()
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
@@ -49,45 +68,96 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
if domain not in LEGACY_DOMAINS:
self.root = "https://" + domain
+ def request(self, url, **kwargs):
+ kwargs["allow_redirects"] = False
+
+ while True:
+ try:
+ response = Extractor.request(self, url, **kwargs)
+ if response.status_code < 300:
+ return response
+
+ # redirect
+ url = response.headers["Location"]
+ root, path = self._split(url)
+ if root not in CF_DOMAINS:
+ continue
+ self.log.debug("Redirect to known CF challenge domain '%s'",
+ root)
+
+ except exception.HttpError as exc:
+ if exc.status != 403:
+ raise
+
+ # CF challenge
+ root, path = self._split(url)
+ CF_DOMAINS.add(root)
+ self.log.debug("Added '%s' to CF challenge domains", root)
+
+ try:
+ DOMAINS.remove(root.rpartition("/")[2])
+ except ValueError:
+ pass
+ else:
+ if not DOMAINS:
+ raise exception.StopExtraction(
+ "All Bunkr domains require solving a CF challenge")
+
+ # select alternative domain
+ root = "https://" + random.choice(DOMAINS)
+ self.log.debug("Trying '%s' as fallback", root)
+ url = root + path
+
def fetch_album(self, album_id):
# album metadata
page = self.request(self.root + "/a/" + self.album_id).text
- info = text.split_html(text.extr(
- page, "<h1", "</div>").partition(">")[2])
- count, _, size = info[1].split(None, 2)
+ title, size = text.split_html(text.extr(
+ page, "<h1", "</span>").partition(">")[2])
- pos = page.index('class="grid-images')
- urls = list(text.extract_iter(page, '<a href="', '"', pos))
-
- return self._extract_files(urls), {
+ items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->"))
+ return self._extract_files(items), {
"album_id" : self.album_id,
- "album_name" : text.unescape(info[0]),
- "album_size" : size[1:-1],
- "count" : len(urls),
- "_http_validate": self._validate,
+ "album_name" : title,
+ "album_size" : text.extr(size, "(", ")"),
+ "count" : len(items),
}
- def _extract_files(self, urls):
- for url in urls:
+ def _extract_files(self, items):
+ for item in items:
try:
- url = self._extract_file(text.unescape(url))
+ url = text.extr(item, ' href="', '"')
+ file = self._extract_file(text.unescape(url))
+
+ info = text.split_html(item)
+ file["name"] = info[0]
+ file["size"] = info[2]
+ file["date"] = text.parse_datetime(
+ info[-1], "%H:%M:%S %d/%m/%Y")
+
+ yield file
+ except exception.StopExtraction:
+ raise
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
- continue
- yield {"file": text.unescape(url)}
-
- def _extract_file(self, url):
- page = self.request(url).text
- url = (text.extr(page, '<source src="', '"') or
- text.extr(page, '<img src="', '"'))
-
- if not url:
- url_download = text.rextract(
- page, ' href="', '"', page.rindex("Download"))[0]
- page = self.request(text.unescape(url_download)).text
- url = text.unescape(text.rextract(page, ' href="', '"')[0])
-
- return url
+ self.log.debug("", exc_info=exc)
+
+ def _extract_file(self, webpage_url):
+ response = self.request(webpage_url)
+ page = response.text
+ file_url = (text.extr(page, '<source src="', '"') or
+ text.extr(page, '<img src="', '"'))
+
+ if not file_url:
+ webpage_url = text.unescape(text.rextract(
+ page, ' href="', '"', page.rindex("Download"))[0])
+ response = self.request(webpage_url)
+ file_url = text.rextract(response.text, ' href="', '"')[0]
+
+ return {
+ "file" : text.unescape(file_url),
+ "_http_headers" : {"Referer": response.url},
+ "_http_validate": self._validate,
+ }
def _validate(self, response):
if response.history and response.url.endswith("/maintenance-vid.mp4"):
@@ -95,6 +165,10 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
return False
return True
+ def _split(self, url):
+ pos = url.index("/", 8)
+ return url[:pos], url[pos:]
+
class BunkrMediaExtractor(BunkrAlbumExtractor):
"""Extractor for bunkr.si media links"""
@@ -105,16 +179,15 @@ class BunkrMediaExtractor(BunkrAlbumExtractor):
def fetch_album(self, album_id):
try:
- url = self._extract_file(self.root + self.album_id)
+ file = self._extract_file(self.root + album_id)
except Exception as exc:
self.log.error("%s: %s", exc.__class__.__name__, exc)
return (), {}
- return ({"file": text.unescape(url)},), {
+ return (file,), {
"album_id" : "",
"album_name" : "",
"album_size" : -1,
"description": "",
"count" : 1,
- "_http_validate": self._validate,
}
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 725af3a..0b1e44a 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -9,7 +9,7 @@
"""Extractors for https://www.civitai.com/"""
from .common import Extractor, Message
-from .. import text, util
+from .. import text, util, exception
import itertools
import time
@@ -23,7 +23,7 @@ class CivitaiExtractor(Extractor):
root = "https://civitai.com"
directory_fmt = ("{category}", "{username|user[username]}", "images")
filename_fmt = "{file[id]|id|filename}.{extension}"
- archive_fmt = "{file[hash]|hash}"
+ archive_fmt = "{file[uuid]|uuid}"
request_interval = (0.5, 1.5)
def _init(self):
@@ -101,9 +101,11 @@ class CivitaiExtractor(Extractor):
def _url(self, image):
url = image["url"]
if "/" in url:
- parts = url.rsplit("/", 2)
- parts[1] = self._image_quality
+ parts = url.rsplit("/", 3)
+ image["uuid"] = parts[1]
+ parts[2] = self._image_quality
return "/".join(parts)
+ image["uuid"] = url
name = image.get("name")
if not name:
@@ -133,8 +135,6 @@ class CivitaiModelExtractor(CivitaiExtractor):
directory_fmt = ("{category}", "{user[username]}",
"{model[id]}{model[name]:? //}",
"{version[id]}{version[name]:? //}")
- filename_fmt = "{file[id]}.{extension}"
- archive_fmt = "{file[hash]}"
pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?"
example = "https://civitai.com/models/12345/TITLE"
@@ -195,19 +195,25 @@ class CivitaiModelExtractor(CivitaiExtractor):
)
def _extract_files_model(self, model, version, user):
- return [
- {
+ files = []
+
+ for num, file in enumerate(version["files"], 1):
+ file["uuid"] = "model-{}-{}-{}".format(
+ model["id"], version["id"], file["id"])
+ files.append({
"num" : num,
"file" : file,
"filename" : file["name"],
"extension": "bin",
- "url" : file["downloadUrl"],
+ "url" : file.get("downloadUrl") or
+ "{}/api/download/models/{}".format(
+ self.root, version["id"]),
"_http_headers" : {
"Authorization": self.api.headers.get("Authorization")},
"_http_validate": self._validate_file_model,
- }
- for num, file in enumerate(version["files"], 1)
- ]
+ })
+
+ return files
def _extract_files_image(self, model, version, user):
if "images" in version:
@@ -263,24 +269,14 @@ class CivitaiPostExtractor(CivitaiExtractor):
return ({"id": int(self.groups[0])},)
-class CivitaiTagModelsExtractor(CivitaiExtractor):
- subcategory = "tag-models"
- pattern = BASE_PATTERN + r"/(?:tag/|models\?tag=)([^/?&#]+)"
+class CivitaiTagExtractor(CivitaiExtractor):
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"/tag/([^/?&#]+)"
example = "https://civitai.com/tag/TAG"
def models(self):
tag = text.unquote(self.groups[0])
- return self.api.models({"tag": tag})
-
-
-class CivitaiTagImagesExtractor(CivitaiExtractor):
- subcategory = "tag-images"
- pattern = BASE_PATTERN + r"/images\?tags=([^&#]+)"
- example = "https://civitai.com/images?tags=12345"
-
- def images(self):
- tag = text.unquote(self.groups[0])
- return self.api.images({"tag": tag})
+ return self.api.models_tag(tag)
class CivitaiSearchExtractor(CivitaiExtractor):
@@ -293,6 +289,26 @@ class CivitaiSearchExtractor(CivitaiExtractor):
return self.api.models(params)
+class CivitaiModelsExtractor(CivitaiExtractor):
+ subcategory = "models"
+ pattern = BASE_PATTERN + r"/models(?:/?\?([^#]+))?(?:$|#)"
+ example = "https://civitai.com/models"
+
+ def models(self):
+ params = text.parse_query(self.groups[0])
+ return self.api.models(params)
+
+
+class CivitaiImagesExtractor(CivitaiExtractor):
+ subcategory = "images"
+ pattern = BASE_PATTERN + r"/images(?:/?\?([^#]+))?(?:$|#)"
+ example = "https://civitai.com/images"
+
+ def images(self):
+ params = text.parse_query(self.groups[0])
+ return self.api.images(params)
+
+
class CivitaiUserExtractor(CivitaiExtractor):
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:$|\?|#)"
@@ -339,11 +355,35 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?"
example = "https://civitai.com/user/USER/images"
+ def __init__(self, match):
+ self.params = text.parse_query_list(match.group(2))
+ if self.params.get("section") == "reactions":
+ self.subcategory = "reactions"
+ self.images = self.images_reactions
+ CivitaiExtractor.__init__(self, match)
+
def images(self):
- params = text.parse_query(self.groups[1])
+ params = self.params
params["username"] = text.unquote(self.groups[0])
return self.api.images(params)
+ def images_reactions(self):
+ if "Authorization" not in self.api.headers and \
+ not self.cookies.get(
+ "__Secure-civitai-token", domain=".civitai.com"):
+ raise exception.AuthorizationError("api-key or cookies required")
+
+ params = self.params
+ params["authed"] = True
+ params["useIndex"] = False
+ if "reactions" in params:
+ if isinstance(params["reactions"], str):
+ params["reactions"] = (params["reactions"],)
+ else:
+ params["reactions"] = (
+ "Like", "Dislike", "Heart", "Laugh", "Cry")
+ return self.api.images(params)
+
class CivitaiRestAPI():
"""Interface for the Civitai Public REST API
@@ -396,6 +436,9 @@ class CivitaiRestAPI():
def models(self, params):
return self._pagination("/v1/models", params)
+ def models_tag(self, tag):
+ return self.models({"tag": tag})
+
def _call(self, endpoint, params=None):
if endpoint[0] == "/":
url = self.root + endpoint
@@ -419,14 +462,14 @@ class CivitaiRestAPI():
class CivitaiTrpcAPI():
- """Interface for the Civitai TRPC API"""
+ """Interface for the Civitai tRPC API"""
def __init__(self, extractor):
self.extractor = extractor
self.root = extractor.root + "/api/trpc/"
self.headers = {
"content-type" : "application/json",
- "x-client-version": "5.0.146",
+ "x-client-version": "5.0.185",
"x-client-date" : "",
"x-client" : "web",
"x-fingerprint" : "undefined",
@@ -463,6 +506,7 @@ class CivitaiTrpcAPI():
"include" : ["cosmetics"],
})
+ params = self._type_params(params)
return self._pagination(endpoint, params)
def images_gallery(self, model, version, user):
@@ -516,6 +560,9 @@ class CivitaiTrpcAPI():
return self._pagination(endpoint, params)
+ def models_tag(self, tag):
+ return self.models({"tagname": tag})
+
def post(self, post_id):
endpoint = "post.get"
params = {"id": int(post_id)}
@@ -580,3 +627,13 @@ class CivitaiTrpcAPI():
def _merge_params(self, params_user, params_default):
params_default.update(params_user)
return params_default
+
+ def _type_params(self, params):
+ for key, type in (
+ ("tags" , int),
+ ("modelId" , int),
+ ("modelVersionId", int),
+ ):
+ if key in params:
+ params[key] = type(params[key])
+ return params
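Note: the new _type_params() helper exists because text.parse_query() returns every value as a string, while the tRPC image endpoint apparently expects integers for these keys. A minimal standalone sketch of the conversion (the sample query dict is made up):

    def type_params(params):
        # parse_query() yields strings; coerce the keys the tRPC API wants as ints
        for key, func in (("tags", int), ("modelId", int), ("modelVersionId", int)):
            if key in params:
                params[key] = func(params[key])
        return params

    print(type_params({"modelId": "12345", "sort": "Newest"}))
    # {'modelId': 12345, 'sort': 'Newest'}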
diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py
index 4722a4f..0524239 100644
--- a/gallery_dl/extractor/cohost.py
+++ b/gallery_dl/extractor/cohost.py
@@ -109,7 +109,7 @@ class CohostUserExtractor(CohostExtractor):
"projectHandle": self.groups[0],
"page": 0,
"options": {
- "pinnedPostsAtTop" : bool(self.pinned),
+ "pinnedPostsAtTop" : True if self.pinned else False,
"hideReplies" : not self.replies,
"hideShares" : not self.shares,
"hideAsks" : not self.asks,
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 32c8e67..2146fa6 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -185,7 +185,9 @@ class Extractor():
self._dump_response(response)
if (
code < 400 or
- code < 500 and (not fatal and code != 429 or fatal is None)
+ code < 500 and (
+ not fatal and code != 429 or fatal is None) or
+ fatal is ...
):
if encoding:
response.encoding = encoding
@@ -454,46 +456,49 @@ class Extractor():
cookies = random.choice(cookies)
self.cookies_load(cookies)
- def cookies_load(self, cookies):
- if isinstance(cookies, dict):
- self.cookies_update_dict(cookies, self.cookies_domain)
+ def cookies_load(self, cookies_source):
+ if isinstance(cookies_source, dict):
+ self.cookies_update_dict(cookies_source, self.cookies_domain)
- elif isinstance(cookies, str):
- path = util.expand_path(cookies)
+ elif isinstance(cookies_source, str):
+ path = util.expand_path(cookies_source)
try:
with open(path) as fp:
- util.cookiestxt_load(fp, self.cookies)
+ cookies = util.cookiestxt_load(fp)
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
- self.log.debug("Loading cookies from '%s'", cookies)
+ self.log.debug("Loading cookies from '%s'", cookies_source)
+ set_cookie = self.cookies.set_cookie
+ for cookie in cookies:
+ set_cookie(cookie)
self.cookies_file = path
- elif isinstance(cookies, (list, tuple)):
- key = tuple(cookies)
- cookiejar = _browser_cookies.get(key)
+ elif isinstance(cookies_source, (list, tuple)):
+ key = tuple(cookies_source)
+ cookies = _browser_cookies.get(key)
- if cookiejar is None:
+ if cookies is None:
from ..cookies import load_cookies
- cookiejar = self.cookies.__class__()
try:
- load_cookies(cookiejar, cookies)
+ cookies = load_cookies(cookies_source)
except Exception as exc:
self.log.warning("cookies: %s", exc)
+ cookies = ()
else:
- _browser_cookies[key] = cookiejar
+ _browser_cookies[key] = cookies
else:
self.log.debug("Using cached cookies from %s", key)
set_cookie = self.cookies.set_cookie
- for cookie in cookiejar:
+ for cookie in cookies:
set_cookie(cookie)
else:
self.log.warning(
"Expected 'dict', 'list', or 'str' value for 'cookies' "
"option, got '%s' (%s)",
- cookies.__class__.__name__, cookies)
+ cookies_source.__class__.__name__, cookies_source)
def cookies_store(self):
"""Store the session's cookies in a cookies.txt file"""
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 836fae7..693def9 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -401,7 +401,7 @@ class DeviantartExtractor(Extractor):
html = content["html"]
markup = html["markup"]
- if not markup.startswith("{"):
+ if not markup or markup[0] != "{":
return markup
if html["type"] == "tiptap":
@@ -1301,7 +1301,7 @@ class DeviantartOAuthAPI():
metadata = extractor.config("metadata", False)
if not metadata:
- metadata = bool(extractor.extra)
+ metadata = True if extractor.extra else False
if metadata:
self.metadata = True
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 01af7a4..3e6d537 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -260,9 +260,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"torrentcount" : extr('>Torrent Download (', ')'),
}
- if data["uploader"].startswith("<"):
- data["uploader"] = text.unescape(text.extr(
- data["uploader"], ">", "<"))
+ uploader = data["uploader"]
+ if uploader and uploader[0] == "<":
+ data["uploader"] = text.unescape(text.extr(uploader, ">", "<"))
f = data["favorites"][0]
if f == "N":
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 85dd896..44c4542 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -37,7 +37,7 @@ class FoolfuukaExtractor(BaseExtractor):
if not url and "remote_media_link" in media:
url = self.remote(media)
- if url.startswith("/"):
+ if url and url[0] == "/":
url = self.root + url
post["filename"], _, post["extension"] = \
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index 12e8860..72a6453 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -17,42 +17,30 @@ class LensdumpBase():
category = "lensdump"
root = "https://lensdump.com"
- def nodes(self, page=None):
- if page is None:
- page = self.request(self.url).text
-
- # go through all pages starting from the oldest
- page_url = text.urljoin(self.root, text.extr(
- text.extr(page, ' id="list-most-oldest-link"', '>'),
- 'href="', '"'))
- while page_url is not None:
- if page_url == self.url:
- current_page = page
- else:
- current_page = self.request(page_url).text
-
- for node in text.extract_iter(
- current_page, ' class="list-item ', '>'):
- yield node
-
- # find url of next page
- page_url = text.extr(
- text.extr(current_page, ' data-pagination="next"', '>'),
- 'href="', '"')
- if page_url is not None and len(page_url) > 0:
- page_url = text.urljoin(self.root, page_url)
- else:
- page_url = None
+ def _pagination(self, page, begin, end):
+ while True:
+ yield from text.extract_iter(page, begin, end)
+
+ next = text.extr(page, ' data-pagination="next"', '>')
+ if not next:
+ return
+
+ url = text.urljoin(self.root, text.extr(next, 'href="', '"'))
+ page = self.request(url).text
class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
subcategory = "album"
- pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
+ pattern = BASE_PATTERN + r"/a/(\w+)(?:/?\?([^#]+))?"
example = "https://lensdump.com/a/ID"
def __init__(self, match):
- GalleryExtractor.__init__(self, match, match.string)
- self.gallery_id = match.group(1) or match.group(2)
+ self.gallery_id, query = match.groups()
+ if query:
+ url = "{}/a/{}/?{}".format(self.root, self.gallery_id, query)
+ else:
+ url = "{}/a/{}".format(self.root, self.gallery_id)
+ GalleryExtractor.__init__(self, match, url)
def metadata(self, page):
return {
@@ -62,40 +50,48 @@ class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
}
def images(self, page):
- for node in self.nodes(page):
- # get urls and filenames of images in current page
- json_data = util.json_loads(text.unquote(
- text.extr(node, "data-object='", "'") or
- text.extr(node, 'data-object="', '"')))
- image_id = json_data.get('name')
- image_url = json_data.get('url')
- image_title = json_data.get('title')
+ for image in self._pagination(page, ' class="list-item ', '>'):
+
+ data = util.json_loads(text.unquote(
+ text.extr(image, "data-object='", "'") or
+ text.extr(image, 'data-object="', '"')))
+ image_id = data.get("name")
+ image_url = data.get("url")
+ image_title = data.get("title")
if image_title is not None:
image_title = text.unescape(image_title)
+
yield (image_url, {
- 'id': image_id,
- 'url': image_url,
- 'title': image_title,
- 'name': json_data.get('filename'),
- 'filename': image_id,
- 'extension': json_data.get('extension'),
- 'height': text.parse_int(json_data.get('height')),
- 'width': text.parse_int(json_data.get('width')),
+ "id" : image_id,
+ "url" : image_url,
+ "title" : image_title,
+ "name" : data.get("filename"),
+ "filename" : image_id,
+ "extension": data.get("extension"),
+ "width" : text.parse_int(data.get("width")),
+ "height" : text.parse_int(data.get("height")),
})
class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
"""Extractor for album list from lensdump.com"""
subcategory = "albums"
- pattern = BASE_PATTERN + r"/\w+/albums"
- example = "https://lensdump.com/USER/albums"
+ pattern = BASE_PATTERN + r"/(?![ai]/)([^/?#]+)(?:/?\?([^#]+))?"
+ example = "https://lensdump.com/USER"
def items(self):
- for node in self.nodes():
- album_url = text.urljoin(self.root, text.extr(
- node, 'data-url-short="', '"'))
- yield Message.Queue, album_url, {
- "_extractor": LensdumpAlbumExtractor}
+ user, query = self.groups
+ url = "{}/{}/".format(self.root, user)
+ if query:
+ params = text.parse_query(query)
+ else:
+ params = {"sort": "date_asc", "page": "1"}
+ page = self.request(url, params=params).text
+
+ data = {"_extractor": LensdumpAlbumExtractor}
+ for album_path in self._pagination(page, 'data-url-short="', '"'):
+ album_url = text.urljoin(self.root, album_path)
+ yield Message.Queue, album_url, data
class LensdumpImageExtractor(LensdumpBase, Extractor):
@@ -107,16 +103,13 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
example = "https://lensdump.com/i/ID"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.key = match.group(1)
-
def items(self):
- url = "{}/i/{}".format(self.root, self.key)
+ key = self.groups[0]
+ url = "{}/i/{}".format(self.root, key)
extr = text.extract_from(self.request(url).text)
data = {
- "id" : self.key,
+ "id" : key,
"title" : text.unescape(extr(
'property="og:title" content="', '"')),
"url" : extr(
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
index 6fc0689..044f4f5 100644
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -47,7 +47,15 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
url = file["file"]
file.update(data)
text.nameext_from_url(url, file)
- file["name"], sep, file["id"] = file["filename"].rpartition("-")
+
+ if "name" in file:
+ name = file["name"]
+ file["name"] = name.rpartition(".")[0] or name
+ file["id"] = file["filename"].rpartition("-")[2]
+ else:
+ file["name"], sep, file["id"] = \
+ file["filename"].rpartition("-")
+
yield Message.Url, url, file
def fetch_album(self, album_id):
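Note: the new branch keeps an API-supplied "name" (minus its extension) and only takes the "id" from the hyphen-suffixed filename; without a "name", both values still come from splitting the filename on its last "-". A small sketch of both cases, with made-up file entries:

    # case 1: the API provides "name"
    file = {"name": "photo.jpg", "filename": "photo-a1b2c3"}
    name = file["name"]
    file["name"] = name.rpartition(".")[0] or name      # "photo"
    file["id"] = file["filename"].rpartition("-")[2]     # "a1b2c3"

    # case 2: no "name" -> split the filename on its last "-"
    file = {"filename": "photo-a1b2c3"}
    file["name"], _, file["id"] = file["filename"].rpartition("-")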
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index bca7e4d..1f24593 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -174,6 +174,20 @@ class MangadexListExtractor(MangadexExtractor):
yield Message.Queue, url, data
+class MangadexAuthorExtractor(MangadexExtractor):
+ """Extractor for mangadex authors"""
+ subcategory = "author"
+ pattern = BASE_PATTERN + r"/author/([0-9a-f-]+)"
+ example = ("https://mangadex.org/author"
+ "/01234567-89ab-cdef-0123-456789abcdef/NAME")
+
+ def items(self):
+ for manga in self.api.manga_author(self.uuid):
+ manga["_extractor"] = MangadexMangaExtractor
+ url = "{}/title/{}".format(self.root, manga["id"])
+ yield Message.Queue, url, manga
+
+
class MangadexAPI():
"""Interface for the MangaDex API v5
@@ -195,6 +209,10 @@ class MangadexAPI():
def athome_server(self, uuid):
return self._call("/at-home/server/" + uuid)
+ def author(self, uuid, manga=False):
+ params = {"includes[]": ("manga",)} if manga else None
+ return self._call("/author/" + uuid, params)["data"]
+
def chapter(self, uuid):
params = {"includes[]": ("scanlation_group",)}
return self._call("/chapter/" + uuid, params)["data"]
@@ -210,6 +228,10 @@ class MangadexAPI():
params = {"includes[]": ("artist", "author")}
return self._call("/manga/" + uuid, params)["data"]
+ def manga_author(self, uuid_author):
+ params = {"authorOrArtist": uuid_author}
+ return self._pagination("/manga", params)
+
def manga_feed(self, uuid):
order = "desc" if self.extractor.config("chapter-reverse") else "asc"
params = {
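Note: the author extractor queues every manga returned by manga_author(), which just paginates /manga filtered by authorOrArtist. A hedged standalone sketch of that request against the public MangaDex API; the api.mangadex.org host, the limit/offset parameters, and the plain requests usage are assumptions, not part of this patch:

    import requests

    params = {
        "authorOrArtist": "01234567-89ab-cdef-0123-456789abcdef",  # author UUID from the URL
        "limit": 100,
        "offset": 0,
    }
    data = requests.get("https://api.mangadex.org/manga", params=params).json()
    for manga in data["data"]:
        print(manga["id"], manga["attributes"]["title"])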
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
index 0183b25..9fc8681 100644
--- a/gallery_dl/extractor/mangakakalot.py
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -19,7 +19,7 @@ BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv"
class MangakakalotBase():
"""Base class for mangakakalot extractors"""
category = "mangakakalot"
- root = "https://ww6.mangakakalot.tv"
+ root = "https://ww8.mangakakalot.tv"
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
@@ -40,7 +40,7 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
match = re.match(
r"(?:[Vv]ol\. *(\d+) )?"
r"[Cc]hapter *([^:]*)"
- r"(?:: *(.+))?", info)
+ r"(?:: *(.+))?", info or "")
volume, chapter, title = match.groups() if match else ("", "", info)
chapter, sep, minor = chapter.partition(".")
@@ -86,7 +86,7 @@ class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor
- if url.startswith("/"):
+ if url[0] == "/":
url = self.root + url
results.append((url, data.copy()))
return results
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 2928573..61ffdee 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -14,6 +14,9 @@ from ..cache import cache
import itertools
import re
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?newgrounds\.com"
+USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com"
+
class NewgroundsExtractor(Extractor):
"""Base class for newgrounds extractors"""
@@ -93,7 +96,7 @@ class NewgroundsExtractor(Extractor):
def posts(self):
"""Return URLs of all relevant post pages"""
- return self._pagination(self._path)
+ return self._pagination(self._path, self.groups[1])
def metadata(self):
"""Return general metadata"""
@@ -334,10 +337,10 @@ class NewgroundsExtractor(Extractor):
for fmt in formats:
yield fmt[1][0]["src"]
- def _pagination(self, kind):
+ def _pagination(self, kind, pnum=1):
url = "{}/{}".format(self.user_root, kind)
params = {
- "page": 1,
+ "page": text.parse_int(pnum, 1),
"isAjaxRequest": "1",
}
headers = {
@@ -400,8 +403,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
class NewgroundsMediaExtractor(NewgroundsExtractor):
"""Extractor for a media file from newgrounds.com"""
subcategory = "media"
- pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
- r"(/(?:portal/view|audio/listen)/\d+)")
+ pattern = BASE_PATTERN + r"(/(?:portal/view|audio/listen)/\d+)"
example = "https://www.newgrounds.com/portal/view/12345"
def __init__(self, match):
@@ -416,35 +418,35 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = _path = "art"
- pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
+ pattern = USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/art"
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
subcategory = _path = "audio"
- pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
+ pattern = USER_PATTERN + r"/audio(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/audio"
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
subcategory = _path = "movies"
- pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
+ pattern = USER_PATTERN + r"/movies(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/movies"
class NewgroundsGamesExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user's games"""
subcategory = _path = "games"
- pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/games/?$"
+ pattern = USER_PATTERN + r"/games(?:(?:/page/|/?\?page=)(\d+))?/?$"
example = "https://USER.newgrounds.com/games"
class NewgroundsUserExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
subcategory = "user"
- pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
+ pattern = USER_PATTERN + r"/?$"
example = "https://USER.newgrounds.com"
def initialize(self):
@@ -464,25 +466,22 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
"""Extractor for posts favorited by a newgrounds user"""
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
- pattern = (r"(?:https?://)?([\w-]+)\.newgrounds\.com"
- r"/favorites(?!/following)(?:/(art|audio|movies))?/?")
+ pattern = (USER_PATTERN + r"/favorites(?!/following)(?:/(art|audio|movies)"
+ r"(?:(?:/page/|/?\?page=)(\d+))?)?")
example = "https://USER.newgrounds.com/favorites"
- def __init__(self, match):
- NewgroundsExtractor.__init__(self, match)
- self.kind = match.group(2)
-
def posts(self):
- if self.kind:
- return self._pagination(self.kind)
+ _, kind, pnum = self.groups
+ if kind:
+ return self._pagination_favorites(kind, pnum)
return itertools.chain.from_iterable(
- self._pagination(k) for k in ("art", "audio", "movies")
+ self._pagination_favorites(k) for k in ("art", "audio", "movies")
)
- def _pagination(self, kind):
+ def _pagination_favorites(self, kind, pnum=1):
url = "{}/favorites/{}".format(self.user_root, kind)
params = {
- "page": 1,
+ "page": text.parse_int(pnum, 1),
"isAjaxRequest": "1",
}
headers = {
@@ -514,12 +513,13 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
"""Extractor for a newgrounds user's favorited users"""
subcategory = "following"
- pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/favorites/(following)"
+ pattern = USER_PATTERN + r"/favorites/(following)"
example = "https://USER.newgrounds.com/favorites/following"
def items(self):
+ _, kind, pnum = self.groups
data = {"_extractor": NewgroundsUserExtractor}
- for url in self._pagination(self.kind):
+ for url in self._pagination_favorites(kind, pnum):
yield Message.Queue, url, data
@staticmethod
@@ -534,13 +534,12 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
"""Extractor for newgrounds.com search reesults"""
subcategory = "search"
directory_fmt = ("{category}", "search", "{search_tags}")
- pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com"
- r"/search/conduct/([^/?#]+)/?\?([^#]+)")
+ pattern = BASE_PATTERN + r"/search/conduct/([^/?#]+)/?\?([^#]+)"
example = "https://www.newgrounds.com/search/conduct/art?terms=QUERY"
def __init__(self, match):
NewgroundsExtractor.__init__(self, match)
- self._path, query = match.groups()
+ self._path, query = self.groups
self.query = text.parse_query(query)
def posts(self):
@@ -550,19 +549,20 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
for s in suitabilities.split(",")}
self.request(self.root + "/suitabilities",
method="POST", data=data)
- return self._pagination("/search/conduct/" + self._path, self.query)
+ return self._pagination_search(
+ "/search/conduct/" + self._path, self.query)
def metadata(self):
return {"search_tags": self.query.get("terms", "")}
- def _pagination(self, path, params):
+ def _pagination_search(self, path, params):
url = self.root + path
+ params["inner"] = "1"
+ params["page"] = text.parse_int(params.get("page"), 1)
headers = {
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
}
- params["inner"] = "1"
- params["page"] = 1
while True:
data = self.request(url, params=params, headers=headers).json()
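Note: the listing patterns now capture an optional page number ("/page/N" or "?page=N") and hand it to _pagination() as the starting page instead of always beginning at 1. A quick check of which URLs the new art pattern accepts (pattern copied from above):

    import re

    USER_PATTERN = r"(?:https?://)?([\w-]+)\.newgrounds\.com"
    pattern = re.compile(USER_PATTERN + r"/art(?:(?:/page/|/?\?page=)(\d+))?/?$")

    for url in ("https://USER.newgrounds.com/art",
                "https://USER.newgrounds.com/art/page/3",
                "https://USER.newgrounds.com/art?page=3"):
        m = pattern.match(url)
        print(m.group(1), m.group(2))   # USER None / USER 3 / USER 3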
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 8c7ffe5..851f663 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -63,7 +63,8 @@ class NozomiExtractor(Extractor):
yield Message.Directory, post
for post["num"], image in enumerate(images, 1):
post["filename"] = post["dataid"] = did = image["dataid"]
- post["is_video"] = video = bool(image.get("is_video"))
+ post["is_video"] = video = \
+ True if image.get("is_video") else False
ext = image["type"]
if video:
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index d47ffa2..0b64ea3 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -56,6 +56,7 @@ class PatreonExtractor(Extractor):
text.nameext_from_url(name, post)
if text.ext_from_url(url) == "m3u8":
url = "ytdl:" + url
+ post["_ytdl_manifest"] = "hls"
post["extension"] = "mp4"
yield Message.Url, url, post
else:
@@ -310,7 +311,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))"
- r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")
+ r"(?:c/)?([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")
example = "https://www.patreon.com/USER"
def posts(self):
@@ -340,9 +341,9 @@ class PatreonCreatorExtractor(PatreonExtractor):
user_id = query.get("u")
if user_id:
- url = "{}/user/posts?u={}".format(self.root, user_id)
+ url = "{}/user?u={}".format(self.root, user_id)
else:
- url = "{}/{}/posts".format(self.root, creator)
+ url = "{}/{}".format(self.root, creator)
page = self.request(url, notfound="creator").text
try:
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 8c04ed5..499c579 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -18,8 +18,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
- filename_fmt = "{category}_{id}{media_id:?_//}.{extension}"
- archive_fmt = "{id}{media_id}"
+ filename_fmt = "{category}_{id}{media_id|page_id:?_//}.{extension}"
+ archive_fmt = "{id}{media_id|page_id}"
root = "https://www.pinterest.com"
def _init(self):
@@ -30,12 +30,12 @@ class PinterestExtractor(Extractor):
self.root = text.ensure_http_scheme(domain)
self.api = PinterestAPI(self)
+ self.stories = self.config("stories", True)
+ self.videos = self.config("videos", True)
def items(self):
data = self.metadata()
- videos = self.config("videos", True)
- yield Message.Directory, data
for pin in self.pins():
if isinstance(pin, tuple):
@@ -43,40 +43,35 @@ class PinterestExtractor(Extractor):
yield Message.Queue, url, data
continue
+ try:
+ files = self._extract_files(pin)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.warning(
+ "%s: Error when extracting download URLs (%s: %s)",
+ pin.get("id"), exc.__class__.__name__, exc)
+ continue
+
pin.update(data)
+ pin["count"] = len(files)
- carousel_data = pin.get("carousel_data")
- if carousel_data:
- pin["count"] = len(carousel_data["carousel_slots"])
- for num, slot in enumerate(carousel_data["carousel_slots"], 1):
- slot["media_id"] = slot.pop("id")
- pin.update(slot)
- pin["num"] = num
- size, image = next(iter(slot["images"].items()))
- url = image["url"].replace("/" + size + "/", "/originals/")
- yield Message.Url, url, text.nameext_from_url(url, pin)
-
- else:
- try:
- media = self._media_from_pin(pin)
- except Exception:
- self.log.debug("Unable to fetch download URL for pin %s",
- pin.get("id"))
- continue
+ yield Message.Directory, pin
+ for pin["num"], file in enumerate(files, 1):
+ url = file["url"]
+ text.nameext_from_url(url, pin)
+ pin.update(file)
- if videos or media.get("duration") is None:
- pin.update(media)
- pin["num"] = pin["count"] = 1
+ if "media_id" not in file:
pin["media_id"] = ""
+ if "page_id" not in file:
+ pin["page_id"] = ""
- url = media["url"]
- text.nameext_from_url(url, pin)
+ if pin["extension"] == "m3u8":
+ url = "ytdl:" + url
+ pin["_ytdl_manifest"] = "hls"
+ pin["extension"] = "mp4"
- if pin["extension"] == "m3u8":
- url = "ytdl:" + url
- pin["extension"] = "mp4"
-
- yield Message.Url, url, pin
+ yield Message.Url, url, pin
def metadata(self):
"""Return general metadata"""
@@ -84,26 +79,108 @@ class PinterestExtractor(Extractor):
def pins(self):
"""Return all relevant pin objects"""
- @staticmethod
- def _media_from_pin(pin):
+ def _extract_files(self, pin):
+ story_pin_data = pin.get("story_pin_data")
+ if story_pin_data and self.stories:
+ return self._extract_story(pin, story_pin_data)
+
+ carousel_data = pin.get("carousel_data")
+ if carousel_data:
+ return self._extract_carousel(pin, carousel_data)
+
videos = pin.get("videos")
- if videos:
- video_formats = videos["video_list"]
+ if videos and self.videos:
+ return (self._extract_video(videos),)
- for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
- if fmt in video_formats:
- media = video_formats[fmt]
- break
- else:
- media = max(video_formats.values(),
- key=lambda x: x.get("width", 0))
+ try:
+ return (pin["images"]["orig"],)
+ except Exception:
+ self.log.debug("%s: No files found", pin.get("id"))
+ return ()
+
+ def _extract_story(self, pin, story):
+ files = []
+ story_id = story.get("id")
+
+ for page in story["pages"]:
+ page_id = page.get("id")
+
+ for block in page["blocks"]:
+ type = block.get("type")
+
+ if type == "story_pin_image_block":
+ if 1 == len(page["blocks"]) == len(story["pages"]):
+ try:
+ media = pin["images"]["orig"]
+ except Exception:
+ media = self._extract_image(page, block)
+ else:
+ media = self._extract_image(page, block)
+
+ elif type == "story_pin_video_block":
+ video = block["video"]
+ media = self._extract_video(video)
+ media["media_id"] = video.get("id") or ""
+
+ elif type == "story_pin_paragraph_block":
+ media = {"url": "text:" + block["text"],
+ "extension": "txt",
+ "media_id": block.get("id")}
+
+ else:
+ self.log.warning("%s: Unsupported story block '%s'",
+ pin.get("id"), type)
+ continue
- if "V_720P" in video_formats:
- media["_fallback"] = (video_formats["V_720P"]["url"],)
+ media["story_id"] = story_id
+ media["page_id"] = page_id
+ files.append(media)
+
+ return files
+
+ def _extract_carousel(self, pin, carousel_data):
+ files = []
+ for slot in carousel_data["carousel_slots"]:
+ size, image = next(iter(slot["images"].items()))
+ slot["media_id"] = slot.pop("id")
+ slot["url"] = image["url"].replace(
+ "/" + size + "/", "/originals/", 1)
+ files.append(slot)
+ return files
+
+ def _extract_image(self, page, block):
+ sig = block.get("image_signature") or page["image_signature"]
+ url_base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format(
+ sig[0:2], sig[2:4], sig[4:6], sig)
+ url_jpg = url_base + "jpg"
+ url_png = url_base + "png"
+ url_webp = url_base + "webp"
- return media
+ try:
+ media = block["image"]["images"]["originals"]
+ except Exception:
+ media = {"url": url_jpg, "_fallback": (url_png, url_webp,)}
- return pin["images"]["orig"]
+ if media["url"] == url_jpg:
+ media["_fallback"] = (url_png, url_webp,)
+ else:
+ media["_fallback"] = (url_jpg, url_png, url_webp,)
+ media["media_id"] = sig
+
+ return media
+
+ def _extract_video(self, video):
+ video_formats = video["video_list"]
+ for fmt in ("V_HLSV4", "V_HLSV3_WEB", "V_HLSV3_MOBILE"):
+ if fmt in video_formats:
+ media = video_formats[fmt]
+ break
+ else:
+ media = max(video_formats.values(),
+ key=lambda x: x.get("width", 0))
+ if "V_720P" in video_formats:
+ media["_fallback"] = (video_formats["V_720P"]["url"],)
+ return media
class PinterestPinExtractor(PinterestExtractor):
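Note: for story-pin image blocks that only carry an image signature, _extract_image() rebuilds an "originals" URL from the signature and keeps the other formats as fallbacks. A small sketch of that URL construction (the signature value is made up):

    sig = "a1b2c3d4e5f6071829304a5b6c7d8e9f"   # hypothetical image_signature
    base = "https://i.pinimg.com/originals/{}/{}/{}/{}.".format(
        sig[0:2], sig[2:4], sig[4:6], sig)
    url_jpg, url_png, url_webp = base + "jpg", base + "png", base + "webp"
    # primary URL plus the other two formats as _fallback entries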
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index c2d1243..8c6e6d8 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -38,6 +38,7 @@ class PixivExtractor(Extractor):
self.meta_user = self.config("metadata")
self.meta_bookmark = self.config("metadata-bookmark")
self.meta_comments = self.config("comments")
+ self.meta_captions = self.config("captions")
def items(self):
tags = self.config("tags", "japanese")
@@ -76,8 +77,8 @@ class PixivExtractor(Extractor):
detail = self.api.illust_bookmark_detail(work["id"])
work["tags_bookmark"] = [tag["name"] for tag in detail["tags"]
if tag["is_registered"]]
- if self.sanity_workaround and not work.get("caption") and \
- not work.get("_mypixiv"):
+ if self.meta_captions and not work.get("caption") and \
+ not work.get("_mypixiv") and not work.get("_ajax"):
body = self._request_ajax("/illust/" + str(work["id"]))
if body:
work["caption"] = text.unescape(body["illustComment"])
@@ -108,10 +109,10 @@ class PixivExtractor(Extractor):
if self.load_ugoira:
try:
return self._extract_ugoira(work)
- except exception.StopExtraction as exc:
+ except Exception as exc:
self.log.warning(
- "Unable to retrieve Ugoira metatdata (%s - %s)",
- work["id"], exc.message)
+                    "%s: Unable to retrieve Ugoira metadata (%s - %s)",
+ work["id"], exc.__class__.__name__, exc)
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
@@ -186,6 +187,7 @@ class PixivExtractor(Extractor):
return None
def _extract_ajax(self, work, body):
+ work["_ajax"] = True
url = self._extract_ajax_url(body)
if not url:
return ()
@@ -243,12 +245,12 @@ class PixivExtractor(Extractor):
original = body["urls"]["original"]
if original:
return original
- except KeyError:
+ except Exception:
pass
try:
square1200 = body["userIllusts"][body["id"]]["url"]
- except KeyError:
+ except Exception:
return
parts = square1200.rpartition("_p0")[0].split("/")
del parts[3:5]
@@ -293,9 +295,6 @@ class PixivExtractor(Extractor):
"x_restrict" : 0,
}
- def _web_to_mobile(self, work):
- return work
-
def works(self):
"""Return an iterable containing all relevant 'work' objects"""
@@ -334,15 +333,17 @@ class PixivUserExtractor(PixivExtractor):
class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
+ _warning = True
pattern = (BASE_PATTERN + r"/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
example = "https://www.pixiv.net/en/users/12345/artworks"
- def __init__(self, match):
- PixivExtractor.__init__(self, match)
- u1, t1, u2, t2 = match.groups()
+ def _init(self):
+ PixivExtractor._init(self)
+
+ u1, t1, u2, t2 = self.groups
if t1:
t1 = text.unquote(t1)
elif t2:
@@ -350,6 +351,14 @@ class PixivArtworksExtractor(PixivExtractor):
self.user_id = u1 or u2
self.tag = t1 or t2
+ if self.sanity_workaround:
+ self.cookies_domain = d = ".pixiv.net"
+ self._init_cookies()
+ if self._warning and not self.cookies.get("PHPSESSID", domain=d):
+ PixivArtworksExtractor._warning = False
+ self.log.warning("No 'PHPSESSID' cookie set. Can detect only "
+ "non R-18 'sanity_level' works.")
+
def metadata(self):
if self.config("metadata"):
self.api.user_detail(self.user_id)
@@ -358,6 +367,19 @@ class PixivArtworksExtractor(PixivExtractor):
def works(self):
works = self.api.user_illusts(self.user_id)
+ if self.sanity_workaround:
+ body = self._request_ajax(
+ "/user/{}/profile/all".format(self.user_id))
+ try:
+ ajax_ids = list(map(int, body["illusts"]))
+ ajax_ids.extend(map(int, body["manga"]))
+ ajax_ids.sort()
+ except Exception as exc:
+ self.log.warning("Unable to collect artwork IDs using AJAX "
+ "API (%s: %s)", exc.__class__.__name__, exc)
+ else:
+ works = self._extend_sanity(works, ajax_ids)
+
if self.tag:
tag = self.tag.lower()
works = (
@@ -367,6 +389,35 @@ class PixivArtworksExtractor(PixivExtractor):
return works
+ def _extend_sanity(self, works, ajax_ids):
+ user = {"id": 1}
+ index = len(ajax_ids) - 1
+
+ for work in works:
+ while index >= 0:
+ work_id = work["id"]
+ ajax_id = ajax_ids[index]
+
+ if ajax_id == work_id:
+ index -= 1
+ break
+
+ elif ajax_id > work_id:
+ index -= 1
+ self.log.debug("Inserting work %s", ajax_id)
+ yield self._make_work(ajax_id, self.sanity_url, user)
+
+ else: # ajax_id < work_id
+ break
+
+ yield work
+
+ while index >= 0:
+ ajax_id = ajax_ids[index]
+ self.log.debug("Inserting work %s", ajax_id)
+ yield self._make_work(ajax_id, self.sanity_url, user)
+ index -= 1
+
class PixivAvatarExtractor(PixivExtractor):
"""Extractor for pixiv avatars"""
diff --git a/gallery_dl/extractor/postmill.py b/gallery_dl/extractor/postmill.py
index 29b351b..8877175 100644
--- a/gallery_dl/extractor/postmill.py
+++ b/gallery_dl/extractor/postmill.py
@@ -50,7 +50,7 @@ class PostmillExtractor(BaseExtractor):
forum = match.group(1)
id = int(match.group(2))
- is_text_post = url.startswith("/")
+ is_text_post = (url[0] == "/")
is_image_post = self._search_image_tag(page) is not None
data = {
"title": title,
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index ce602f6..8577e74 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -31,6 +31,7 @@ class RedditExtractor(Extractor):
parentdir = self.config("parent-directory")
max_depth = self.config("recursion", 0)
previews = self.config("previews", True)
+ embeds = self.config("embeds", True)
videos = self.config("videos", True)
if videos:
@@ -100,7 +101,7 @@ class RedditExtractor(Extractor):
for comment in comments:
html = comment["body_html"] or ""
href = (' href="' in html)
- media = ("media_metadata" in comment)
+ media = (embeds and "media_metadata" in comment)
if media or href:
comment["date"] = text.parse_timestamp(
@@ -211,8 +212,9 @@ class RedditExtractor(Extractor):
def _extract_video_dash(self, submission):
submission["_ytdl_extra"] = {"title": submission["title"]}
try:
- return (submission["secure_media"]["reddit_video"]["dash_url"] +
- "#__youtubedl_smuggle=%7B%22to_generic%22%3A+1%7D")
+ url = submission["secure_media"]["reddit_video"]["dash_url"]
+ submission["_ytdl_manifest"] = "dash"
+ return url
except Exception:
return submission["url"]
diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py
new file mode 100644
index 0000000..9f9f0c4
--- /dev/null
+++ b/gallery_dl/extractor/scrolller.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://scrolller.com/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?scrolller\.com"
+
+
+class ScrolllerExtractor(Extractor):
+ """Base class for scrolller extractors"""
+ category = "scrolller"
+ root = "https://scrolller.com"
+ directory_fmt = ("{category}", "{subredditTitle}")
+ filename_fmt = "{id}{title:? //}.{extension}"
+ archive_fmt = "{id}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ self.auth_token = None
+
+ def items(self):
+ self.login()
+
+ for post in self.posts():
+
+ src = max(post["mediaSources"], key=self._sort_key)
+ post.update(src)
+ url = src["url"]
+ text.nameext_from_url(url, post)
+
+ yield Message.Directory, post
+ yield Message.Url, url, post
+
+ def posts(self):
+ return ()
+
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self.auth_token = self._login_impl(username, password)
+
+ @cache(maxage=28*86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ variables = {
+ "username": username,
+ "password": password,
+ }
+
+ try:
+ data = self._request_graphql("LoginQuery", variables)
+ except exception.HttpError as exc:
+ if exc.status == 403:
+ raise exception.AuthenticationError()
+ raise
+
+ return data["login"]["token"]
+
+ def _request_graphql(self, opname, variables):
+ url = "https://api.scrolller.com/api/v2/graphql"
+ headers = {
+ "Content-Type" : "text/plain;charset=UTF-8",
+ "Origin" : self.root,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-site",
+ }
+ data = {
+ "query" : QUERIES[opname],
+ "variables" : variables,
+ "authorization": self.auth_token,
+ }
+ return self.request(
+ url, method="POST", headers=headers, data=util.json_dumps(data),
+ ).json()["data"]
+
+ def _pagination(self, opname, variables):
+ while True:
+ data = self._request_graphql(opname, variables)
+
+ while "items" not in data:
+ data = data.popitem()[1]
+ yield from data["items"]
+
+ if not data["iterator"]:
+ return
+ variables["iterator"] = data["iterator"]
+
+ def _sort_key(self, src):
+ return src["width"], not src["isOptimized"]
+
+
+class ScrolllerSubredditExtractor(ScrolllerExtractor):
+ """Extractor for media from a scrolller subreddit"""
+ subcategory = "subreddit"
+ pattern = BASE_PATTERN + r"(/r/[^/?#]+)(?:/?\?([^#]+))?"
+ example = "https://scrolller.com/r/SUBREDDIT"
+
+ def posts(self):
+ url, query = self.groups
+ filter = None
+
+ if query:
+ params = text.parse_query(query)
+ if "filter" in params:
+ filter = params["filter"].upper().rstrip("S")
+
+ variables = {
+ "url" : url,
+ "iterator" : None,
+ "filter" : filter,
+ "hostsDown": None,
+ }
+ return self._pagination("SubredditQuery", variables)
+
+
+class ScrolllerFollowingExtractor(ScrolllerExtractor):
+ """Extractor for followed scrolller subreddits"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/following"
+ example = "https://scrolller.com/following"
+
+ def items(self):
+ self.login()
+
+ if not self.auth_token:
+ raise exception.AuthorizationError("Login required")
+
+ variables = {
+ "iterator" : None,
+ "hostsDown": None,
+ }
+
+ for subreddit in self._pagination("FollowingQuery", variables):
+ url = self.root + subreddit["url"]
+ subreddit["_extractor"] = ScrolllerSubredditExtractor
+ yield Message.Queue, url, subreddit
+
+
+class ScrolllerPostExtractor(ScrolllerExtractor):
+ """Extractor for media from a single scrolller post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/(?!r/|following$)([^/?#]+)"
+ example = "https://scrolller.com/title-slug-a1b2c3d4f5"
+
+ def posts(self):
+ url = "{}/{}".format(self.root, self.groups[0])
+ page = self.request(url).text
+ data = util.json_loads(text.extr(
+ page, '<script>window.scrolllerConfig="', '"</script>')
+ .replace('\\"', '"'))
+ return (data["item"],)
+
+
+QUERIES = {
+
+ "SubredditQuery": """\
+query SubredditQuery(
+ $url: String!
+ $filter: SubredditPostFilter
+ $iterator: String
+) {
+ getSubreddit(
+ url: $url
+ ) {
+ children(
+ limit: 50
+ iterator: $iterator
+ filter: $filter
+ disabledHosts: null
+ ) {
+ iterator items {
+ __typename id url title subredditId subredditTitle
+ subredditUrl redditPath isNsfw albumUrl hasAudio
+ fullLengthSource gfycatSource redgifsSource ownerAvatar
+ username displayName isPaid tags isFavorite
+ mediaSources { url width height isOptimized }
+ blurredMediaSources { url width height isOptimized }
+ }
+ }
+ }
+}
+""",
+
+ "FollowingQuery": """\
+query FollowingQuery(
+ $iterator: String
+) {
+ getFollowing(
+ limit: 10
+ iterator: $iterator
+ ) {
+ iterator items {
+ __typename id url title secondaryTitle description createdAt isNsfw
+ subscribers isComplete itemCount videoCount pictureCount albumCount
+ isPaid username tags isFollowing
+ banner { url width height isOptimized }
+ }
+ }
+}
+""",
+
+ "LoginQuery": """\
+query LoginQuery(
+ $username: String!,
+ $password: String!
+) {
+ login(
+ username: $username,
+ password: $password
+ ) {
+ username token expiresAt isAdmin status isPremium
+ }
+}
+""",
+
+}
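Note: every scrolller request goes through one GraphQL endpoint; _request_graphql() POSTs one of the QUERIES above plus its variables and an optional auth token, and _pagination() keeps following the returned iterator. A hedged standalone sketch of the subreddit query outside the extractor framework (plain requests usage is an assumption):

    import json, requests
    from gallery_dl.extractor.scrolller import QUERIES

    payload = {
        "query": QUERIES["SubredditQuery"],      # query text as defined above
        "variables": {"url": "/r/SUBREDDIT", "iterator": None,
                      "filter": None, "hostsDown": None},
        "authorization": None,
    }
    response = requests.post(
        "https://api.scrolller.com/api/v2/graphql",
        headers={"Content-Type": "text/plain;charset=UTF-8"},
        data=json.dumps(payload),
    )
    children = response.json()["data"]["getSubreddit"]["children"]
    items, iterator = children["items"], children["iterator"]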
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
index dd5988f..468840b 100644
--- a/gallery_dl/extractor/telegraph.py
+++ b/gallery_dl/extractor/telegraph.py
@@ -49,7 +49,7 @@ class TelegraphGalleryExtractor(GalleryExtractor):
url, pos = text.extract(figure, 'src="', '"')
if url.startswith("/embed/"):
continue
- elif url.startswith("/"):
+ elif url[0] == "/":
url = self.root + url
caption, pos = text.extract(figure, "<figcaption>", "<", pos)
num += 1
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index bce661a..b196aeb 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -148,8 +148,10 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor):
data["PageNumber"] += 1
def _parse(self, query):
+ if not query:
+ return {}
try:
- if query.startswith("?"):
+ if query[0] == "?":
return self._parse_simple(query)
return self._parse_jsurl(query)
except Exception as exc:
@@ -187,8 +189,6 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor):
Example: ~(name~'John*20Doe~age~42~children~(~'Mary~'Bill))
Ref: https://github.com/Sage/jsurl
"""
- if not data:
- return {}
i = 0
imax = len(data)
diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py
index b21709a..f7ce44b 100644
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -7,7 +7,7 @@
"""Extractors for https://urlgalleries.net/"""
from .common import GalleryExtractor, Message
-from .. import text
+from .. import text, exception
class UrlgalleriesGalleryExtractor(GalleryExtractor):
@@ -16,27 +16,31 @@ class UrlgalleriesGalleryExtractor(GalleryExtractor):
root = "urlgalleries.net"
request_interval = (0.5, 1.0)
pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
- example = "https://blog.urlgalleries.net/gallery-12345/TITLE"
+ example = "https://BLOG.urlgalleries.net/gallery-12345/TITLE"
- def __init__(self, match):
- self.blog, self.gallery_id = match.groups()
+ def items(self):
+ blog, self.gallery_id = self.groups
url = "https://{}.urlgalleries.net/porn-gallery-{}/?a=10000".format(
- self.blog, self.gallery_id)
- GalleryExtractor.__init__(self, match, url)
+ blog, self.gallery_id)
+
+ with self.request(url, allow_redirects=False, fatal=...) as response:
+ if 300 <= response.status_code < 500:
+ if response.headers.get("location", "").endswith(
+ "/not_found_adult.php"):
+ raise exception.NotFoundError("gallery")
+ raise exception.HttpError(None, response)
+ page = response.text
- def items(self):
- page = self.request(self.gallery_url).text
imgs = self.images(page)
data = self.metadata(page)
data["count"] = len(imgs)
- del page
- root = "https://{}.urlgalleries.net".format(self.blog)
+ root = "https://{}.urlgalleries.net".format(blog)
yield Message.Directory, data
for data["num"], img in enumerate(imgs, 1):
- response = self.request(
- root + img, method="HEAD", allow_redirects=False)
- yield Message.Queue, response.headers["Location"], data
+ page = self.request(root + img).text
+ url = text.extr(page, "window.location.href = '", "'")
+ yield Message.Queue, url, data
def metadata(self, page):
extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 95eeafe..ea034a7 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -24,6 +24,13 @@ class VkExtractor(Extractor):
root = "https://vk.com"
request_interval = (0.5, 1.5)
+ def _init(self):
+ self.offset = text.parse_int(self.config("offset"))
+
+ def skip(self, num):
+ self.offset += num
+ return num
+
def items(self):
sub = re.compile(r"/imp[fg]/").sub
sizes = "wzyxrqpo"
@@ -75,7 +82,7 @@ class VkExtractor(Extractor):
"al" : "1",
"direction": "1",
"list" : photos_id,
- "offset" : 0,
+ "offset" : self.offset,
}
while True:
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 116f557..4eae537 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -193,7 +193,10 @@ class WikimediaArticleExtractor(WikimediaExtractor):
def __init__(self, match):
WikimediaExtractor.__init__(self, match)
- path = match.group(match.lastindex)
+ path = self.groups[-1]
+ if path[2] == "/":
+ self.root = self.root + "/" + path[:2]
+ path = path[3:]
if path.startswith("wiki/"):
path = path[5:]
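Note: the added branch handles article paths that begin with a two-letter language prefix: the prefix is appended to the site root and stripped from the path before the usual "wiki/" handling. A tiny illustration with a made-up wiki root:

    root = "https://wiki.example.org"    # hypothetical root
    path = "fr/wiki/Example"

    if path[2] == "/":                   # two-letter language prefix
        root = root + "/" + path[:2]     # "https://wiki.example.org/fr"
        path = path[3:]                  # "wiki/Example"
    if path.startswith("wiki/"):
        path = path[5:]                  # "Example"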
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 4affd55..30801ee 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -347,6 +347,9 @@ class DownloadJob(Job):
self.status |= 4
self.log.error("Failed to download %s",
pathfmt.filename or url)
+ if "error" in hooks:
+ for callback in hooks["error"]:
+ callback(pathfmt)
return
if not pathfmt.temppath:
@@ -433,7 +436,8 @@ class DownloadJob(Job):
if status:
self.status |= status
- if "_fallback" in kwdict and self.fallback:
+ if (status & 95 and # not FormatError or OSError
+ "_fallback" in kwdict and self.fallback):
fallback = kwdict["_fallback"] = \
iter(kwdict["_fallback"])
try:
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index c4f5b94..b38ad74 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -10,6 +10,7 @@
import argparse
import logging
+import os.path
import sys
from . import job, util, version
@@ -152,6 +153,49 @@ class UgoiraAction(argparse.Action):
namespace.postprocessors.append(pp)
+class PrintAction(argparse.Action):
+ def __call__(self, parser, namespace, value, option_string=None):
+ if self.const:
+ filename = self.const
+ base = None
+ mode = "w"
+ else:
+ value, path = value
+ base, filename = os.path.split(path)
+ mode = "a"
+
+ event, sep, format_string = value.partition(":")
+ if not sep:
+ format_string = event
+ event = ("prepare",)
+ else:
+ event = event.strip().lower()
+ if event not in {"init", "file", "after", "skip", "error",
+ "prepare", "prepare-after", "post", "post-after",
+ "finalize", "finalize-success", "finalize-error"}:
+ format_string = value
+ event = ("prepare",)
+
+ if not format_string:
+ return
+
+ if "{" not in format_string and \
+ " " not in format_string and \
+ format_string[0] != "\f":
+ format_string = "{" + format_string + "}"
+ if format_string[-1] != "\n":
+ format_string += "\n"
+
+ namespace.postprocessors.append({
+ "name" : "metadata",
+ "event" : event,
+ "filename" : filename,
+ "base-directory": base or ".",
+ "content-format": format_string,
+ "open" : mode,
+ })
+
+
class Formatter(argparse.HelpFormatter):
"""Custom HelpFormatter class to customize help output"""
def __init__(self, prog):
@@ -343,6 +387,19 @@ def build_parser():
help="Add input URLs which returned an error to FILE",
)
output.add_argument(
+ "-N", "--print",
+ dest="postprocessors", metavar="[EVENT:]FORMAT",
+ action=PrintAction, const="-", default=[],
+ help=("Write FORMAT during EVENT (default 'prepare') to standard "
+ "output. Examples: 'id' or 'post:{md5[:8]}'"),
+ )
+ output.add_argument(
+ "--print-to-file",
+ dest="postprocessors", metavar="[EVENT:]FORMAT FILE",
+ action=PrintAction, nargs=2,
+ help="Append FORMAT during EVENT to FILE",
+ )
+ output.add_argument(
"--list-modules",
dest="list_modules", action="store_true",
help="Print a list of available extractor modules",
@@ -616,7 +673,7 @@ def build_parser():
postprocessor = parser.add_argument_group("Post-processing Options")
postprocessor.add_argument(
"-P", "--postprocessor",
- dest="postprocessors", metavar="NAME", action="append", default=[],
+ dest="postprocessors", metavar="NAME", action="append",
help="Activate the specified post processor",
)
postprocessor.add_argument(
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index d5bc171..6cdd994 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -403,9 +403,9 @@ def set_mtime(path, mtime):
pass
-def cookiestxt_load(fp, cookiejar):
- """Parse a Netscape cookies.txt file and add its Cookies to 'cookiejar'"""
- set_cookie = cookiejar.set_cookie
+def cookiestxt_load(fp):
+    """Parse a Netscape cookies.txt file and return its Cookies"""
+ cookies = []
for line in fp:
@@ -427,18 +427,20 @@ def cookiestxt_load(fp, cookiejar):
name = value
value = None
- set_cookie(Cookie(
+ cookies.append(Cookie(
0, name, value,
None, False,
domain,
domain_specified == "TRUE",
- domain.startswith("."),
+ domain[0] == "." if domain else False,
path, False,
secure == "TRUE",
None if expires == "0" or not expires else expires,
False, None, None, {},
))
+ return cookies
+
def cookiestxt_store(fp, cookies):
"""Write 'cookies' in Netscape cookies.txt format to 'fp'"""
@@ -456,9 +458,10 @@ def cookiestxt_store(fp, cookies):
name = cookie.name
value = cookie.value
+ domain = cookie.domain
write("\t".join((
- cookie.domain,
- "TRUE" if cookie.domain.startswith(".") else "FALSE",
+ domain,
+ "TRUE" if domain and domain[0] == "." else "FALSE",
cookie.path,
"TRUE" if cookie.secure else "FALSE",
"0" if cookie.expires is None else str(cookie.expires),
@@ -529,6 +532,24 @@ class HTTPBasicAuth():
return request
+class ModuleProxy():
+ __slots__ = ()
+
+ def __getitem__(self, key, modules=sys.modules):
+ try:
+ return modules[key]
+ except KeyError:
+ pass
+ try:
+ __import__(key)
+ except ImportError:
+ modules[key] = NONE
+ return NONE
+ return modules[key]
+
+ __getattr__ = __getitem__
+
+
class LazyPrompt():
__slots__ = ()
@@ -537,6 +558,7 @@ class LazyPrompt():
class NullContext():
+ __slots__ = ()
def __enter__(self):
return None
@@ -643,6 +665,7 @@ GLOBALS = {
"restart" : raises(exception.RestartExtraction),
"hash_sha1": sha1,
"hash_md5" : md5,
+ "std" : ModuleProxy(),
"re" : re,
}
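Note: ModuleProxy lazily imports whatever module name it is indexed with, caches import failures as NONE, and is added to the GLOBALS mapping above under the name "std". A short usage sketch mirroring test_module_proxy below:

    from gallery_dl import util

    proxy = util.ModuleProxy()
    proxy.os                  # the 'os' module, imported on first access
    proxy["os.path"]          # dotted keys reach submodules
    proxy.does_not_exist      # util.NONE instead of an ImportError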
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index dd96a9a..6c2a32e 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.27.6"
+__version__ = "1.27.7"
__variant__ = None
diff --git a/setup.py b/setup.py
index 5f9dbc8..44acef9 100644
--- a/setup.py
+++ b/setup.py
@@ -136,6 +136,7 @@ def build_setuptools():
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Topic :: Internet :: WWW/HTTP",
diff --git a/test/test_results.py b/test/test_results.py
index ed9c9a9..f36f798 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -358,7 +358,7 @@ class TestPathfmt():
def __enter__(self):
return self
- def __exit__(self, *args):
+ def __exit__(self, exc_type, exc_value, traceback):
pass
def open(self, mode):
diff --git a/test/test_util.py b/test/test_util.py
index e2db29b..888a70a 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -205,9 +205,8 @@ class TestCookiesTxt(unittest.TestCase):
def test_cookiestxt_load(self):
def _assert(content, expected):
- jar = http.cookiejar.CookieJar()
- util.cookiestxt_load(io.StringIO(content, None), jar)
- for c, e in zip(jar, expected):
+ cookies = util.cookiestxt_load(io.StringIO(content, None))
+ for c, e in zip(cookies, expected):
self.assertEqual(c.__dict__, e.__dict__)
_assert("", [])
@@ -253,8 +252,7 @@ class TestCookiesTxt(unittest.TestCase):
)
with self.assertRaises(ValueError):
- util.cookiestxt_load("example.org\tTRUE\t/\tTRUE\t0\tname",
- http.cookiejar.CookieJar())
+ util.cookiestxt_load("example.org\tTRUE\t/\tTRUE\t0\tname")
def test_cookiestxt_store(self):
@@ -832,6 +830,34 @@ def hash(value):
i += 1
self.assertEqual(i, 0)
+ def test_module_proxy(self):
+ proxy = util.ModuleProxy()
+
+ self.assertIs(proxy.os, os)
+ self.assertIs(proxy.os.path, os.path)
+ self.assertIs(proxy["os"], os)
+ self.assertIs(proxy["os.path"], os.path)
+ self.assertIs(proxy["os"].path, os.path)
+
+ self.assertIs(proxy.abcdefghi, util.NONE)
+ self.assertIs(proxy["abcdefghi"], util.NONE)
+ self.assertIs(proxy["abc.def.ghi"], util.NONE)
+ self.assertIs(proxy["os.path2"], util.NONE)
+
+ def test_null_context(self):
+ with util.NullContext():
+ pass
+
+ with util.NullContext() as ctx:
+ self.assertIs(ctx, None)
+
+ try:
+ with util.NullContext() as ctx:
+ exc_orig = ValueError()
+ raise exc_orig
+ except ValueError as exc:
+ self.assertIs(exc, exc_orig)
+
class TestExtractor():
category = "test_category"