-rw-r--r--  CHANGELOG.md                          |  32
-rw-r--r--  PKG-INFO                              |   8
-rw-r--r--  README.rst                            |   6
-rw-r--r--  data/completion/_gallery-dl           |   1
-rw-r--r--  data/completion/gallery-dl            |   2
-rw-r--r--  data/completion/gallery-dl.fish       |   1
-rw-r--r--  data/man/gallery-dl.1                 |   5
-rw-r--r--  data/man/gallery-dl.conf.5            | 169
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |   8
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt       |   2
-rw-r--r--  gallery_dl/downloader/http.py         |  11
-rw-r--r--  gallery_dl/extractor/__init__.py      |   2
-rw-r--r--  gallery_dl/extractor/aryion.py        |  17
-rw-r--r--  gallery_dl/extractor/common.py        |   3
-rw-r--r--  gallery_dl/extractor/furaffinity.py   |  37
-rw-r--r--  gallery_dl/extractor/gofile.py        | 124
-rw-r--r--  gallery_dl/extractor/hitomi.py        |  49
-rw-r--r--  gallery_dl/extractor/instagram.py     |  21
-rw-r--r--  gallery_dl/extractor/kemonoparty.py   |  13
-rw-r--r--  gallery_dl/extractor/kissgoddess.py   |   4
-rw-r--r--  gallery_dl/extractor/mangasee.py      |   4
-rw-r--r--  gallery_dl/extractor/newgrounds.py    |  70
-rw-r--r--  gallery_dl/extractor/pinterest.py     |  86
-rw-r--r--  gallery_dl/extractor/skeb.py          |  90
-rw-r--r--  gallery_dl/extractor/telegraph.py     |  95
-rw-r--r--  gallery_dl/extractor/twibooru.py      |   5
-rw-r--r--  gallery_dl/extractor/twitter.py       | 114
-rw-r--r--  gallery_dl/extractor/unsplash.py      |   4
-rw-r--r--  gallery_dl/formatter.py               |  13
-rw-r--r--  gallery_dl/job.py                     |  35
-rw-r--r--  gallery_dl/option.py                  |  12
-rw-r--r--  gallery_dl/postprocessor/metadata.py  |  29
-rw-r--r--  gallery_dl/postprocessor/mtime.py     |  10
-rw-r--r--  gallery_dl/postprocessor/ugoira.py    | 184
-rw-r--r--  gallery_dl/util.py                    |  44
-rw-r--r--  gallery_dl/version.py                 |   2
-rw-r--r--  test/test_cookies.py                  |   4
-rw-r--r--  test/test_formatter.py                |   8
-rw-r--r--  test/test_postprocessor.py            |   6
-rw-r--r--  test/test_util.py                     |  40
40 files changed, 1074 insertions, 296 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ffd11a6..994d5f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,37 @@
# Changelog
+## 1.21.1 - 2022-04-08
+### Additions
+- [gofile] add gofile.io extractor ([#2364](https://github.com/mikf/gallery-dl/issues/2364))
+- [instagram] add `previews` option ([#2135](https://github.com/mikf/gallery-dl/issues/2135))
+- [kemonoparty] add `duplicates` option ([#2440](https://github.com/mikf/gallery-dl/issues/2440))
+- [pinterest] add extractor for created pins ([#2452](https://github.com/mikf/gallery-dl/issues/2452))
+- [pinterest] support multiple files per pin ([#1619](https://github.com/mikf/gallery-dl/issues/1619), [#2452](https://github.com/mikf/gallery-dl/issues/2452))
+- [telegraph] add telegra.ph extractor ([#2312](https://github.com/mikf/gallery-dl/issues/2312))
+- [twitter] add `syndication` option ([#2354](https://github.com/mikf/gallery-dl/issues/2354))
+- [twitter] accept fxtwitter.com URLs ([#2484](https://github.com/mikf/gallery-dl/issues/2484))
+- [downloader:http] support using an arbitrary method and sending POST data ([#2433](https://github.com/mikf/gallery-dl/issues/2433))
+- [postprocessor:metadata] implement archive options ([#2421](https://github.com/mikf/gallery-dl/issues/2421))
+- [postprocessor:ugoira] add `mtime` option ([#2307](https://github.com/mikf/gallery-dl/issues/2307))
+- [postprocessor:ugoira] support setting timecodes with `mkvmerge` ([#1550](https://github.com/mikf/gallery-dl/issues/1550))
+- [formatter] support evaluating f-string literals
+- add `--ugoira-conv-copy` command-line option ([#1550](https://github.com/mikf/gallery-dl/issues/1550))
+- implement a `contains()` function for filter statements ([#2446](https://github.com/mikf/gallery-dl/issues/2446))
+### Fixes
+- [aryion] provide correct `date` metadata independent of DST
+- [furaffinity] fix search result pagination ([#2402](https://github.com/mikf/gallery-dl/issues/2402))
+- [hitomi] update and fix metadata extraction ([#2444](https://github.com/mikf/gallery-dl/issues/2444))
+- [kissgoddess] extract all images ([#2473](https://github.com/mikf/gallery-dl/issues/2473))
+- [mangasee] unescape manga names ([#2454](https://github.com/mikf/gallery-dl/issues/2454))
+- [newgrounds] update and fix pagination ([#2456](https://github.com/mikf/gallery-dl/issues/2456))
+- [newgrounds] warn about age-restricted posts ([#2456](https://github.com/mikf/gallery-dl/issues/2456))
+- [pinterest] do not force `m3u8_native` for video downloads ([#2436](https://github.com/mikf/gallery-dl/issues/2436))
+- [twibooru] fix posts without `name` ([#2434](https://github.com/mikf/gallery-dl/issues/2434))
+- [unsplash] replace dash with space in search API queries ([#2429](https://github.com/mikf/gallery-dl/issues/2429))
+- [postprocessor:mtime] fix timestamps from datetime objects ([#2307](https://github.com/mikf/gallery-dl/issues/2307))
+- fix yet another bug in `_check_cookies()` ([#2372](https://github.com/mikf/gallery-dl/issues/2372))
+- fix loading/storing cookies without domain
+
## 1.21.0 - 2022-03-14
### Additions
- [fantia] add `num` enumeration index ([#2377](https://github.com/mikf/gallery-dl/issues/2377))
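
The `contains()` entry above adds a helper usable inside `--filter` expressions. The following is a hedged sketch of the behavior the changelog describes; the exact signature in `gallery_dl.util` is an assumption here:

```python
# hedged sketch of a contains() helper for filter statements; assumes it
# splits string values on a separator and tests membership -- the real
# implementation in gallery_dl.util may differ
def contains(values, element, separator=" "):
    if isinstance(values, str):
        values = values.split(separator)
    return element in values

print(contains("tag1 tag2 tag3", "tag2"))  # True
print(contains(["a", "b"], "c"))           # False
```

On the command line this would be used along the lines of `--filter "contains(tags, 'cat')"`.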
diff --git a/PKG-INFO b/PKG-INFO
index 1fddcdc..c3c66f3 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.21.0
+Version: 1.21.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -58,6 +58,7 @@ Optional
- FFmpeg_: Pixiv Ugoira to WebM conversion
- yt-dlp_ or youtube-dl_: Video downloads
+- PySocks_: SOCKS proxy support
Installation
@@ -98,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -364,6 +365,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _FFmpeg: https://www.ffmpeg.org/
.. _yt-dlp: https://github.com/yt-dlp/yt-dlp
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
+.. _PySocks: https://pypi.org/project/PySocks/
.. _pyOpenSSL: https://pyopenssl.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
diff --git a/README.rst b/README.rst
index 8324066..a9dfe11 100644
--- a/README.rst
+++ b/README.rst
@@ -24,6 +24,7 @@ Optional
- FFmpeg_: Pixiv Ugoira to WebM conversion
- yt-dlp_ or youtube-dl_: Video downloads
+- PySocks_: SOCKS proxy support
Installation
@@ -64,8 +65,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -330,6 +331,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _FFmpeg: https://www.ffmpeg.org/
.. _yt-dlp: https://github.com/yt-dlp/yt-dlp
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
+.. _PySocks: https://pypi.org/project/PySocks/
.. _pyOpenSSL: https://pyopenssl.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index ddc75fa..f630c8e 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -58,6 +58,7 @@ _arguments -C -S \
--zip'[Store downloaded files in a ZIP archive]' \
--ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \
--ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \
+--ugoira-conv-copy'[Convert Pixiv Ugoira to MKV without re-encoding any frames]' \
--write-metadata'[Write metadata to separate JSON files]' \
--write-info-json'[Write gallery metadata to a info.json file]' \
--write-infojson'[==SUPPRESS==]' \
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 2aa37e6..d8a6124 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^()$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --source-address --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --source-address --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
fi
}
diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish
index 8f915fd..ff0ee84 100644
--- a/data/completion/gallery-dl.fish
+++ b/data/completion/gallery-dl.fish
@@ -52,6 +52,7 @@ complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies t
complete -c gallery-dl -l 'zip' -d 'Store downloaded files in a ZIP archive'
complete -c gallery-dl -l 'ugoira-conv' -d 'Convert Pixiv Ugoira to WebM (requires FFmpeg)'
complete -c gallery-dl -l 'ugoira-conv-lossless' -d 'Convert Pixiv Ugoira to WebM in VP9 lossless mode'
+complete -c gallery-dl -l 'ugoira-conv-copy' -d 'Convert Pixiv Ugoira to MKV without re-encoding any frames'
complete -c gallery-dl -l 'write-metadata' -d 'Write metadata to separate JSON files'
complete -c gallery-dl -l 'write-info-json' -d 'Write gallery metadata to a info.json file'
complete -c gallery-dl -l 'write-infojson' -d '==SUPPRESS=='
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 3e373fd..6e3a965 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-03-14" "1.21.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-04-08" "1.21.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -173,6 +173,9 @@ Convert Pixiv Ugoira to WebM (requires FFmpeg)
.B "\-\-ugoira\-conv\-lossless"
Convert Pixiv Ugoira to WebM in VP9 lossless mode
.TP
+.B "\-\-ugoira\-conv\-copy"
+Convert Pixiv Ugoira to MKV without re-encoding any frames
+.TP
.B "\-\-write\-metadata"
Write metadata to separate JSON files
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 9651d18..950300e 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-03-14" "1.21.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-04-08" "1.21.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -710,7 +710,9 @@ database, as either lookup operations are significantly faster or
memory requirements are significantly lower when the
amount of stored IDs gets reasonably large.
-Note: archive paths support regular \f[I]format string\f[] replacements,
+Note: Archive files that do not already exist get generated automatically.
+
+Note: Archive paths support regular \f[I]format string\f[] replacements,
but be aware that using external inputs for building local paths
may pose a security risk.
@@ -1497,6 +1499,30 @@ If the format is given as \f[I]string\f[], it will be extended with
restrict it to only one possible format.
+.SS extractor.gofile.api-token
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+API token value found at the bottom of your \f[I]profile page\f[].
+
+If not set, a temporary guest token will be used.
+
+
+.SS extractor.gofile.recursive
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Recursively download files from subfolders.
+
+
.SS extractor.hentaifoundry.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1533,18 +1559,6 @@ Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[].
but is most likely going to fail with \f[I]403 Forbidden\f[] errors.
-.SS extractor.hitomi.metadata
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]false\f[]
-
-.IP "Description:" 4
-Try to extract
-\f[I]artist\f[], \f[I]group\f[], \f[I]parody\f[], and \f[I]characters\f[] metadata.
-
-
.SS extractor.imgur.mp4
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -1599,6 +1613,17 @@ Possible values are
You can use \f[I]"all"\f[] instead of listing all values separately.
+.SS extractor.instagram.previews
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download video previews.
+
+
.SS extractor.instagram.videos
.IP "Type:" 6
\f[I]bool\f[]
@@ -1621,6 +1646,22 @@ Download video files.
Extract \f[I]comments\f[] metadata.
+.SS extractor.kemonoparty.duplicates
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls how to handle duplicate files in a post.
+
+.br
+* \f[I]true\f[]: Download duplicates
+.br
+* \f[I]false\f[]: Ignore duplicates
+
+
.SS extractor.kemonoparty.dms
.IP "Type:" 6
\f[I]bool\f[]
@@ -2436,6 +2477,17 @@ Known available sizes are
\f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[].
+.SS extractor.twitter.syndication
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Retrieve age-restricted content using Twitter's syndication API.
+
+
.SS extractor.twitter.logout
.IP "Type:" 6
\f[I]bool\f[]
@@ -3122,17 +3174,6 @@ Location of a youtube-dl configuration file to load options from.
.SH OUTPUT OPTIONS
-.SS output.fallback
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]true\f[]
-
-.IP "Description:" 4
-Include fallback URLs in the output of \f[I]-g/--get-urls\f[].
-
-
.SS output.mode
.IP "Type:" 6
\f[I]string\f[]
@@ -3181,6 +3222,30 @@ with a display width greater than 1.
Show skipped file downloads.
+.SS output.fallback
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Include fallback URLs in the output of \f[I]-g/--get-urls\f[].
+
+
+.SS output.private
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Include private fields,
+i.e. fields whose name starts with an underscore,
+in the output of \f[I]-K/--list-keywords\f[] and \f[I]-j/--dump-json\f[].
+
+
.SS output.progress
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -3511,6 +3576,19 @@ Custom format string to build the content of metadata files with.
Note: Only applies for \f[I]"mode": "custom"\f[].
+.SS metadata.archive
+.IP "Type:" 6
+\f[I]Path\f[]
+
+.IP "Description:" 4
+File to store IDs of generated metadata files in,
+similar to \f[I]extractor.*.archive\f[].
+
+\f[I]archive-format\f[] and \f[I]archive-prefix\f[] options,
+akin to \f[I]extractor.*.archive-format\f[] and \f[I]extractor.*.archive-prefix\f[],
+are supported as well.
+
+
.SS metadata.mtime
.IP "Type:" 6
\f[I]bool\f[]
@@ -3519,7 +3597,7 @@ Note: Only applies for \f[I]"mode": "custom"\f[].
\f[I]false\f[]
.IP "Description:" 4
-Set modification times for generated metadata files
+Set modification times of generated metadata files
according to the accompanying downloaded file.
Enabling this option will only have an effect
@@ -3590,12 +3668,20 @@ Additional FFmpeg command-line arguments.
\f[I]string\f[]
.IP "Default:" 9
-\f[I]image2\f[]
+\f[I]auto\f[]
.IP "Description:" 4
-FFmpeg demuxer to read input files with. Possible values are
-"\f[I]image2\f[]" and
-"\f[I]concat\f[]".
+FFmpeg demuxer to read and process input files with. Possible values are
+
+.br
+* "\f[I]concat\f[]" (inaccurate frame timecodes)
+.br
+* "\f[I]image2\f[]" (accurate timecodes, not usable on Windows)
+.br
+* "mkvmerge" (accurate timecodes, only WebM or MKV, requires \f[I]mkvmerge\f[])
+
+"auto" will select mkvmerge if possible and fall back to image2 or
+concat depending on the local operating system.
.SS ugoira.ffmpeg-location
@@ -3609,6 +3695,18 @@ FFmpeg demuxer to read input files with. Possible values are
Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use.
+.SS ugoira.mkvmerge-location
+.IP "Type:" 6
+\f[I]Path\f[]
+
+.IP "Default:" 9
+\f[I]"mkvmerge"\f[]
+
+.IP "Description:" 4
+Location of the \f[I]mkvmerge\f[] executable for use with the
+\f[I]mkvmerge demuxer\f[].
+
+
.SS ugoira.ffmpeg-output
.IP "Type:" 6
\f[I]bool\f[]
@@ -3681,6 +3779,17 @@ to the list of FFmpeg command-line arguments
to reduce an odd width/height by 1 pixel and make them even.
+.SS ugoira.mtime
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Set modification times of generated ugoira animations.
+
+
.SS ugoira.repeat-last-frame
.IP "Type:" 6
\f[I]bool\f[]
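
For reference, the options added or changed on this man page, with their documented defaults. This is an illustrative summary written as a Python mapping, not a literal gallery-dl.conf (the real config file is JSON):

```python
# documented defaults of the options introduced or changed above
defaults = {
    "extractor.gofile.api-token":       None,     # temporary guest token if unset
    "extractor.gofile.recursive":       False,
    "extractor.instagram.previews":     False,
    "extractor.kemonoparty.duplicates": False,
    "extractor.twitter.syndication":    False,
    "output.private":                   False,
    "ugoira.demuxer":                   "auto",   # previously "image2"
    "ugoira.mkvmerge-location":         "mkvmerge",
    "ugoira.mtime":                     False,
}
```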
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 009ede8..1c00d88 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.21.0
+Version: 1.21.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -58,6 +58,7 @@ Optional
- FFmpeg_: Pixiv Ugoira to WebM conversion
- yt-dlp_ or youtube-dl_: Video downloads
+- PySocks_: SOCKS proxy support
Installation
@@ -98,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -364,6 +365,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _FFmpeg: https://www.ffmpeg.org/
.. _yt-dlp: https://github.com/yt-dlp/yt-dlp
.. _youtube-dl: https://ytdl-org.github.io/youtube-dl/
+.. _PySocks: https://pypi.org/project/PySocks/
.. _pyOpenSSL: https://pyopenssl.org/
.. _Snapd: https://docs.snapcraft.io/installing-snapd
.. _OAuth: https://en.wikipedia.org/wiki/OAuth
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 4139a4d..4e226fb 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -78,6 +78,7 @@ gallery_dl/extractor/gelbooru_v01.py
gallery_dl/extractor/gelbooru_v02.py
gallery_dl/extractor/generic.py
gallery_dl/extractor/gfycat.py
+gallery_dl/extractor/gofile.py
gallery_dl/extractor/hbrowse.py
gallery_dl/extractor/hentai2read.py
gallery_dl/extractor/hentaicosplays.py
@@ -168,6 +169,7 @@ gallery_dl/extractor/smugmug.py
gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/subscribestar.py
gallery_dl/extractor/tapas.py
+gallery_dl/extractor/telegraph.py
gallery_dl/extractor/test.py
gallery_dl/extractor/toyhouse.py
gallery_dl/extractor/tsumino.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index b878f5f..5622462 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -120,9 +120,14 @@ class HttpDownloader(DownloaderBase):
# connect to (remote) source
try:
response = self.session.request(
- "GET", url, stream=True, headers=headers,
- timeout=self.timeout, verify=self.verify,
- proxies=self.proxies)
+ kwdict.get("_http_method", "GET"), url,
+ stream=True,
+ headers=headers,
+ data=kwdict.get("_http_data"),
+ timeout=self.timeout,
+ proxies=self.proxies,
+ verify=self.verify,
+ )
except (ConnectionError, Timeout) as exc:
msg = str(exc)
continue
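
With this change, an extractor can steer the HTTP downloader through its metadata dict. A minimal sketch of such a kwdict: the `_http_method` and `_http_data` keys come from the code above, while the URL and payload values are made up for illustration:

```python
# inside a hypothetical extractor's items():
url = "https://example.org/api/download"   # illustrative URL
kwdict = {
    "filename": "file", "extension": "bin",
    "_http_method": "POST",        # read by HttpDownloader, defaults to "GET"
    "_http_data": "token=abc123",  # sent as the request body
}
# yield Message.Url, url, kwdict
```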
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 1bec48e..6d6c7ee 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -43,6 +43,7 @@ modules = [
"gelbooru_v01",
"gelbooru_v02",
"gfycat",
+ "gofile",
"hbrowse",
"hentai2read",
"hentaicosplays",
@@ -125,6 +126,7 @@ modules = [
"speakerdeck",
"subscribestar",
"tapas",
+ "telegraph",
"toyhouse",
"tsumino",
"tumblr",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 06ec571..fa590b9 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020-2021 Mike Fährmann
+# Copyright 2020-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,6 +11,8 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
+from email.utils import parsedate_tz
+from datetime import datetime
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -144,7 +146,8 @@ class AryionExtractor(Extractor):
title, _, artist = text.unescape(extr(
"<title>g4 :: ", "<")).rpartition(" by ")
- data = {
+
+ return {
"id" : text.parse_int(post_id),
"url" : url,
"user" : self.user or artist,
@@ -152,7 +155,7 @@ class AryionExtractor(Extractor):
"artist": artist,
"path" : text.split_html(extr(
"cookiecrumb'>", '</span'))[4:-1:2],
- "date" : extr("class='pretty-date' title='", "'"),
+ "date" : datetime(*parsedate_tz(lmod)[:6]),
"size" : text.parse_int(clen),
"views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")),
"width" : text.parse_int(extr("Resolution</b>:", "x")),
@@ -167,12 +170,6 @@ class AryionExtractor(Extractor):
"_mtime" : lmod,
}
- d1, _, d2 = data["date"].partition(",")
- data["date"] = text.parse_datetime(
- d1[:-2] + d2, "%b %d %Y %I:%M %p", -5)
-
- return data
-
class AryionGalleryExtractor(AryionExtractor):
"""Extractor for a user's gallery on eka's portal"""
@@ -249,7 +246,7 @@ class AryionPostExtractor(AryionExtractor):
"title" : "I'm on subscribestar now too!",
"description": r"re:Doesn't hurt to have a backup, right\?",
"tags" : ["Non-Vore", "subscribestar"],
- "date" : "dt:2019-02-16 19:30:00",
+ "date" : "dt:2019-02-16 19:30:34",
"path" : [],
"views" : int,
"favorites": int,
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index e3559f9..ff49d89 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -183,7 +183,7 @@ class Extractor():
elif until:
if isinstance(until, datetime.datetime):
# convert to UTC timestamp
- until = (until - util.EPOCH) / util.SECOND
+ until = util.datetime_to_timestamp(until)
else:
until = float(until)
seconds = until - now
@@ -373,7 +373,6 @@ class Extractor():
self.log.warning(
"Cookie '%s' will expire in less than %s hour%s",
cookie.name, hours + 1, "s" if hours else "")
- continue
names.discard(cookie.name)
if not names:
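
The new `util.datetime_to_timestamp()` call replaces the inline `(until - util.EPOCH) / util.SECOND` expression. A minimal sketch matching that arithmetic:

```python
from datetime import datetime, timedelta

EPOCH = datetime(1970, 1, 1)   # util.EPOCH
SECOND = timedelta(seconds=1)  # util.SECOND

def datetime_to_timestamp(dt):
    """Convert a naive UTC datetime to a UNIX timestamp."""
    return (dt - EPOCH) / SECOND

print(datetime_to_timestamp(datetime(2022, 4, 8)))  # 1649376000.0
```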
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 6a8744a..b63cfc1 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -165,22 +165,24 @@ class FuraffinityExtractor(Extractor):
def _pagination_search(self, query):
url = self.root + "/search/"
data = {
- "page" : 0,
- "next_page" : "Next",
+ "page" : 1,
"order-by" : "relevancy",
"order-direction": "desc",
"range" : "all",
- "rating-general" : "on",
- "rating-mature" : "on",
- "rating-adult" : "on",
- "type-art" : "on",
- "type-music" : "on",
- "type-flash" : "on",
- "type-story" : "on",
- "type-photo" : "on",
- "type-poetry" : "on",
+ "range_from" : "",
+ "range_to" : "",
+ "rating-general" : "1",
+ "rating-mature" : "1",
+ "rating-adult" : "1",
+ "type-art" : "1",
+ "type-music" : "1",
+ "type-flash" : "1",
+ "type-story" : "1",
+ "type-photo" : "1",
+ "type-poetry" : "1",
"mode" : "extended",
}
+
data.update(query)
if "page" in query:
data["page"] = text.parse_int(query["page"])
@@ -194,7 +196,11 @@ class FuraffinityExtractor(Extractor):
if not post_id:
return
- data["page"] += 1
+
+ if "next_page" in data:
+ data["page"] += 1
+ else:
+ data["next_page"] = "Next"
class FuraffinityGalleryExtractor(FuraffinityExtractor):
@@ -255,9 +261,10 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
"range": "45-50",
"count": 6,
}),
- ("https://www.furaffinity.net/search/cute&rating-general=0", {
- "range": "1",
- "count": 1,
+ # first page of search results (#2402)
+ ("https://www.furaffinity.net/search/?q=leaf&range=1day", {
+ "range": "1-3",
+ "count": 3,
}),
)
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
new file mode 100644
index 0000000..37d2986
--- /dev/null
+++ b/gallery_dl/extractor/gofile.py
@@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from .common import Extractor, Message
+from .. import exception
+from ..cache import memcache
+
+
+class GofileFolderExtractor(Extractor):
+ category = "gofile"
+ subcategory = "folder"
+ root = "https://gofile.io"
+ directory_fmt = ("{category}", "{name} ({code})")
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?(?:www\.)?gofile\.io/d/([^/?#]+)"
+ test = (
+ ("https://gofile.io/d/5qHmQj", {
+ "pattern": r"https://file\d+\.gofile\.io/download"
+ r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}"
+ r"/test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26!\.png",
+ "keyword": {
+ "createTime": int,
+ "directLink": "re:https://store3.gofile.io/download/direct/.+",
+ "downloadCount": int,
+ "extension": "png",
+ "filename": "test-テスト-%22&!",
+ "folder": {
+ "childs": [
+ "346429cc-aee4-4996-be3f-e58616fe231f",
+ "765b6b12-b354-4e14-9a45-f763fa455682",
+ "2a44600a-4a59-4389-addc-4a0d542c457b"
+ ],
+ "code": "5qHmQj",
+ "createTime": 1648536501,
+ "id": "45cd45d1-dc78-4553-923f-04091c621699",
+ "isRoot": True,
+ "name": "root",
+ "public": True,
+ "totalDownloadCount": int,
+ "totalSize": 364,
+ "type": "folder"
+ },
+ "id": r"re:\w{8}-\w{4}-\w{4}-\w{4}-\w{12}",
+ "link": r"re:https://file17.gofile.io/download/.+\.png",
+ "md5": "re:[0-9a-f]{32}",
+ "mimetype": "image/png",
+ "name": "test-テスト-%22&!.png",
+ "num": int,
+ "parentFolder": "45cd45d1-dc78-4553-923f-04091c621699",
+ "serverChoosen": "file17",
+ "size": 182,
+ "thumbnail": r"re:https://store3.gofile.io/download/.+\.png",
+ "type": "file"
+ },
+ }),
+ ("https://gofile.io/d/346429cc-aee4-4996-be3f-e58616fe231f", {
+ "content": "0c8768055e4e20e7c7259608b67799171b691140",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.content_id = match.group(1)
+
+ def items(self):
+ recursive = self.config("recursive")
+
+ token = self.config("api-token")
+ if token is None:
+ self.log.debug("creating temporary account")
+ token = self._create_account()
+ self.session.cookies.set("accountToken", token, domain=".gofile.io")
+
+ folder = self._get_content(self.content_id, token)
+ yield Message.Directory, folder
+
+ num = 0
+ contents = folder.pop("contents")
+ for content_id in folder["childs"]:
+ content = contents[content_id]
+ content["folder"] = folder
+
+ if content["type"] == "file":
+ num += 1
+ content["num"] = num
+ content["filename"], _, content["extension"] = \
+ content["name"].rpartition(".")
+ yield Message.Url, content["link"], content
+
+ elif content["type"] == "folder":
+ if recursive:
+ url = "https://gofile.io/d/" + content["id"]
+ content["_extractor"] = GofileFolderExtractor
+ yield Message.Queue, url, content
+
+ else:
+ self.log.debug("'%s' is of unknown type (%s)",
+ content.get("name"), content["type"])
+
+ @memcache()
+ def _create_account(self):
+ return self._api_request("createAccount")["token"]
+
+ def _get_content(self, content_id, token):
+ return self._api_request("getContent", {
+ "contentId" : content_id,
+ "token" : token,
+ "websiteToken": "websiteToken",
+ })
+
+ def _api_request(self, endpoint, params=None):
+ response = self.request(
+ "https://api.gofile.io/" + endpoint, params=params).json()
+
+ if response["status"] != "ok":
+ if response["status"] == "error-notFound":
+ raise exception.NotFoundError("content")
+ raise exception.StopExtraction(
+ "%s failed (Status: %s)", endpoint, response["status"])
+
+ return response["data"]
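
The extractor's API flow can be reproduced with plain `requests` calls; the endpoints and parameters below are taken directly from the code above, and the folder code `5qHmQj` is the test gallery:

```python
import requests

# create a temporary guest account, as the extractor does without api-token
token = requests.get(
    "https://api.gofile.io/createAccount").json()["data"]["token"]

data = requests.get("https://api.gofile.io/getContent", params={
    "contentId": "5qHmQj",
    "token": token,
    "websiteToken": "websiteToken",
}).json()["data"]

for content in data["contents"].values():
    if content["type"] == "file":
        print(content["link"])
```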
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 34eaaab..ca7e692 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -28,8 +28,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
("https://hitomi.la/galleries/867789.html", {
"pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+"
r"/[0-9a-f]{64}\.webp",
- "keyword": "4b584d09d535694d7d757c47daf5c15d116420d2",
- "options": (("metadata", True),),
+ "keyword": "86af5371f38117a07407f11af689bdd460b09710",
"count": 16,
}),
# download test
@@ -77,23 +76,18 @@ class HitomiGalleryExtractor(GalleryExtractor):
def metadata(self, page):
self.info = info = json.loads(page.partition("=")[2])
+ iget = info.get
- data = self._data_from_gallery_info(info)
- if self.config("metadata", False):
- data.update(self._data_from_gallery_page(info))
- return data
-
- def _data_from_gallery_info(self, info):
- language = info.get("language")
+ language = iget("language")
if language:
language = language.capitalize()
- date = info.get("date")
+ date = iget("date")
if date:
date += ":00"
tags = []
- for tinfo in info.get("tags") or ():
+ for tinfo in iget("tags") or ():
tag = string.capwords(tinfo["tag"])
if tinfo.get("female"):
tag += " ♀"
@@ -109,35 +103,10 @@ class HitomiGalleryExtractor(GalleryExtractor):
"lang" : util.language_to_code(language),
"date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"),
"tags" : tags,
- }
-
- def _data_from_gallery_page(self, info):
- url = "{}/galleries/{}.html".format(self.root, info["id"])
-
- # follow redirects
- while True:
- response = self.request(url, fatal=False)
- if b"<title>Redirect</title>" not in response.content:
- break
- url = text.extract(
- response.text, 'http-equiv="refresh" content="', '"',
- )[0].partition("=")[2]
-
- if response.status_code >= 400:
- return {}
-
- def prep(value):
- return [
- text.unescape(string.capwords(v))
- for v in text.extract_iter(value or "", '.html">', '<')
- ]
-
- extr = text.extract_from(response.text)
- return {
- "artist" : prep(extr('<h2>', '</h2>')),
- "group" : prep(extr('<td>Group</td><td>', '</td>')),
- "parody" : prep(extr('<td>Series</td><td>', '</td>')),
- "characters": prep(extr('<td>Characters</td><td>', '</td>')),
+ "artist" : [o["artist"] for o in iget("artists") or ()],
+ "group" : [o["group"] for o in iget("groups") or ()],
+ "parody" : [o["parody"] for o in iget("parodys") or ()],
+ "characters": [o["character"] for o in iget("characters") or ()]
}
def images(self, _):
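
Artist/group/parody/character metadata now comes straight from the gallery's embedded JSON instead of requiring a second HTML request. A worked example using the key names from the code above:

```python
info = {
    "artists":    [{"artist": "artist-a"}],
    "groups":     None,
    "parodys":    [{"parody": "original"}],
    "characters": [],
}
iget = info.get
print([o["artist"] for o in iget("artists") or ()])  # ['artist-a']
print([o["group"] for o in iget("groups") or ()])    # [] (None-safe)
```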
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 20a4c1a..e07b64e 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2020 Leonardo Taccari
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -43,6 +43,7 @@ class InstagramExtractor(Extractor):
self.login()
data = self.metadata()
videos = self.config("videos", True)
+ previews = self.config("previews", False)
video_headers = {"User-Agent": "Mozilla/5.0"}
for post in self.posts():
@@ -56,14 +57,18 @@ class InstagramExtractor(Extractor):
yield Message.Directory, post
for file in files:
- url = file.get("video_url")
- if not url:
- url = file["display_url"]
- elif not videos:
- continue
- else:
- file["_http_headers"] = video_headers
file.update(post)
+
+ url = file.get("video_url")
+ if url:
+ if videos:
+ file["_http_headers"] = video_headers
+ text.nameext_from_url(url, file)
+ yield Message.Url, url, file
+ if not previews:
+ continue
+
+ url = file["display_url"]
yield Message.Url, url, text.nameext_from_url(url, file)
def metadata(self):
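
The restructured loop decouples video downloads from their preview images. A condensed, runnable sketch of the new control flow:

```python
def select_urls(file, videos=True, previews=False):
    """Yield download URLs for one file entry (sketch of the logic above)."""
    url = file.get("video_url")
    if url:
        if videos:
            yield url              # the video itself
        if not previews:
            return                 # skip the preview image
    yield file["display_url"]      # image posts, or enabled previews

post = {"video_url": "v.mp4", "display_url": "p.jpg"}
print(list(select_urls(post)))                 # ['v.mp4']
print(list(select_urls(post, previews=True)))  # ['v.mp4', 'p.jpg']
```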
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 9537263..7287c38 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -42,6 +42,7 @@ class KemonopartyExtractor(Extractor):
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
generators = self._build_file_generators(self.config("files"))
+ duplicates = self.config("duplicates")
comments = self.config("comments")
username = dms = None
@@ -84,7 +85,7 @@ class KemonopartyExtractor(Extractor):
match = find_hash(url)
if match:
post["hash"] = hash = match.group(1)
- if hash in hashes:
+ if hash in hashes and not duplicates:
self.log.debug("Skipping %s (duplicate)", url)
continue
hashes.add(hash)
@@ -273,6 +274,11 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
("https://kemono.party/patreon/user/4158582/post/32099982", {
"count": 2,
}),
+ # allow duplicates (#2440)
+ ("https://kemono.party/patreon/user/4158582/post/32099982", {
+ "options": (("duplicates", True),),
+ "count": 3,
+ }),
# DMs (#2008)
("https://kemono.party/patreon/user/34134344/post/38129255", {
"options": (("dms", True),),
@@ -323,8 +329,9 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
}),
(("https://kemono.party/discord"
"/server/256559665620451329/channel/462437519519383555#"), {
- "pattern": r"https://kemono\.party/data/attachments/discord"
- r"/256559665620451329/\d+/\d+/.+",
+ "pattern": r"https://kemono\.party/data/("
+ r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
+ r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
"count": ">= 2",
}),
# 'inline' files
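
Duplicate detection keys on the SHA-256 hash embedded in kemono.party file paths; the new `duplicates` option merely bypasses that check. A worked example of the pattern above:

```python
import re

find_hash = re.compile(r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match
path = "/ab/cd/" + "0123456789abcdef" * 4 + ".png"
print(find_hash(path).group(1))  # the 64-digit hex dedup key
```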
diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py
index 85ec806..6e66772 100644
--- a/gallery_dl/extractor/kissgoddess.py
+++ b/gallery_dl/extractor/kissgoddess.py
@@ -20,7 +20,7 @@ class KissgoddessGalleryExtractor(GalleryExtractor):
test = ("https://kissgoddess.com/album/18285.html", {
"pattern": r"https://pic\.kissgoddess\.com"
r"/gallery/16473/18285/s/\d+\.jpg",
- "count": 8,
+ "count": 19,
"keyword": {
"gallery_id": 18285,
"title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや",
@@ -45,6 +45,8 @@ class KissgoddessGalleryExtractor(GalleryExtractor):
while page:
for url in text.extract_iter(page, "<img src='", "'"):
yield url, None
+ for url in text.extract_iter(page, "<img data-original='", "'"):
+ yield url, None
pnum += 1
url = "{}/album/{}_{}.html".format(
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
index 1b3dd18..0b0da65 100644
--- a/gallery_dl/extractor/mangasee.py
+++ b/gallery_dl/extractor/mangasee.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -64,7 +64,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
self.slug = extr('vm.IndexName = "', '"')
data = self._transform_chapter(data)
- data["manga"] = extr('vm.SeriesName = "', '"')
+ data["manga"] = text.unescape(extr('vm.SeriesName = "', '"'))
return data
def images(self, page):
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 6d0e94b..e9fde97 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -113,10 +113,16 @@ class NewgroundsExtractor(Extractor):
if self.flash:
url += "/format/flash"
- response = self.request(url, fatal=False)
- if response.status_code >= 400:
- return {}
- page = response.text
+ with self.request(url, fatal=False) as response:
+ if response.status_code >= 400:
+ return {}
+ page = response.text
+
+ pos = page.find('id="adults_only"')
+ if pos >= 0:
+ msg = text.extract(page, 'class="highlight">', '<', pos)[0]
+ self.log.warning('"%s"', msg)
+
extr = text.extract_from(page)
data = extract_data(extr, post_url)
@@ -230,16 +236,20 @@ class NewgroundsExtractor(Extractor):
yield fmt[1][0]["src"]
def _pagination(self, kind):
- root = self.user_root
+ url = "{}/{}".format(self.user_root, kind)
+ params = {
+ "page": 1,
+ "isAjaxRequest": "1",
+ }
headers = {
- "Accept": "application/json, text/javascript, */*; q=0.01",
+ "Referer": url,
"X-Requested-With": "XMLHttpRequest",
- "Referer": root,
}
- url = "{}/{}/page/1".format(root, kind)
while True:
- with self.request(url, headers=headers, fatal=False) as response:
+ with self.request(
+ url, params=params, headers=headers,
+ fatal=False) as response:
try:
data = response.json()
except ValueError:
@@ -250,14 +260,17 @@ class NewgroundsExtractor(Extractor):
msg = ", ".join(text.unescape(e) for e in data["errors"])
raise exception.StopExtraction(msg)
- for year in data["sequence"]:
- for item in data["years"][str(year)]["items"]:
+ for year, items in data["items"].items():
+ for item in items:
page_url = text.extract(item, 'href="', '"')[0]
- yield text.urljoin(root, page_url)
+ if page_url[0] == "/":
+ page_url = self.root + page_url
+ yield page_url
- if not data["more"]:
+ more = data.get("load_more")
+ if not more or len(more) < 8:
return
- url = text.urljoin(root, data["more"])
+ params["page"] += 1
class NewgroundsImageExtractor(NewgroundsExtractor):
@@ -293,7 +306,12 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", {
"url": "84eec95e663041a80630df72719f231e157e5f5d",
"count": 2,
- })
+ }),
+ # "adult" rated (#2456)
+ ("https://www.newgrounds.com/art/view/kekiiro/red", {
+ "options": (("username", None),),
+ "count": 1,
+ }),
)
def __init__(self, match):
@@ -360,6 +378,11 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
"pattern": r"https://uploads\.ungrounded\.net/alternate/1482000"
r"/1482860_alternate_102516\.720p\.mp4\?\d+",
}),
+ # "adult" rated (#2456)
+ ("https://www.newgrounds.com/portal/view/717744", {
+ "options": (("username", None),),
+ "count": 1,
+ }),
)
def __init__(self, match):
@@ -454,25 +477,28 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
)
def _pagination(self, kind):
- num = 1
+ url = "{}/favorites/{}".format(self.user_root, kind)
+ params = {
+ "page": 1,
+ "isAjaxRequest": "1",
+ }
headers = {
- "Accept": "application/json, text/javascript, */*; q=0.01",
+ "Referer": url,
"X-Requested-With": "XMLHttpRequest",
- "Referer": self.user_root,
}
while True:
- url = "{}/favorites/{}/{}".format(self.user_root, kind, num)
- response = self.request(url, headers=headers)
+ response = self.request(url, params=params, headers=headers)
if response.history:
return
- favs = self._extract_favorites(response.text)
+ data = response.json()
+ favs = self._extract_favorites(data.get("component") or "")
yield from favs
if len(favs) < 24:
return
- num += 1
+ params["page"] += 1
def _extract_favorites(self, page):
return [
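
Both pagination methods now drive Newgrounds' AJAX endpoint with `page`/`isAjaxRequest` parameters instead of `/page/N` URLs. A hedged standalone sketch of that loop; the user root is illustrative:

```python
import requests

url = "https://tomfulp.newgrounds.com/art"   # user root + kind (illustrative)
params = {"page": 1, "isAjaxRequest": "1"}
headers = {"Referer": url, "X-Requested-With": "XMLHttpRequest"}

while True:
    data = requests.get(url, params=params, headers=headers).json()
    for year, items in data["items"].items():
        for item in items:        # HTML snippets containing 'href="..."'
            pass
    more = data.get("load_more")
    if not more or len(more) < 8:  # empty/whitespace "load more" markup
        break
    params["page"] += 1
```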
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 25344e8..2079b73 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2021 Mike Fährmann
+# Copyright 2016-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -20,8 +20,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+"
class PinterestExtractor(Extractor):
"""Base class for pinterest extractors"""
category = "pinterest"
- filename_fmt = "{category}_{id}.{extension}"
- archive_fmt = "{id}"
+ filename_fmt = "{category}_{id}{media_id:?_//}.{extension}"
+ archive_fmt = "{id}{media_id}"
root = "https://www.pinterest.com"
def __init__(self, match):
@@ -35,28 +35,39 @@ class PinterestExtractor(Extractor):
yield Message.Directory, data
for pin in self.pins():
+ pin.update(data)
- try:
- media = self._media_from_pin(pin)
- except Exception:
- self.log.debug("Unable to fetch download URL for pin %s",
- pin.get("id"))
- continue
+ carousel_data = pin.get("carousel_data")
+ if carousel_data:
+ for num, slot in enumerate(carousel_data["carousel_slots"], 1):
+ slot["media_id"] = slot.pop("id")
+ pin.update(slot)
+ pin["num"] = num
+ size, image = next(iter(slot["images"].items()))
+ url = image["url"].replace("/" + size + "/", "/originals/")
+ yield Message.Url, url, text.nameext_from_url(url, pin)
- if not videos and media.get("duration") is not None:
- continue
+ else:
+ try:
+ media = self._media_from_pin(pin)
+ except Exception:
+ self.log.debug("Unable to fetch download URL for pin %s",
+ pin.get("id"))
+ continue
- pin.update(data)
- pin.update(media)
- url = media["url"]
- text.nameext_from_url(url, pin)
+ if videos or media.get("duration") is None:
+ pin.update(media)
+ pin["num"] = 0
+ pin["media_id"] = ""
+
+ url = media["url"]
+ text.nameext_from_url(url, pin)
- if pin["extension"] == "m3u8":
- url = "ytdl:" + url
- pin["extension"] = "mp4"
- pin["_ytdl_extra"] = {"protocol": "m3u8_native"}
+ if pin["extension"] == "m3u8":
+ url = "ytdl:" + url
+ pin["extension"] = "mp4"
- yield Message.Url, url, pin
+ yield Message.Url, url, pin
def metadata(self):
"""Return general metadata"""
@@ -124,7 +135,8 @@ class PinterestBoardExtractor(PinterestExtractor):
subcategory = "board"
directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}")
archive_fmt = "{board[id]}_{id}"
- pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/(?!_saved)([^/?#&]+)/?$"
+ pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)"
+ "/(?!_saved|_created)([^/?#&]+)/?$")
test = (
("https://www.pinterest.com/g1952849/test-/", {
"pattern": r"https://i\.pinimg\.com/originals/",
@@ -192,6 +204,28 @@ class PinterestUserExtractor(PinterestExtractor):
yield Message.Queue, self.root + url, board
+class PinterestCreatedExtractor(PinterestExtractor):
+ """Extractor for a user's created pins"""
+ subcategory = "created"
+ directory_fmt = ("{category}", "{user}")
+ pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$"
+ test = ("https://www.pinterest.com/amazon/_created", {
+ "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}"
+ r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
+ "count": 10,
+ })
+
+ def __init__(self, match):
+ PinterestExtractor.__init__(self, match)
+ self.user = text.unquote(match.group(1))
+
+ def metadata(self):
+ return {"user": self.user}
+
+ def pins(self):
+ return self.api.user_activity_pins(self.user)
+
+
class PinterestSectionExtractor(PinterestExtractor):
"""Extractor for board sections on pinterest.com"""
subcategory = "section"
@@ -385,6 +419,16 @@ class PinterestAPI():
options = {"board_id": board_id, "add_vase": True}
return self._pagination("BoardRelatedPixieFeed", options)
+ def user_activity_pins(self, user):
+ """Yield pins created by 'user'"""
+ options = {
+ "exclude_add_pin_rep": True,
+ "field_set_key" : "grid_item",
+ "is_own_profile_pins": False,
+ "username" : user,
+ }
+ return self._pagination("UserActivityPins", options)
+
def search(self, query):
"""Yield pins from searches"""
options = {"query": query, "scope": "pins", "rs": "typed"}
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 965391c..2af917d 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -8,6 +8,7 @@
from .common import Extractor, Message
from .. import text
+import itertools
class SkebExtractor(Extractor):
@@ -22,7 +23,6 @@ class SkebExtractor(Extractor):
Extractor.__init__(self, match)
self.user_name = match.group(1)
self.thumbnails = self.config("thumbnails", False)
- self.sent_requests = self.config("sent-requests", False)
def items(self):
for user_name, post_num in self.posts():
@@ -35,18 +35,18 @@ class SkebExtractor(Extractor):
def posts(self):
"""Return post number"""
- def _pagination(self):
- url = "{}/api/users/{}/works".format(self.root, self.user_name)
- params = {"role": "creator", "sort": "date", "offset": 0}
+ def _pagination(self, url, params):
headers = {"Referer": self.root, "Authorization": "Bearer null"}
- do_requests = self.sent_requests
+ params["offset"] = 0
while True:
posts = self.request(url, params=params, headers=headers).json()
for post in posts:
- post_num = post["path"].rpartition("/")[2]
- user_name = post["path"].split("/")[1][1:]
+ parts = post["path"].split("/")
+ user_name = parts[1][1:]
+ post_num = parts[3]
+
if post["private"]:
self.log.debug("Skipping @%s/%s (private)",
user_name, post_num)
@@ -54,13 +54,7 @@ class SkebExtractor(Extractor):
yield user_name, post_num
if len(posts) < 30:
- if do_requests:
- params["offset"] = 0
- params['role'] = "client"
- do_requests = False
- continue
- else:
- return
+ return
params["offset"] += 30
def _get_post_data(self, user_name, post_num):
@@ -134,6 +128,54 @@ class SkebPostExtractor(SkebExtractor):
"""Extractor for a single skeb post"""
subcategory = "post"
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)"
+ test = ("https://skeb.jp/@kanade_cocotte/works/38", {
+ "count": 2,
+ "keyword": {
+ "anonymous": False,
+ "body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ",
+ "client": {
+ "avatar_url": "https://pbs.twimg.com/profile_images"
+ "/1471184042791895042/f0DcWFGl.jpg",
+ "header_url": None,
+ "id": 1196514,
+ "name": "湊ラギ",
+ "screen_name": "minato_ragi",
+ },
+ "completed_at": "2022-02-27T14:03:45.442Z",
+ "content_category": "preview",
+ "creator": {
+ "avatar_url": "https://pbs.twimg.com/profile_images"
+ "/1225470417063645184/P8_SiB0V.jpg",
+ "header_url": "https://pbs.twimg.com/profile_banners"
+ "/71243217/1647958329/1500x500",
+ "id": 159273,
+ "name": "イチノセ奏",
+ "screen_name": "kanade_cocotte",
+ },
+ "date": "dt:2022-02-27 14:03:45",
+ "file_id": int,
+ "file_url": str,
+ "genre": "art",
+ "nsfw": False,
+ "original": {
+ "byte_size": int,
+ "duration": None,
+ "extension": "re:psd|png",
+ "frame_rate": None,
+ "height": 3727,
+ "is_movie": False,
+ "width": 2810,
+ },
+ "post_num": "38",
+ "post_url": "https://skeb.jp/@kanade_cocotte/works/38",
+ "source_body": None,
+ "source_thanks": None,
+ "tags": list,
+ "thanks": None,
+ "translated_body": False,
+ "translated_thanks": None,
+ }
+ })
def __init__(self, match):
SkebExtractor.__init__(self, match)
@@ -146,7 +188,23 @@ class SkebPostExtractor(SkebExtractor):
class SkebUserExtractor(SkebExtractor):
"""Extractor for all posts from a skeb user"""
subcategory = "user"
- pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)"
+ pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$"
+ test = ("https://skeb.jp/@kanade_cocotte", {
+ "pattern": r"https://skeb\.imgix\.net/uploads/origins/[\w-]+"
+ r"\?bg=%23fff&auto=format&txtfont=bold&txtshad=70"
+ r"&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150"
+ r"&txt=SAMPLE&w=800&s=\w+",
+ "range": "1-5",
+ })
def posts(self):
- return self._pagination()
+ url = "{}/api/users/{}/works".format(self.root, self.user_name)
+
+ params = {"role": "creator", "sort": "date"}
+ posts = self._pagination(url, params)
+
+ if self.config("sent-requests", False):
+ params = {"role": "client", "sort": "date"}
+ posts = itertools.chain(posts, self._pagination(url, params))
+
+ return posts
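
Instead of resetting the offset mid-loop, the user extractor now chains a second pagination pass for sent requests. A minimal illustration of the chaining:

```python
import itertools

created = iter(["@user/works/1", "@user/works/2"])  # stand-ins for
sent    = iter(["@user/works/9"])                   # _pagination() iterators

posts = itertools.chain(created, sent)
print(list(posts))  # ['@user/works/1', '@user/works/2', '@user/works/9']
```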
diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py
new file mode 100644
index 0000000..8e9bf2c
--- /dev/null
+++ b/gallery_dl/extractor/telegraph.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractor for https://telegra.ph/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class TelegraphGalleryExtractor(GalleryExtractor):
+ """Extractor for articles from telegra.ph"""
+
+ category = "telegraph"
+ root = "https://telegra.ph"
+ directory_fmt = ("{category}", "{slug}")
+ filename_fmt = "{num_formatted}_{filename}.{extension}"
+ archive_fmt = "{slug}_{num}"
+ pattern = r"(?:https?://)(?:www\.)??telegra\.ph(/[^/?#]+)"
+ test = (
+ ("https://telegra.ph/Telegraph-Test-03-28", {
+ "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.png",
+ "keyword": {
+ "author": "mikf",
+ "caption": r"re:test|",
+ "count": 2,
+ "date": "dt:2022-03-28 16:01:36",
+ "description": "Just a test",
+ "post_url": "https://telegra.ph/Telegraph-Test-03-28",
+ "slug": "Telegraph-Test-03-28",
+ "title": "Telegra.ph Test",
+ },
+ }),
+ ("https://telegra.ph/森-03-28", {
+ "pattern": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+ "count": 1,
+ "keyword": {
+ "author": "&",
+ "caption": "kokiri",
+ "count": 1,
+ "date": "dt:2022-03-28 16:31:26",
+ "description": "コキリの森",
+ "extension": "jpg",
+ "filename": "3ea79d23b0dd0889f215a",
+ "num": 1,
+ "num_formatted": "1",
+ "post_url": "https://telegra.ph/森-03-28",
+ "slug": "森-03-28",
+ "title": '"森"',
+ "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg",
+ },
+ }),
+ )
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ data = {
+ "title": text.unescape(extr(
+ 'property="og:title" content="', '"')),
+ "description": text.unescape(extr(
+ 'property="og:description" content="', '"')),
+ "date": text.parse_datetime(extr(
+ 'property="article:published_time" content="', '"'),
+ "%Y-%m-%dT%H:%M:%S%z"),
+ "author": text.unescape(extr(
+ 'property="article:author" content="', '"')),
+ "post_url": text.unescape(extr(
+ 'rel="canonical" href="', '"')),
+ }
+ data["slug"] = data["post_url"][19:]
+ return data
+
+ def images(self, page):
+ figures = tuple(text.extract_iter(page, "<figure>", "</figure>"))
+ num_zeroes = len(str(len(figures)))
+ num = 0
+
+ result = []
+ for figure in figures:
+ src, pos = text.extract(figure, 'src="', '"')
+ if src.startswith("/embed/"):
+ continue
+ caption, pos = text.extract(figure, "<figcaption>", "<", pos)
+ url = self.root + src
+ num += 1
+
+ result.append((url, {
+ "url" : url,
+ "caption" : text.unescape(caption),
+ "num" : num,
+ "num_formatted": str(num).zfill(num_zeroes),
+ }))
+ return result
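
The `num_formatted` field zero-pads the image index to the width of the total figure count, keeping filenames sortable. A worked example:

```python
figures = ["<figure>...</figure>"] * 12
num_zeroes = len(str(len(figures)))  # 2 digits for 12 figures
print(str(3).zfill(num_zeroes))      # "03"
```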
diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py
index ec8ab35..355ca21 100644
--- a/gallery_dl/extractor/twibooru.py
+++ b/gallery_dl/extractor/twibooru.py
@@ -36,8 +36,9 @@ class TwibooruExtractor(BooruExtractor):
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
- name, sep, rest = post["name"].rpartition(".")
- post["filename"] = name if sep else rest
+ if "name" in post:
+ name, sep, rest = post["name"].rpartition(".")
+ post["filename"] = name if sep else rest
class TwibooruPostExtractor(TwibooruExtractor):
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 6d51834..4c46170 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,7 +15,7 @@ import json
BASE_PATTERN = (
r"(?:https?://)?(?:www\.|mobile\.)?"
- r"(?:twitter\.com|nitter\.net)"
+ r"(?:(?:fx)?twitter\.com|nitter\.net)"
)
@@ -217,23 +217,24 @@ class TwitterExtractor(Extractor):
if "legacy" in tweet:
tweet = tweet["legacy"]
+ tget = tweet.get
entities = tweet["entities"]
tdata = {
"tweet_id" : text.parse_int(tweet["id_str"]),
"retweet_id" : text.parse_int(
- tweet.get("retweeted_status_id_str")),
+ tget("retweeted_status_id_str")),
"quote_id" : text.parse_int(
- tweet.get("quoted_status_id_str")),
+ tget("quoted_status_id_str")),
"reply_id" : text.parse_int(
- tweet.get("in_reply_to_status_id_str")),
+ tget("in_reply_to_status_id_str")),
"date" : text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"user" : user,
"lang" : tweet["lang"],
- "favorite_count": tweet["favorite_count"],
- "quote_count" : tweet["quote_count"],
- "reply_count" : tweet["reply_count"],
- "retweet_count" : tweet["retweet_count"],
+ "favorite_count": tget("favorite_count"),
+ "quote_count" : tget("quote_count"),
+ "reply_count" : tget("reply_count"),
+ "retweet_count" : tget("retweet_count"),
}
hashtags = entities.get("hashtags")
@@ -248,7 +249,7 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
- content = tweet["full_text"]
+ content = tget("full_text") or tget("text") or ""
urls = entities.get("urls")
if urls:
for url in urls:
@@ -269,33 +270,36 @@ class TwitterExtractor(Extractor):
return tdata
def _transform_user(self, user):
+ uid = user.get("rest_id") or user["id_str"]
+
try:
- return self._user_cache[user.get("rest_id") or user["id_str"]]
+ return self._user_cache[uid]
except KeyError:
pass
- uid = user.get("rest_id") or user["id_str"]
if "legacy" in user:
user = user["legacy"]
+
+ uget = user.get
entities = user["entities"]
self._user_cache[uid] = udata = {
"id" : text.parse_int(uid),
"name" : user["screen_name"],
"nick" : user["name"],
- "location" : user["location"],
+ "location" : uget("location"),
"date" : text.parse_datetime(
- user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "verified" : user.get("verified", False),
- "profile_banner" : user.get("profile_banner_url", ""),
- "profile_image" : user.get(
+ uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
+ "verified" : uget("verified", False),
+ "profile_banner" : uget("profile_banner_url", ""),
+ "profile_image" : uget(
"profile_image_url_https", "").replace("_normal.", "."),
- "favourites_count": user["favourites_count"],
- "followers_count" : user["followers_count"],
- "friends_count" : user["friends_count"],
- "listed_count" : user["listed_count"],
- "media_count" : user["media_count"],
- "statuses_count" : user["statuses_count"],
+ "favourites_count": uget("favourites_count"),
+ "followers_count" : uget("followers_count"),
+ "friends_count" : uget("friends_count"),
+ "listed_count" : uget("listed_count"),
+ "media_count" : uget("media_count"),
+ "statuses_count" : uget("statuses_count"),
}
descr = user["description"]
@@ -653,6 +657,11 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/1486373748911575046", {
"count": 4,
}),
+ # age-restricted (#2354)
+ ("https://twitter.com/mightbecursed/status/1492954264909479936", {
+ "options": (("syndication", True),),
+ "count": 1,
+ }),
)
def __init__(self, match):
@@ -770,6 +779,7 @@ class TwitterAPI():
}
self._nsfw_warning = True
+ self._syndication = extractor.config("syndication")
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
self._user = None
@@ -1153,9 +1163,10 @@ class TwitterAPI():
elif esw("conversationthread-"):
tweets.extend(entry["content"]["items"])
elif esw("tombstone-"):
- self._report_tombstone(
- entry,
- entry["content"]["itemContent"]["tombstoneInfo"])
+ item = entry["content"]["itemContent"]
+ item["tweet_results"] = \
+ {"result": {"tombstone": item["tombstoneInfo"]}}
+ tweets.append(entry)
elif esw("cursor-bottom-"):
cursor = entry["content"]
if not cursor.get("stopOnEmptyResponse", True):
@@ -1168,8 +1179,10 @@ class TwitterAPI():
tweet = ((entry.get("content") or entry["item"])
["itemContent"]["tweet_results"]["result"])
if "tombstone" in tweet:
- self._report_tombstone(entry, tweet["tombstone"])
- continue
+ tweet = self._process_tombstone(
+ entry, tweet["tombstone"])
+ if not tweet:
+ continue
if "tweet" in tweet:
tweet = tweet["tweet"]
legacy = tweet["legacy"]
@@ -1259,10 +1272,45 @@ class TwitterAPI():
return
variables["cursor"] = cursor
- def _report_tombstone(self, entry, tombstone):
+ def _process_tombstone(self, entry, tombstone):
text = (tombstone.get("richText") or tombstone["text"])["text"]
- if text.startswith("Age-restricted") and self._nsfw_warning:
- self.extractor.log.warning(text)
- self._nsfw_warning = False
- self.extractor.log.debug(
- "Skipping %s (%s)", entry["entryId"].rpartition("-")[2], text)
+ tweet_id = entry["entryId"].rpartition("-")[2]
+
+ if text.startswith("Age-restricted"):
+ if self._syndication:
+ return self._syndication_tweet(tweet_id)
+ elif self._nsfw_warning:
+ self._nsfw_warning = False
+ self.extractor.log.warning('"%s"', text)
+
+ self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
+
+ def _syndication_tweet(self, tweet_id):
+ tweet = self.extractor.request(
+ "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json()
+
+ tweet["user"]["description"] = ""
+ tweet["user"]["entities"] = {"description": {}}
+
+ if "video" in tweet:
+ video = tweet["video"]
+ del video["variants"][:-1]
+ video["variants"][0]["url"] = video["variants"][0]["src"]
+ tweet["extended_entities"] = {"media": [{
+ "video_info" : video,
+ "original_info": {"width" : 0, "height": 0},
+ }]}
+ elif "photos" in tweet:
+ for p in tweet["photos"]:
+ p["media_url_https"] = p["url"]
+ p["original_info"] = {
+ "width" : p["width"],
+ "height": p["height"],
+ }
+ tweet["extended_entities"] = {"media": tweet["photos"]}
+
+ return {
+ "rest_id": tweet["id_str"],
+ "legacy" : tweet,
+ "user" : tweet["user"],
+ }
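# The syndication fallback above retrieves age-restricted tweets from
# Twitter's unauthenticated syndication CDN. A minimal standalone sketch
# of that request (endpoint taken from this diff; "requests" here stands
# in for gallery-dl's internal session handling):
import requests

def fetch_syndication_tweet(tweet_id):
    # cdn.syndication.twimg.com serves tweet JSON without a login
    url = "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id
    return requests.get(url).json()

# The fallback is gated behind the new "syndication" option, e.g.
#   gallery-dl -o syndication=true "https://twitter.com/.../status/..."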
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index 2405dc3..6036322 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -193,7 +193,7 @@ class UnsplashSearchExtractor(UnsplashExtractor):
"""Extractor for unsplash search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
- test = ("https://unsplash.com/s/photos/nature", {
+ test = ("https://unsplash.com/s/photos/hair-style", {
"pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+"
r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
"range": "1-30",
@@ -206,7 +206,7 @@ class UnsplashSearchExtractor(UnsplashExtractor):
def photos(self):
url = self.root + "/napi/search/photos"
- params = {"query": text.unquote(self.item)}
+ params = {"query": text.unquote(self.item.replace('-', ' '))}
if self.query:
params.update(text.parse_query(self.query))
return self._pagination(url, params, True)
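# With this change a dash-separated search slug from the URL is turned
# back into a plain search phrase before querying the API; a quick
# illustration using the stdlib equivalent of gallery_dl.text.unquote:
from urllib.parse import unquote

slug = "hair-style"
print(unquote(slug.replace('-', ' ')))  # -> "hair style"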
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index c2b4d99..27d5e40 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Mike Fährmann
+# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -43,6 +43,8 @@ def parse(format_string, default=None):
cls = ExpressionFormatter
elif kind == "M":
cls = ModuleFormatter
+ elif kind == "F":
+ cls = FStringFormatter
formatter = _CACHE[key] = cls(format_string, default)
return formatter
@@ -206,6 +208,13 @@ class ModuleFormatter():
self.format_map = getattr(module, function_name)
+class FStringFormatter():
+    """Generate text by evaluating an f-string literal"""
+
+ def __init__(self, fstring, default=None):
+ self.format_map = util.compile_expression("f'''" + fstring + "'''")
+
+
def parse_field_name(field_name):
first, rest = _string.formatter_field_name_split(field_name)
funcs = []
@@ -245,7 +254,7 @@ def parse_format_spec(format_spec, conversion):
"C": string.capwords,
"j": json.dumps,
"t": str.strip,
- "T": util.to_timestamp,
+ "T": util.datetime_to_timestamp_string,
"d": text.parse_timestamp,
"U": text.unescape,
"S": util.to_string,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 3eebf0b..044369a 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -389,8 +389,10 @@ class DownloadJob(Job):
def initialize(self, kwdict=None):
"""Delayed initialization of PathFormat, etc."""
- cfg = self.extractor.config
- pathfmt = self.pathfmt = path.PathFormat(self.extractor)
+ extr = self.extractor
+ cfg = extr.config
+
+ pathfmt = self.pathfmt = path.PathFormat(extr)
if kwdict:
pathfmt.set_directory(kwdict)
@@ -403,17 +405,18 @@ class DownloadJob(Job):
archive = cfg("archive")
if archive:
archive = util.expand_path(archive)
+ archive_format = (cfg("archive-prefix", extr.category) +
+ cfg("archive-format", extr.archive_fmt))
try:
if "{" in archive:
archive = formatter.parse(archive).format_map(kwdict)
- self.archive = util.DownloadArchive(archive, self.extractor)
+ self.archive = util.DownloadArchive(archive, archive_format)
except Exception as exc:
- self.extractor.log.warning(
+ extr.log.warning(
"Failed to open download archive at '%s' ('%s: %s')",
archive, exc.__class__.__name__, exc)
else:
- self.extractor.log.debug(
- "Using download archive '%s'", archive)
+ extr.log.debug("Using download archive '%s'", archive)
skip = cfg("skip", True)
if skip:
@@ -435,7 +438,7 @@ class DownloadJob(Job):
if self.archive:
self.archive.check = pathfmt.exists
- postprocessors = self.extractor.config_accumulate("postprocessors")
+ postprocessors = extr.config_accumulate("postprocessors")
if postprocessors:
self.hooks = collections.defaultdict(list)
pp_log = self.get_logger("postprocessor")
@@ -453,7 +456,7 @@ class DownloadJob(Job):
clist = pp_dict.get("blacklist")
negate = True
if clist and not util.build_extractor_filter(
- clist, negate)(self.extractor):
+ clist, negate)(extr):
continue
name = pp_dict.get("name")
@@ -471,8 +474,7 @@ class DownloadJob(Job):
pp_list.append(pp_obj)
if pp_list:
- self.extractor.log.debug(
- "Active postprocessor modules: %s", pp_list)
+ extr.log.debug("Active postprocessor modules: %s", pp_list)
if "init" in self.hooks:
for callback in self.hooks["init"]:
callback(pathfmt)
@@ -530,6 +532,10 @@ class SimulationJob(DownloadJob):
class KeywordJob(Job):
"""Print available keywords"""
+ def __init__(self, url, parent=None):
+ Job.__init__(self, url, parent)
+ self.private = config.get(("output",), "private")
+
def handle_url(self, url, kwdict):
print("\nKeywords for filenames and --filter:")
print("------------------------------------")
@@ -567,21 +573,20 @@ class KeywordJob(Job):
KeywordJob(extr or url, self).run()
raise exception.StopExtraction()
- @staticmethod
- def print_kwdict(kwdict, prefix=""):
+ def print_kwdict(self, kwdict, prefix=""):
"""Print key-value pairs in 'kwdict' with formatting"""
suffix = "]" if prefix else ""
for key, value in sorted(kwdict.items()):
- if key[0] == "_":
+ if key[0] == "_" and not self.private:
continue
key = prefix + key + suffix
if isinstance(value, dict):
- KeywordJob.print_kwdict(value, key + "[")
+ self.print_kwdict(value, key + "[")
elif isinstance(value, list):
if value and isinstance(value[0], dict):
- KeywordJob.print_kwdict(value[0], key + "[][")
+ self.print_kwdict(value[0], key + "[][")
else:
print(key, "[]", sep="")
for val in value:
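# Two job-level changes here: DownloadJob.initialize() now assembles the
# archive key format itself and passes it to util.DownloadArchive, and
# KeywordJob honors "output.private" so that "-K" also prints
# underscore-prefixed keys. A sketch of the new archive call, with a
# hypothetical path and format string:
from gallery_dl import util

archive = util.DownloadArchive(
    "/tmp/archive.sqlite3",       # database file
    "twitter{tweet_id}_{num}",    # "archive-prefix" + "archive-format"
)
if not archive.check({"tweet_id": 1, "num": 1}):
    archive.add({"tweet_id": 1, "num": 1})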
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index e1ada09..782063d 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -401,6 +401,18 @@ def build_parser():
help="Convert Pixiv Ugoira to WebM in VP9 lossless mode",
)
postprocessor.add_argument(
+ "--ugoira-conv-copy",
+ dest="postprocessors", action="append_const", const={
+ "name" : "ugoira",
+ "extension" : "mkv",
+ "ffmpeg-args" : ("-c:v", "copy"),
+ "ffmpeg-twopass" : False,
+ "repeat-last-frame": False,
+ "whitelist" : ("pixiv", "danbooru"),
+ },
+ help="Convert Pixiv Ugoira to MKV without re-encoding any frames",
+ )
+ postprocessor.add_argument(
"--write-metadata",
dest="postprocessors",
action="append_const", const="metadata",
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index e776888..5e8f3e9 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -59,9 +59,35 @@ class MetadataPP(PostProcessor):
events = events.split(",")
job.register_hooks({event: self.run for event in events}, options)
+ archive = options.get("archive")
+ if archive:
+ extr = job.extractor
+ archive = util.expand_path(archive)
+ archive_format = (
+ options.get("archive-prefix", extr.category) +
+ options.get("archive-format", "_MD_" + extr.archive_fmt))
+ try:
+ if "{" in archive:
+ archive = formatter.parse(archive).format_map(
+ job.pathfmt.kwdict)
+ self.archive = util.DownloadArchive(
+ archive, archive_format, "_archive_metadata")
+ except Exception as exc:
+ self.log.warning(
+ "Failed to open download archive at '%s' ('%s: %s')",
+ archive, exc.__class__.__name__, exc)
+ else:
+ self.log.debug("Using download archive '%s'", archive)
+ else:
+ self.archive = None
+
self.mtime = options.get("mtime")
def run(self, pathfmt):
+ archive = self.archive
+ if archive and archive.check(pathfmt.kwdict):
+ return
+
directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
@@ -73,6 +99,9 @@ class MetadataPP(PostProcessor):
with open(path, "w", encoding="utf-8") as fp:
self.write(fp, pathfmt.kwdict)
+ if archive:
+ archive.add(pathfmt.kwdict)
+
if self.mtime:
mtime = pathfmt.kwdict.get("_mtime")
if mtime:
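# The metadata postprocessor can now keep its own download archive, so a
# metadata file is written only once per archive key; a configuration
# sketch in Python dict form (option names from this diff, path and key
# format hypothetical):
metadata_pp = {
    "name"          : "metadata",
    "archive"       : "~/gallery-dl/metadata.sqlite3",
    "archive-format": "_MD_{id}",  # default: "_MD_" + extractor archive_fmt
}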
diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py
index 098984a..3f8d90a 100644
--- a/gallery_dl/postprocessor/mtime.py
+++ b/gallery_dl/postprocessor/mtime.py
@@ -9,7 +9,8 @@
"""Use metadata as file modification time"""
from .common import PostProcessor
-from ..text import parse_int
+from .. import text, util
+from datetime import datetime
class MtimePP(PostProcessor):
@@ -27,8 +28,11 @@ class MtimePP(PostProcessor):
def run(self, pathfmt):
mtime = pathfmt.kwdict.get(self.key)
- ts = getattr(mtime, "timestamp", None)
- pathfmt.kwdict["_mtime"] = ts() if ts else parse_int(mtime)
+ pathfmt.kwdict["_mtime"] = (
+ util.datetime_to_timestamp(mtime)
+ if isinstance(mtime, datetime) else
+ text.parse_int(mtime)
+ )
__postprocessor__ = MtimePP
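# util.datetime_to_timestamp() treats a naive datetime as UTC and returns
# a float; a self-contained equivalent of the computation used here:
from datetime import datetime, timedelta

EPOCH = datetime(1970, 1, 1)   # util.EPOCH
SECOND = timedelta(0, 1)       # util.SECOND

def datetime_to_timestamp(dt):
    # (dt - EPOCH) / SECOND -> seconds since the Unix epoch as a float
    return (dt - EPOCH) / SECOND

print(datetime_to_timestamp(datetime(2010, 1, 1)))  # -> 1262304000.0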
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index e5bdebc..c5477d2 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2021 Mike Fährmann
+# Copyright 2018-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -10,12 +10,20 @@
from .common import PostProcessor
from .. import util
-import collections
import subprocess
import tempfile
import zipfile
+import shutil
import os
+try:
+ from math import gcd
+except ImportError:
+ def gcd(a, b):
+ while b:
+ a, b = b, a % b
+ return a
+
class UgoiraPP(PostProcessor):
@@ -27,19 +35,37 @@ class UgoiraPP(PostProcessor):
self.output = options.get("ffmpeg-output", True)
self.delete = not options.get("keep-files", False)
self.repeat = options.get("repeat-last-frame", True)
+ self.mtime = options.get("mtime")
ffmpeg = options.get("ffmpeg-location")
self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg"
+ mkvmerge = options.get("mkvmerge-location")
+ self.mkvmerge = util.expand_path(mkvmerge) if mkvmerge else "mkvmerge"
+
+ demuxer = options.get("ffmpeg-demuxer")
+ if demuxer is None or demuxer == "auto":
+ if self.extension in ("webm", "mkv") and (
+ mkvmerge or shutil.which("mkvmerge")):
+ demuxer = "mkvmerge"
+ else:
+ demuxer = "concat" if util.WINDOWS else "image2"
+
+ if demuxer == "mkvmerge":
+ self._process = self._process_mkvmerge
+ self._finalize = self._finalize_mkvmerge
+ elif demuxer == "image2":
+ self._process = self._process_image2
+ self._finalize = None
+ else:
+ self._process = self._process_concat
+ self._finalize = None
+ self.log.debug("using %s demuxer", demuxer)
+
rate = options.get("framerate", "auto")
if rate != "auto":
self.calculate_framerate = lambda _: (None, rate)
- if options.get("ffmpeg-demuxer") == "image2":
- self._process = self._image2
- else:
- self._process = self._concat
-
if options.get("libx264-prevent-odd", True):
# get last video-codec argument
vcodec = None
@@ -88,13 +114,12 @@ class UgoiraPP(PostProcessor):
return
# process frames and collect command-line arguments
- args = self._process(tempdir)
+ pathfmt.set_extension(self.extension)
+ args = self._process(pathfmt, tempdir)
if self.args:
args += self.args
- self.log.debug("ffmpeg args: %s", args)
# invoke ffmpeg
- pathfmt.set_extension(self.extension)
try:
if self.twopass:
if "-f" not in self.args:
@@ -105,48 +130,61 @@ class UgoiraPP(PostProcessor):
else:
args.append(pathfmt.realpath)
self._exec(args)
+ if self._finalize:
+ self._finalize(pathfmt, tempdir)
except OSError as exc:
print()
self.log.error("Unable to invoke FFmpeg (%s: %s)",
exc.__class__.__name__, exc)
pathfmt.realpath = pathfmt.temppath
else:
+ if self.mtime:
+ mtime = pathfmt.kwdict.get("_mtime")
+ if mtime:
+ util.set_mtime(pathfmt.realpath, mtime)
if self.delete:
pathfmt.delete = True
else:
pathfmt.set_extension("zip")
- def _concat(self, path):
- ffconcat = path + "/ffconcat.txt"
-
- content = ["ffconcat version 1.0"]
- append = content.append
- for frame in self._frames:
- append("file '{}'\nduration {}".format(
- frame["file"], frame["delay"] / 1000))
- if self.repeat:
- append("file '{}'".format(frame["file"]))
- append("")
-
- with open(ffconcat, "w") as file:
- file.write("\n".join(content))
+ def _exec(self, args):
+ self.log.debug(args)
+ out = None if self.output else subprocess.DEVNULL
+ return subprocess.Popen(args, stdout=out, stderr=out).wait()
+ def _process_concat(self, pathfmt, tempdir):
rate_in, rate_out = self.calculate_framerate(self._frames)
args = [self.ffmpeg, "-f", "concat"]
if rate_in:
args += ("-r", str(rate_in))
- args += ("-i", ffconcat)
+ args += ("-i", self._write_ffmpeg_concat(tempdir))
if rate_out:
args += ("-r", str(rate_out))
return args
- def _image2(self, path):
- path += "/"
+ def _process_image2(self, pathfmt, tempdir):
+ tempdir += "/"
+ frames = self._frames
+
+ # add extra frame if necessary
+ if self.repeat and not self._delay_is_uniform(frames):
+ last = frames[-1]
+ delay_gcd = self._delay_gcd(frames)
+ if last["delay"] - delay_gcd > 0:
+ last["delay"] -= delay_gcd
+
+ self.log.debug("non-uniform delays; inserting extra frame")
+ last_copy = last.copy()
+ frames.append(last_copy)
+ name, _, ext = last_copy["file"].rpartition(".")
+ last_copy["file"] = "{:>06}.{}".format(int(name)+1, ext)
+ shutil.copyfile(tempdir + last["file"],
+ tempdir + last_copy["file"])
# adjust frame mtime values
ts = 0
- for frame in self._frames:
- os.utime(path + frame["file"], ns=(ts, ts))
+ for frame in frames:
+ os.utime(tempdir + frame["file"], ns=(ts, ts))
ts += frame["delay"] * 1000000
return [
@@ -155,18 +193,90 @@ class UgoiraPP(PostProcessor):
"-ts_from_file", "2",
"-pattern_type", "sequence",
"-i", "{}%06d.{}".format(
- path.replace("%", "%%"), frame["file"].rpartition(".")[2]),
+ tempdir.replace("%", "%%"),
+ frame["file"].rpartition(".")[2]
+ ),
]
- def _exec(self, args):
- out = None if self.output else subprocess.DEVNULL
- return subprocess.Popen(args, stdout=out, stderr=out).wait()
+ def _process_mkvmerge(self, pathfmt, tempdir):
+ self._realpath = pathfmt.realpath
+ pathfmt.realpath = tempdir + "/temp." + self.extension
+
+ return [
+ self.ffmpeg,
+ "-f", "image2",
+ "-pattern_type", "sequence",
+ "-i", "{}/%06d.{}".format(
+ tempdir.replace("%", "%%"),
+ self._frames[0]["file"].rpartition(".")[2]
+ ),
+ ]
+
+ def _finalize_mkvmerge(self, pathfmt, tempdir):
+ args = [
+ self.mkvmerge,
+ "-o", self._realpath,
+ "--timecodes", "0:" + self._write_mkvmerge_timecodes(tempdir),
+ ]
+ if self.extension == "webm":
+ args.append("--webm")
+ args += ("=", pathfmt.realpath)
+
+ pathfmt.realpath = self._realpath
+ self._exec(args)
+
+ def _write_ffmpeg_concat(self, tempdir):
+ content = ["ffconcat version 1.0"]
+ append = content.append
+
+ for frame in self._frames:
+ append("file '{}'\nduration {}".format(
+ frame["file"], frame["delay"] / 1000))
+ if self.repeat:
+ append("file '{}'".format(frame["file"]))
+ append("")
+
+ ffconcat = tempdir + "/ffconcat.txt"
+ with open(ffconcat, "w") as file:
+ file.write("\n".join(content))
+ return ffconcat
+
+ def _write_mkvmerge_timecodes(self, tempdir):
+ content = ["# timecode format v2"]
+ append = content.append
+
+ delay_sum = 0
+ for frame in self._frames:
+ append(str(delay_sum))
+ delay_sum += frame["delay"]
+ append(str(delay_sum))
+ append("")
+
+ timecodes = tempdir + "/timecodes.tc"
+ with open(timecodes, "w") as file:
+ file.write("\n".join(content))
+ return timecodes
+
+ def calculate_framerate(self, frames):
+ uniform = self._delay_is_uniform(frames)
+ if uniform:
+ return ("1000/{}".format(frames[0]["delay"]), None)
+ return (None, "1000/{}".format(self._delay_gcd(frames)))
+
+ @staticmethod
+ def _delay_gcd(frames):
+ result = frames[0]["delay"]
+ for f in frames:
+ result = gcd(result, f["delay"])
+ return result
@staticmethod
- def calculate_framerate(framelist):
- counter = collections.Counter(frame["delay"] for frame in framelist)
- fps = "1000/{}".format(min(counter))
- return (fps, None) if len(counter) == 1 else (None, fps)
+ def _delay_is_uniform(frames):
+ delay = frames[0]["delay"]
+ for f in frames:
+ if f["delay"] != delay:
+ return False
+ return True
__postprocessor__ = UgoiraPP
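# _write_mkvmerge_timecodes() emits a "timecode format v2" file: one
# cumulative millisecond timestamp per frame plus a closing entry for the
# end of the last frame. For hypothetical frame delays of [100, 100, 150]:
delays = [100, 100, 150]
content = ["# timecode format v2"]
total = 0
for delay in delays:
    content.append(str(total))   # start time of each frame
    total += delay
content.append(str(total))       # end time of the final frame
print("\n".join(content))        # -> 0, 100, 200, 350 on separate lines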
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 92d1620..e8af358 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -71,6 +71,20 @@ def unique_sequence(iterable):
yield element
+def contains(values, elements, separator=" "):
+ """Returns True if at least one of 'elements' is contained in 'values'"""
+ if isinstance(values, str):
+ values = values.split(separator)
+
+ if not isinstance(elements, (tuple, list)):
+ return elements in values
+
+ for e in elements:
+ if e in values:
+ return True
+ return False
+
+
def raises(cls):
"""Returns a function that raises 'cls' as exception"""
def wrap(*args):
@@ -173,8 +187,13 @@ def to_string(value):
return str(value)
-def to_timestamp(dt):
- """Convert naive datetime to UTC timestamp string"""
+def datetime_to_timestamp(dt):
+ """Convert naive UTC datetime to timestamp"""
+ return (dt - EPOCH) / SECOND
+
+
+def datetime_to_timestamp_string(dt):
+ """Convert naive UTC datetime to timestamp string"""
try:
return str((dt - EPOCH) // SECOND)
except Exception:
@@ -289,12 +308,12 @@ def load_cookiestxt(fp):
for line in fp:
- line = line.lstrip()
+ line = line.lstrip(" ")
# strip '#HttpOnly_'
if line.startswith("#HttpOnly_"):
line = line[10:]
# ignore empty lines and comments
- if not line or line[0] in ("#", "$"):
+ if not line or line[0] in ("#", "$", "\n"):
continue
# strip trailing '\n'
if line[-1] == "\n":
@@ -326,6 +345,9 @@ def save_cookiestxt(fp, cookies):
fp.write("# Netscape HTTP Cookie File\n\n")
for cookie in cookies:
+ if not cookie.domain:
+ continue
+
if cookie.value is None:
name = ""
value = cookie.name
@@ -421,6 +443,7 @@ WINDOWS = (os.name == "nt")
SENTINEL = object()
SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
GLOBALS = {
+ "contains" : contains,
"parse_int": text.parse_int,
"urlsplit" : urllib.parse.urlsplit,
"datetime" : datetime.datetime,
@@ -669,11 +692,14 @@ class ExtendedUrl():
class DownloadArchive():
- def __init__(self, path, extractor):
+ def __init__(self, path, format_string, cache_key="_archive_key"):
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
con.isolation_level = None
+
self.close = con.close
self.cursor = con.cursor()
+ self.keygen = format_string.format_map
+ self._cache_key = cache_key
try:
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
@@ -682,20 +708,16 @@ class DownloadArchive():
# fallback for missing WITHOUT ROWID support (#553)
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry PRIMARY KEY)")
- self.keygen = (
- extractor.config("archive-prefix", extractor.category) +
- extractor.config("archive-format", extractor.archive_fmt)
- ).format_map
def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
- key = kwdict["_archive_key"] = self.keygen(kwdict)
+ key = kwdict[self._cache_key] = self.keygen(kwdict)
self.cursor.execute(
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
return self.cursor.fetchone()
def add(self, kwdict):
"""Add item described by 'kwdict' to archive"""
- key = kwdict.get("_archive_key") or self.keygen(kwdict)
+ key = kwdict.get(self._cache_key) or self.keygen(kwdict)
self.cursor.execute(
"INSERT OR IGNORE INTO archive VALUES (?)", (key,))
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 54c81aa..fe9a0f8 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.21.0"
+__version__ = "1.21.1"
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 0657456..188b54c 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -180,14 +180,14 @@ class TestCookieUtils(unittest.TestCase):
extr._cookiejar.set("a", "1", expires=now+100)
with mock.patch.object(log, "warning") as mw:
- self.assertFalse(extr._check_cookies(("a",)))
+ self.assertTrue(extr._check_cookies(("a",)))
self.assertEqual(mw.call_count, 1)
self.assertEqual(mw.call_args[0], (
"Cookie '%s' will expire in less than %s hour%s", "a", 1, ""))
extr._cookiejar.set("a", "1", expires=now+100+7200)
with mock.patch.object(log, "warning") as mw:
- self.assertFalse(extr._check_cookies(("a",)))
+ self.assertTrue(extr._check_cookies(("a",)))
self.assertEqual(mw.call_count, 1)
self.assertEqual(mw.call_args[0], (
"Cookie '%s' will expire in less than %s hour%s", "a", 3, "s"))
diff --git a/test/test_formatter.py b/test/test_formatter.py
index 8464b1b..4cce8a3 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -232,6 +232,14 @@ class TestFormatter(unittest.TestCase):
self._run_test("\fE name * 2 + ' ' + a", "{}{} {}".format(
self.kwdict["name"], self.kwdict["name"], self.kwdict["a"]))
+ @unittest.skipIf(sys.hexversion < 0x3060000, "no fstring support")
+ def test_fstring(self):
+ self._run_test("\fF {a}", self.kwdict["a"])
+ self._run_test("\fF {name}{name} {a}", "{}{} {}".format(
+ self.kwdict["name"], self.kwdict["name"], self.kwdict["a"]))
+ self._run_test("\fF foo-'\"{a.upper()}\"'-bar",
+ """foo-'"{}"'-bar""".format(self.kwdict["a"].upper()))
+
def test_module(self):
with tempfile.TemporaryDirectory() as tmpdirname:
path = os.path.join(tmpdirname, "testmod.py")
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 84d2747..e23cfa2 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2019-2021 Mike Fährmann
+# Copyright 2019-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -16,7 +16,7 @@ import logging
import zipfile
import tempfile
import collections
-from datetime import datetime, timezone as tz
+from datetime import datetime
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import extractor, output, path # noqa E402
@@ -345,7 +345,7 @@ class MtimeTest(BasePostprocessorTest):
self.assertEqual(pp.key, "date")
def test_mtime_datetime(self):
- self._create(None, {"date": datetime(1980, 1, 1, tzinfo=tz.utc)})
+ self._create(None, {"date": datetime(1980, 1, 1)})
self._trigger()
self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800)
diff --git a/test/test_util.py b/test/test_util.py
index ce403a8..3cf3d68 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -189,6 +189,10 @@ class TestCookiesTxt(unittest.TestCase):
[self._cookie("name", "", ".example.org")],
)
_assert(
+ "\tTRUE\t/\tTRUE\t\tname\t",
+ [self._cookie("name", "", "")],
+ )
+ _assert(
"# Netscape HTTP Cookie File\n"
"\n"
"# default\n"
@@ -241,6 +245,8 @@ class TestCookiesTxt(unittest.TestCase):
"n4", "" , "www.example.org", False, "/", False),
self._cookie(
"n5", "v5", "www.example.org", False, "/path", False, 100),
+ self._cookie(
+ "n6", "v6", "", False),
],
"# Netscape HTTP Cookie File\n"
"\n"
@@ -313,6 +319,27 @@ class TestOther(unittest.TestCase):
self.assertSequenceEqual(
list(util.unique_sequence([1, 2, 1, 3, 2, 1])), [1, 2, 1, 3, 2, 1])
+ def test_contains(self):
+ c = [1, "2", 3, 4, "5", "foo"]
+ self.assertTrue(util.contains(c, 1))
+ self.assertTrue(util.contains(c, "foo"))
+ self.assertTrue(util.contains(c, [1, 3, "5"]))
+ self.assertTrue(util.contains(c, ["a", "b", "5"]))
+ self.assertFalse(util.contains(c, "bar"))
+ self.assertFalse(util.contains(c, [2, 5, "bar"]))
+
+ s = "1 2 3 asd qwe y(+)c f(+)(-) bar"
+ self.assertTrue(util.contains(s, "y(+)c"))
+ self.assertTrue(util.contains(s, ["asd", "qwe", "yxc"]))
+ self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"]))
+ self.assertFalse(util.contains(s, "tag1"))
+ self.assertFalse(util.contains(s, ["tag1", "tag2", "tag3"]))
+
+ s = "1, 2, 3, asd, qwe, y(+)c, f(+)(-), bar"
+ self.assertTrue(util.contains(s, "y(+)c", ", "))
+ self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"], ", "))
+ self.assertFalse(util.contains(s, "tag1", ", "))
+
def test_raises(self):
func = util.raises(Exception)
with self.assertRaises(Exception):
@@ -531,7 +558,16 @@ class TestOther(unittest.TestCase):
self.assertEqual(f(["a", "b", "c"]), "a, b, c")
self.assertEqual(f([1, 2, 3]), "1, 2, 3")
- def test_to_timestamp(self, f=util.to_timestamp):
+ def test_datetime_to_timestamp(self, f=util.datetime_to_timestamp):
+ self.assertEqual(f(util.EPOCH), 0.0)
+ self.assertEqual(f(datetime.datetime(2010, 1, 1)), 1262304000.0)
+ self.assertEqual(f(datetime.datetime(2010, 1, 1, 0, 0, 0, 128000)),
+ 1262304000.128000)
+ with self.assertRaises(TypeError):
+ f(None)
+
+ def test_datetime_to_timestamp_string(
+ self, f=util.datetime_to_timestamp_string):
self.assertEqual(f(util.EPOCH), "0")
self.assertEqual(f(datetime.datetime(2010, 1, 1)), "1262304000")
self.assertEqual(f(None), "")