40 files changed, 1074 insertions, 296 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index ffd11a6..994d5f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +## 1.21.1 - 2022-04-08 +### Additions +- [gofile] add gofile.io extractor ([#2364](https://github.com/mikf/gallery-dl/issues/2364)) +- [instagram] add `previews` option ([#2135](https://github.com/mikf/gallery-dl/issues/2135)) +- [kemonoparty] add `duplicates` option ([#2440](https://github.com/mikf/gallery-dl/issues/2440)) +- [pinterest] add extractor for created pins ([#2452](https://github.com/mikf/gallery-dl/issues/2452)) +- [pinterest] support multiple files per pin ([#1619](https://github.com/mikf/gallery-dl/issues/1619), [#2452](https://github.com/mikf/gallery-dl/issues/2452)) +- [telegraph] Add telegra.ph extractor ([#2312](https://github.com/mikf/gallery-dl/issues/2312)) +- [twitter] add `syndication` option ([#2354](https://github.com/mikf/gallery-dl/issues/2354)) +- [twitter] accept fxtwitter.com URLs ([#2484](https://github.com/mikf/gallery-dl/issues/2484)) +- [downloader:http] support using an arbitrary method and sending POST data ([#2433](https://github.com/mikf/gallery-dl/issues/2433)) +- [postprocessor:metadata] implement archive options ([#2421](https://github.com/mikf/gallery-dl/issues/2421)) +- [postprocessor:ugoira] add `mtime` option ([#2307](https://github.com/mikf/gallery-dl/issues/2307)) +- [postprocessor:ugoira] support setting timecodes with `mkvmerge` ([#1550](https://github.com/mikf/gallery-dl/issues/1550)) +- [formatter] support evaluating f-string literals +- add `--ugoira-conv-copy` command-line option ([#1550](https://github.com/mikf/gallery-dl/issues/1550)) +- implement a `contains()` function for filter statements ([#2446](https://github.com/mikf/gallery-dl/issues/2446)) +### Fixes +- [aryion] provide correct `date` metadata independent of DST +- [furaffinity] fix search result pagination ([#2402](https://github.com/mikf/gallery-dl/issues/2402)) +- [hitomi] update and fix metadata extraction ([#2444](https://github.com/mikf/gallery-dl/issues/2444)) +- [kissgoddess] extract all images ([#2473](https://github.com/mikf/gallery-dl/issues/2473)) +- [mangasee] unescape manga names ([#2454](https://github.com/mikf/gallery-dl/issues/2454)) +- [newgrounds] update and fix pagination ([#2456](https://github.com/mikf/gallery-dl/issues/2456)) +- [newgrounds] warn about age-restricted posts ([#2456](https://github.com/mikf/gallery-dl/issues/2456)) +- [pinterest] do not force `m3u8_native` for video downloads ([#2436](https://github.com/mikf/gallery-dl/issues/2436)) +- [twibooru] fix posts without `name` ([#2434](https://github.com/mikf/gallery-dl/issues/2434)) +- [unsplash] replace dash with space in search API queries ([#2429](https://github.com/mikf/gallery-dl/issues/2429)) +- [postprocessor:mtime] fix timestamps from datetime objects ([#2307](https://github.com/mikf/gallery-dl/issues/2307)) +- fix yet another bug in `_check_cookies()` ([#2372](https://github.com/mikf/gallery-dl/issues/2372)) +- fix loading/storing cookies without domain + ## 1.21.0 - 2022-03-14 ### Additions - [fantia] add `num` enumeration index ([#2377](https://github.com/mikf/gallery-dl/issues/2377)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.21.0 +Version: 1.21.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -58,6 +58,7 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - yt-dlp_ or youtube-dl_: Video 
downloads +- PySocks_: SOCKS proxy support Installation @@ -98,8 +99,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.bin>`__ | Executables built from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -364,6 +365,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _FFmpeg: https://www.ffmpeg.org/ .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ +.. _PySocks: https://pypi.org/project/PySocks/ .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth @@ -24,6 +24,7 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - yt-dlp_ or youtube-dl_: Video downloads +- PySocks_: SOCKS proxy support Installation @@ -64,8 +65,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.bin>`__ | Executables built from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -330,6 +331,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _FFmpeg: https://www.ffmpeg.org/ .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ +.. _PySocks: https://pypi.org/project/PySocks/ .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. 
_OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index ddc75fa..f630c8e 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -58,6 +58,7 @@ _arguments -C -S \ --zip'[Store downloaded files in a ZIP archive]' \ --ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \ --ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \ +--ugoira-conv-copy'[Convert Pixiv Ugoira to MKV without re-encoding any frames]' \ --write-metadata'[Write metadata to separate JSON files]' \ --write-info-json'[Write gallery metadata to a info.json file]' \ --write-infojson'[==SUPPRESS==]' \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 2aa37e6..d8a6124 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^()$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --source-address --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --cookies --proxy --source-address --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) fi } diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index 8f915fd..ff0ee84 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -52,6 +52,7 @@ complete -c gallery-dl -x -l 'chapter-filter' -d 'Like "--filter", but applies t complete -c gallery-dl -l 'zip' -d 'Store downloaded files in a ZIP archive' complete -c gallery-dl -l 'ugoira-conv' -d 'Convert Pixiv Ugoira to WebM (requires FFmpeg)' complete -c gallery-dl -l 'ugoira-conv-lossless' -d 'Convert Pixiv Ugoira to WebM in VP9 lossless mode' +complete -c gallery-dl -l 'ugoira-conv-copy' -d 'Convert Pixiv Ugoira to MKV without re-encoding any frames' complete -c gallery-dl -l 'write-metadata' -d 'Write metadata to separate JSON files' complete -c gallery-dl -l 'write-info-json' -d 'Write gallery metadata to a info.json file' complete -c gallery-dl -l 
'write-infojson' -d '==SUPPRESS==' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 3e373fd..6e3a965 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2022-03-14" "1.21.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2022-04-08" "1.21.1" "gallery-dl Manual" .\" disable hyphenation .nh @@ -173,6 +173,9 @@ Convert Pixiv Ugoira to WebM (requires FFmpeg) .B "\-\-ugoira\-conv\-lossless" Convert Pixiv Ugoira to WebM in VP9 lossless mode .TP +.B "\-\-ugoira\-conv\-copy" +Convert Pixiv Ugoira to MKV without re-encoding any frames +.TP .B "\-\-write\-metadata" Write metadata to separate JSON files .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 9651d18..950300e 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2022-03-14" "1.21.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2022-04-08" "1.21.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -710,7 +710,9 @@ database, as either lookup operations are significantly faster or memory requirements are significantly lower when the amount of stored IDs gets reasonably large. -Note: archive paths support regular \f[I]format string\f[] replacements, +Note: Archive files that do not already exist get generated automatically. + +Note: Archive paths support regular \f[I]format string\f[] replacements, but be aware that using external inputs for building local paths may pose a security risk. @@ -1497,6 +1499,30 @@ If the format is given as \f[I]string\f[], it will be extended with restrict it to only one possible format. +.SS extractor.gofile.api-token +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Description:" 4 +API token value found at the bottom of your \f[I]profile page\f[]. + +If not set, a temporary guest token will be used. + + +.SS extractor.gofile.recursive +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Recursively download files from subfolders. + + .SS extractor.hentaifoundry.include .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] @@ -1533,18 +1559,6 @@ Available formats are \f[I]"webp"\f[] and \f[I]"avif"\f[]. but is most likely going to fail with \f[I]403 Forbidden\f[] errors. -.SS extractor.hitomi.metadata -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]false\f[] - -.IP "Description:" 4 -Try to extract -\f[I]artist\f[], \f[I]group\f[], \f[I]parody\f[], and \f[I]characters\f[] metadata. - - .SS extractor.imgur.mp4 .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1599,6 +1613,17 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. +.SS extractor.instagram.previews +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Download video previews. + + .SS extractor.instagram.videos .IP "Type:" 6 \f[I]bool\f[] @@ -1621,6 +1646,22 @@ Download video files. Extract \f[I]comments\f[] metadata. +.SS extractor.kemonoparty.duplicates +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Controls how to handle duplicate files in a post. + +.br +* \f[I]true\f[]: Download duplicates +.br +* \f[I]false\f[]: Ignore duplicates + + .SS extractor.kemonoparty.dms .IP "Type:" 6 \f[I]bool\f[] @@ -2436,6 +2477,17 @@ Known available sizes are \f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[]. 
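The new extractor options documented in the surrounding man-page hunks (extractor.gofile.api-token, extractor.gofile.recursive, extractor.instagram.previews, extractor.kemonoparty.duplicates) are ordinary config values; as a minimal sketch, they could also be set programmatically, assuming gallery_dl's config.set(path, key, value) helper (the token string here is a placeholder, not a real value):

    from gallery_dl import config

    config.set(("extractor", "gofile"), "api-token", "0123456789abcdef")  # placeholder token
    config.set(("extractor", "gofile"), "recursive", True)        # descend into subfolders
    config.set(("extractor", "instagram"), "previews", True)      # also download video previews
    config.set(("extractor", "kemonoparty"), "duplicates", True)  # keep duplicate files per post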
+.SS extractor.twitter.syndication +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Retrieve age-restricted content using Twitter's syndication API. + + .SS extractor.twitter.logout .IP "Type:" 6 \f[I]bool\f[] @@ -3122,17 +3174,6 @@ Location of a youtube-dl configuration file to load options from. .SH OUTPUT OPTIONS -.SS output.fallback -.IP "Type:" 6 -\f[I]bool\f[] - -.IP "Default:" 9 -\f[I]true\f[] - -.IP "Description:" 4 -Include fallback URLs in the output of \f[I]-g/--get-urls\f[]. - - .SS output.mode .IP "Type:" 6 \f[I]string\f[] @@ -3181,6 +3222,30 @@ with a display width greater than 1. Show skipped file downloads. +.SS output.fallback +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Include fallback URLs in the output of \f[I]-g/--get-urls\f[]. + + +.SS output.private +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Include private fields, +i.e. fields whose name starts with an underscore, +in the output of \f[I]-K/--list-keywords\f[] and \f[I]-j/--dump-json\f[]. + + .SS output.progress .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -3511,6 +3576,19 @@ Custom format string to build the content of metadata files with. Note: Only applies for \f[I]"mode": "custom"\f[]. +.SS metadata.archive +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Description:" 4 +File to store IDs of generated metadata files in, +similar to \f[I]extractor.*.archive\f[]. + +\f[I]archive-format\f[] and \f[I]archive-prefix\f[] options, +akin to \f[I]extractor.*.archive-format\f[] and \f[I]extractor.*.archive-prefix\f[], +are supported as well. + + .SS metadata.mtime .IP "Type:" 6 \f[I]bool\f[] @@ -3519,7 +3597,7 @@ Note: Only applies for \f[I]"mode": "custom"\f[]. \f[I]false\f[] .IP "Description:" 4 -Set modification times for generated metadata files +Set modification times of generated metadata files according to the accompanying downloaded file. Enabling this option will only have an effect @@ -3590,12 +3668,20 @@ Additional FFmpeg command-line arguments. \f[I]string\f[] .IP "Default:" 9 -\f[I]image2\f[] +\f[I]auto\f[] .IP "Description:" 4 -FFmpeg demuxer to read input files with. Possible values are -"\f[I]image2\f[]" and -"\f[I]concat\f[]". +FFmpeg demuxer to read and process input files with. Possible values are + +.br +* "\f[I]concat\f[]" (inaccurate frame timecodes) +.br +* "\f[I]image2\f[]" (accurate timecodes, not usable on Windows) +.br +* "mkvmerge" (accurate timecodes, only WebM or MKV, requires \f[I]mkvmerge\f[]) + +"auto" will select mkvmerge if possible and fall back to image2 or +concat depending on the local operating system. .SS ugoira.ffmpeg-location @@ -3609,6 +3695,18 @@ FFmpeg demuxer to read input files with. Possible values are Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use. +.SS ugoira.mkvmerge-location +.IP "Type:" 6 +\f[I]Path\f[] + +.IP "Default:" 9 +\f[I]"mkvmerge"\f[] + +.IP "Description:" 4 +Location of the \f[I]mkvmerge\f[] executable for use with the +\f[I]mkvmerge demuxer\f[]. + + .SS ugoira.ffmpeg-output .IP "Type:" 6 \f[I]bool\f[] @@ -3681,6 +3779,17 @@ to the list of FFmpeg command-line arguments to reduce an odd width/height by 1 pixel and make them even. +.SS ugoira.mtime +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Set modification times of generated ugoira animations. + +
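The mkvmerge demuxer support documented above is what the new --ugoira-conv-copy command-line option builds on; per the option.py hunk near the end of this diff, that flag simply appends a ugoira postprocessor entry equivalent to the following Python literal, which stream-copies the frames into an MKV container instead of re-encoding them:

    UGOIRA_COPY = {
        "name"             : "ugoira",
        "extension"        : "mkv",
        "ffmpeg-args"      : ("-c:v", "copy"),   # copy frames as-is, no re-encoding
        "ffmpeg-twopass"   : False,
        "repeat-last-frame": False,
        "whitelist"        : ("pixiv", "danbooru"),
    }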
.SS ugoira.repeat-last-frame .IP "Type:" 6 \f[I]bool\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 009ede8..1c00d88 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.21.0 +Version: 1.21.1 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -58,6 +58,7 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - yt-dlp_ or youtube-dl_: Video downloads +- PySocks_: SOCKS proxy support Installation @@ -98,8 +99,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.0/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.21.1/gallery-dl.bin>`__ | Executables built from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -364,6 +365,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _FFmpeg: https://www.ffmpeg.org/ .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ +.. _PySocks: https://pypi.org/project/PySocks/ .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 4139a4d..4e226fb 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -78,6 +78,7 @@ gallery_dl/extractor/gelbooru_v01.py gallery_dl/extractor/gelbooru_v02.py gallery_dl/extractor/generic.py gallery_dl/extractor/gfycat.py +gallery_dl/extractor/gofile.py gallery_dl/extractor/hbrowse.py gallery_dl/extractor/hentai2read.py gallery_dl/extractor/hentaicosplays.py @@ -168,6 +169,7 @@ gallery_dl/extractor/smugmug.py gallery_dl/extractor/speakerdeck.py gallery_dl/extractor/subscribestar.py gallery_dl/extractor/tapas.py +gallery_dl/extractor/telegraph.py gallery_dl/extractor/test.py gallery_dl/extractor/toyhouse.py gallery_dl/extractor/tsumino.py diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index b878f5f..5622462 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -120,9 +120,14 @@ class HttpDownloader(DownloaderBase): # connect to (remote) source try: response = self.session.request( - "GET", url, stream=True, headers=headers, - timeout=self.timeout, verify=self.verify, - proxies=self.proxies) + kwdict.get("_http_method", "GET"), url, + stream=True, + headers=headers, + data=kwdict.get("_http_data"), + timeout=self.timeout, + proxies=self.proxies, + verify=self.verify, + ) except (ConnectionError, Timeout) as exc: msg = str(exc) continue diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 1bec48e..6d6c7ee 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -43,6 +43,7 @@ modules = [ "gelbooru_v01", "gelbooru_v02", "gfycat", + "gofile", "hbrowse", "hentai2read", "hentaicosplays", @@ -125,6 +126,7 @@ modules = [ "speakerdeck", "subscribestar", "tapas", + "telegraph", "toyhouse", "tsumino", "tumblr",
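The downloader/http.py change above lets an extractor choose the HTTP request method and body on a per-file basis through private kwdict fields; a minimal sketch of how a hypothetical extractor might use this (the URL and payload are invented for illustration, only the _http_method and _http_data field names come from the hunk above):

    from gallery_dl.extractor.common import Extractor, Message

    class ExampleExtractor(Extractor):
        """Hypothetical extractor using the new per-file request fields"""
        category = "example"

        def items(self):
            url = "https://example.org/file/123.jpg"      # invented URL
            data = {
                "id": 123, "extension": "jpg",
                "_http_method": "POST",       # HttpDownloader defaults to "GET"
                "_http_data"  : "key=value",  # forwarded as the request body
            }
            yield Message.Url, url, data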
"toyhouse", "tsumino", "tumblr", diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py index 06ec571..fa590b9 100644 --- a/gallery_dl/extractor/aryion.py +++ b/gallery_dl/extractor/aryion.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020-2021 Mike Fährmann +# Copyright 2020-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,6 +11,8 @@ from .common import Extractor, Message from .. import text, util, exception from ..cache import cache +from email.utils import parsedate_tz +from datetime import datetime BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4" @@ -144,7 +146,8 @@ class AryionExtractor(Extractor): title, _, artist = text.unescape(extr( "<title>g4 :: ", "<")).rpartition(" by ") - data = { + + return { "id" : text.parse_int(post_id), "url" : url, "user" : self.user or artist, @@ -152,7 +155,7 @@ class AryionExtractor(Extractor): "artist": artist, "path" : text.split_html(extr( "cookiecrumb'>", '</span'))[4:-1:2], - "date" : extr("class='pretty-date' title='", "'"), + "date" : datetime(*parsedate_tz(lmod)[:6]), "size" : text.parse_int(clen), "views" : text.parse_int(extr("Views</b>:", "<").replace(",", "")), "width" : text.parse_int(extr("Resolution</b>:", "x")), @@ -167,12 +170,6 @@ class AryionExtractor(Extractor): "_mtime" : lmod, } - d1, _, d2 = data["date"].partition(",") - data["date"] = text.parse_datetime( - d1[:-2] + d2, "%b %d %Y %I:%M %p", -5) - - return data - class AryionGalleryExtractor(AryionExtractor): """Extractor for a user's gallery on eka's portal""" @@ -249,7 +246,7 @@ class AryionPostExtractor(AryionExtractor): "title" : "I'm on subscribestar now too!", "description": r"re:Doesn't hurt to have a backup, right\?", "tags" : ["Non-Vore", "subscribestar"], - "date" : "dt:2019-02-16 19:30:00", + "date" : "dt:2019-02-16 19:30:34", "path" : [], "views" : int, "favorites": int, diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index e3559f9..ff49d89 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -183,7 +183,7 @@ class Extractor(): elif until: if isinstance(until, datetime.datetime): # convert to UTC timestamp - until = (until - util.EPOCH) / util.SECOND + until = util.datetime_to_timestamp(until) else: until = float(until) seconds = until - now @@ -373,7 +373,6 @@ class Extractor(): self.log.warning( "Cookie '%s' will expire in less than %s hour%s", cookie.name, hours + 1, "s" if hours else "") - continue names.discard(cookie.name) if not names: diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 6a8744a..b63cfc1 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -165,22 +165,24 @@ class FuraffinityExtractor(Extractor): def _pagination_search(self, query): url = self.root + "/search/" data = { - "page" : 0, - "next_page" : "Next", + "page" : 1, "order-by" : "relevancy", "order-direction": "desc", "range" : "all", - "rating-general" : "on", - "rating-mature" : "on", - "rating-adult" : "on", - "type-art" : "on", - "type-music" : "on", - "type-flash" : "on", - "type-story" : "on", - "type-photo" : "on", - "type-poetry" : "on", + "range_from" : "", + "range_to" : "", + "rating-general" : "1", + "rating-mature" : "1", + "rating-adult" : "1", + "type-art" : "1", + "type-music" : "1", + "type-flash" : "1", + "type-story" : "1", + "type-photo" : "1", + "type-poetry" : "1", "mode" 
: "extended", } + data.update(query) if "page" in query: data["page"] = text.parse_int(query["page"]) @@ -194,7 +196,11 @@ class FuraffinityExtractor(Extractor): if not post_id: return - data["page"] += 1 + + if "next_page" in data: + data["page"] += 1 + else: + data["next_page"] = "Next" class FuraffinityGalleryExtractor(FuraffinityExtractor): @@ -255,9 +261,10 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): "range": "45-50", "count": 6, }), - ("https://www.furaffinity.net/search/cute&rating-general=0", { - "range": "1", - "count": 1, + # first page of search results (#2402) + ("https://www.furaffinity.net/search/?q=leaf&range=1day", { + "range": "1-3", + "count": 3, }), ) diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py new file mode 100644 index 0000000..37d2986 --- /dev/null +++ b/gallery_dl/extractor/gofile.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from .common import Extractor, Message +from .. import exception +from ..cache import memcache + + +class GofileFolderExtractor(Extractor): + category = "gofile" + subcategory = "folder" + root = "https://gofile.io" + directory_fmt = ("{category}", "{name} ({code})") + archive_fmt = "{id}" + pattern = r"(?:https?://)?(?:www\.)?gofile\.io/d/([^/?#]+)" + test = ( + ("https://gofile.io/d/5qHmQj", { + "pattern": r"https://file\d+\.gofile\.io/download" + r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}" + r"/test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26!\.png", + "keyword": { + "createTime": int, + "directLink": "re:https://store3.gofile.io/download/direct/.+", + "downloadCount": int, + "extension": "png", + "filename": "test-テスト-%22&!", + "folder": { + "childs": [ + "346429cc-aee4-4996-be3f-e58616fe231f", + "765b6b12-b354-4e14-9a45-f763fa455682", + "2a44600a-4a59-4389-addc-4a0d542c457b" + ], + "code": "5qHmQj", + "createTime": 1648536501, + "id": "45cd45d1-dc78-4553-923f-04091c621699", + "isRoot": True, + "name": "root", + "public": True, + "totalDownloadCount": int, + "totalSize": 364, + "type": "folder" + }, + "id": r"re:\w{8}-\w{4}-\w{4}-\w{4}-\w{12}", + "link": r"re:https://file17.gofile.io/download/.+\.png", + "md5": "re:[0-9a-f]{32}", + "mimetype": "image/png", + "name": "test-テスト-%22&!.png", + "num": int, + "parentFolder": "45cd45d1-dc78-4553-923f-04091c621699", + "serverChoosen": "file17", + "size": 182, + "thumbnail": r"re:https://store3.gofile.io/download/.+\.png", + "type": "file" + }, + }), + ("https://gofile.io/d/346429cc-aee4-4996-be3f-e58616fe231f", { + "content": "0c8768055e4e20e7c7259608b67799171b691140", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.content_id = match.group(1) + + def items(self): + recursive = self.config("recursive") + + token = self.config("api-token") + if token is None: + self.log.debug("creating temporary account") + token = self._create_account() + self.session.cookies.set("accountToken", token, domain=".gofile.io") + + folder = self._get_content(self.content_id, token) + yield Message.Directory, folder + + num = 0 + contents = folder.pop("contents") + for content_id in folder["childs"]: + content = contents[content_id] + content["folder"] = folder + + if content["type"] == "file": + num += 1 + content["num"] = num + content["filename"], _, content["extension"] = \ + content["name"].rpartition(".") + yield Message.Url, content["link"], content + + elif 
content["type"] == "folder": + if recursive: + url = "https://gofile.io/d/" + content["id"] + content["_extractor"] = GofileFolderExtractor + yield Message.Queue, url, content + + else: + self.log.debug("'%s' is of unknown type (%s)", + content.get("name"), content["type"]) + + @memcache() + def _create_account(self): + return self._api_request("createAccount")["token"] + + def _get_content(self, content_id, token): + return self._api_request("getContent", { + "contentId" : content_id, + "token" : token, + "websiteToken": "websiteToken", + }) + + def _api_request(self, endpoint, params=None): + response = self.request( + "https://api.gofile.io/" + endpoint, params=params).json() + + if response["status"] != "ok": + if response["status"] == "error-notFound": + raise exception.NotFoundError("content") + raise exception.StopExtraction( + "%s failed (Status: %s)", endpoint, response["status"]) + + return response["data"] diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index 34eaaab..ca7e692 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -28,8 +28,7 @@ class HitomiGalleryExtractor(GalleryExtractor): ("https://hitomi.la/galleries/867789.html", { "pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+" r"/[0-9a-f]{64}\.webp", - "keyword": "4b584d09d535694d7d757c47daf5c15d116420d2", - "options": (("metadata", True),), + "keyword": "86af5371f38117a07407f11af689bdd460b09710", "count": 16, }), # download test @@ -77,23 +76,18 @@ class HitomiGalleryExtractor(GalleryExtractor): def metadata(self, page): self.info = info = json.loads(page.partition("=")[2]) + iget = info.get - data = self._data_from_gallery_info(info) - if self.config("metadata", False): - data.update(self._data_from_gallery_page(info)) - return data - - def _data_from_gallery_info(self, info): - language = info.get("language") + language = iget("language") if language: language = language.capitalize() - date = info.get("date") + date = iget("date") if date: date += ":00" tags = [] - for tinfo in info.get("tags") or (): + for tinfo in iget("tags") or (): tag = string.capwords(tinfo["tag"]) if tinfo.get("female"): tag += " ♀" @@ -109,35 +103,10 @@ class HitomiGalleryExtractor(GalleryExtractor): "lang" : util.language_to_code(language), "date" : text.parse_datetime(date, "%Y-%m-%d %H:%M:%S%z"), "tags" : tags, - } - - def _data_from_gallery_page(self, info): - url = "{}/galleries/{}.html".format(self.root, info["id"]) - - # follow redirects - while True: - response = self.request(url, fatal=False) - if b"<title>Redirect</title>" not in response.content: - break - url = text.extract( - response.text, 'http-equiv="refresh" content="', '"', - )[0].partition("=")[2] - - if response.status_code >= 400: - return {} - - def prep(value): - return [ - text.unescape(string.capwords(v)) - for v in text.extract_iter(value or "", '.html">', '<') - ] - - extr = text.extract_from(response.text) - return { - "artist" : prep(extr('<h2>', '</h2>')), - "group" : prep(extr('<td>Group</td><td>', '</td>')), - "parody" : prep(extr('<td>Series</td><td>', '</td>')), - "characters": prep(extr('<td>Characters</td><td>', '</td>')), + "artist" : [o["artist"] for o in iget("artists") or ()], + "group" : [o["group"] for o in iget("groups") or ()], + "parody" : [o["parody"] for o in iget("parodys") or ()], + "characters": [o["character"] for o in iget("characters") or ()] } def images(self, _): diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 20a4c1a..e07b64e 100644 --- 
a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2018-2020 Leonardo Taccari -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -43,6 +43,7 @@ class InstagramExtractor(Extractor): self.login() data = self.metadata() videos = self.config("videos", True) + previews = self.config("previews", False) video_headers = {"User-Agent": "Mozilla/5.0"} for post in self.posts(): @@ -56,14 +57,18 @@ class InstagramExtractor(Extractor): yield Message.Directory, post for file in files: - url = file.get("video_url") - if not url: - url = file["display_url"] - elif not videos: - continue - else: - file["_http_headers"] = video_headers file.update(post) + + url = file.get("video_url") + if url: + if videos: + file["_http_headers"] = video_headers + text.nameext_from_url(url, file) + yield Message.Url, url, file + if not previews: + continue + + url = file["display_url"] yield Message.Url, url, text.nameext_from_url(url, file) def metadata(self): diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 9537263..7287c38 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -42,6 +42,7 @@ class KemonopartyExtractor(Extractor): r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall find_hash = re.compile("/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})").match generators = self._build_file_generators(self.config("files")) + duplicates = self.config("duplicates") comments = self.config("comments") username = dms = None @@ -84,7 +85,7 @@ class KemonopartyExtractor(Extractor): match = find_hash(url) if match: post["hash"] = hash = match.group(1) - if hash in hashes: + if hash in hashes and not duplicates: self.log.debug("Skipping %s (duplicate)", url) continue hashes.add(hash) @@ -273,6 +274,11 @@ class KemonopartyPostExtractor(KemonopartyExtractor): ("https://kemono.party/patreon/user/4158582/post/32099982", { "count": 2, }), + # allow duplicates (#2440) + ("https://kemono.party/patreon/user/4158582/post/32099982", { + "options": (("duplicates", True),), + "count": 3, + }), # DMs (#2008) ("https://kemono.party/patreon/user/34134344/post/38129255", { "options": (("dms", True),), @@ -323,8 +329,9 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): }), (("https://kemono.party/discord" "/server/256559665620451329/channel/462437519519383555#"), { - "pattern": r"https://kemono\.party/data/attachments/discord" - r"/256559665620451329/\d+/\d+/.+", + "pattern": r"https://kemono\.party/data/(" + r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|" + r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)", "count": ">= 2", }), # 'inline' files diff --git a/gallery_dl/extractor/kissgoddess.py b/gallery_dl/extractor/kissgoddess.py index 85ec806..6e66772 100644 --- a/gallery_dl/extractor/kissgoddess.py +++ b/gallery_dl/extractor/kissgoddess.py @@ -20,7 +20,7 @@ class KissgoddessGalleryExtractor(GalleryExtractor): test = ("https://kissgoddess.com/album/18285.html", { "pattern": r"https://pic\.kissgoddess\.com" r"/gallery/16473/18285/s/\d+\.jpg", - "count": 8, + "count": 19, "keyword": { "gallery_id": 18285, "title": "[Young Champion Extra] 2016.02 No.03 菜乃花 安枝瞳 葉月あや", @@ -45,6 +45,8 @@ class KissgoddessGalleryExtractor(GalleryExtractor): while page: for url in text.extract_iter(page, "<img src='", "'"): 
yield url, None + for url in text.extract_iter(page, "<img data-original='", "'"): + yield url, None pnum += 1 url = "{}/album/{}_{}.html".format( diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py index 1b3dd18..0b0da65 100644 --- a/gallery_dl/extractor/mangasee.py +++ b/gallery_dl/extractor/mangasee.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -64,7 +64,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor): self.slug = extr('vm.IndexName = "', '"') data = self._transform_chapter(data) - data["manga"] = extr('vm.SeriesName = "', '"') + data["manga"] = text.unescape(extr('vm.SeriesName = "', '"')) return data def images(self, page): diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 6d0e94b..e9fde97 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -113,10 +113,16 @@ class NewgroundsExtractor(Extractor): if self.flash: url += "/format/flash" - response = self.request(url, fatal=False) - if response.status_code >= 400: - return {} - page = response.text + with self.request(url, fatal=False) as response: + if response.status_code >= 400: + return {} + page = response.text + + pos = page.find('id="adults_only"') + if pos >= 0: + msg = text.extract(page, 'class="highlight">', '<', pos)[0] + self.log.warning('"%s"', msg) + extr = text.extract_from(page) data = extract_data(extr, post_url) @@ -230,16 +236,20 @@ class NewgroundsExtractor(Extractor): yield fmt[1][0]["src"] def _pagination(self, kind): - root = self.user_root + url = "{}/{}".format(self.user_root, kind) + params = { + "page": 1, + "isAjaxRequest": "1", + } headers = { - "Accept": "application/json, text/javascript, */*; q=0.01", + "Referer": url, "X-Requested-With": "XMLHttpRequest", - "Referer": root, } - url = "{}/{}/page/1".format(root, kind) while True: - with self.request(url, headers=headers, fatal=False) as response: + with self.request( + url, params=params, headers=headers, + fatal=False) as response: try: data = response.json() except ValueError: @@ -250,14 +260,17 @@ class NewgroundsExtractor(Extractor): msg = ", ".join(text.unescape(e) for e in data["errors"]) raise exception.StopExtraction(msg) - for year in data["sequence"]: - for item in data["years"][str(year)]["items"]: + for year, items in data["items"].items(): + for item in items: page_url = text.extract(item, 'href="', '"')[0] - yield text.urljoin(root, page_url) + if page_url[0] == "/": + page_url = self.root + page_url + yield page_url - if not data["more"]: + more = data.get("load_more") + if not more or len(more) < 8: return - url = text.urljoin(root, data["more"]) + params["page"] += 1 class NewgroundsImageExtractor(NewgroundsExtractor): @@ -293,7 +306,12 @@ class NewgroundsImageExtractor(NewgroundsExtractor): ("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", { "url": "84eec95e663041a80630df72719f231e157e5f5d", "count": 2, - }) + }), + # "adult" rated (#2456) + ("https://www.newgrounds.com/art/view/kekiiro/red", { + "options": (("username", None),), + "count": 1, + }), ) def __init__(self, match): @@ -360,6 +378,11 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): "pattern": r"https://uploads\.ungrounded\.net/alternate/1482000" r"/1482860_alternate_102516\.720p\.mp4\?\d+", }), + # "adult" rated (#2456) 
+ ("https://www.newgrounds.com/portal/view/717744", { + "options": (("username", None),), + "count": 1, + }), ) def __init__(self, match): @@ -454,25 +477,28 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor): ) def _pagination(self, kind): - num = 1 + url = "{}/favorites/{}".format(self.user_root, kind) + params = { + "page": 1, + "isAjaxRequest": "1", + } headers = { - "Accept": "application/json, text/javascript, */*; q=0.01", + "Referer": url, "X-Requested-With": "XMLHttpRequest", - "Referer": self.user_root, } while True: - url = "{}/favorites/{}/{}".format(self.user_root, kind, num) - response = self.request(url, headers=headers) + response = self.request(url, params=params, headers=headers) if response.history: return - favs = self._extract_favorites(response.text) + data = response.json() + favs = self._extract_favorites(data.get("component") or "") yield from favs if len(favs) < 24: return - num += 1 + params["page"] += 1 def _extract_favorites(self, page): return [ diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 25344e8..2079b73 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2021 Mike Fährmann +# Copyright 2016-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -20,8 +20,8 @@ BASE_PATTERN = r"(?:https?://)?(?:\w+\.)?pinterest\.[\w.]+" class PinterestExtractor(Extractor): """Base class for pinterest extractors""" category = "pinterest" - filename_fmt = "{category}_{id}.{extension}" - archive_fmt = "{id}" + filename_fmt = "{category}_{id}{media_id:?_//}.{extension}" + archive_fmt = "{id}{media_id}" root = "https://www.pinterest.com" def __init__(self, match): @@ -35,28 +35,39 @@ class PinterestExtractor(Extractor): yield Message.Directory, data for pin in self.pins(): + pin.update(data) - try: - media = self._media_from_pin(pin) - except Exception: - self.log.debug("Unable to fetch download URL for pin %s", - pin.get("id")) - continue + carousel_data = pin.get("carousel_data") + if carousel_data: + for num, slot in enumerate(carousel_data["carousel_slots"], 1): + slot["media_id"] = slot.pop("id") + pin.update(slot) + pin["num"] = num + size, image = next(iter(slot["images"].items())) + url = image["url"].replace("/" + size + "/", "/originals/") + yield Message.Url, url, text.nameext_from_url(url, pin) - if not videos and media.get("duration") is not None: - continue + else: + try: + media = self._media_from_pin(pin) + except Exception: + self.log.debug("Unable to fetch download URL for pin %s", + pin.get("id")) + continue - pin.update(data) - pin.update(media) - url = media["url"] - text.nameext_from_url(url, pin) + if videos or media.get("duration") is None: + pin.update(media) + pin["num"] = 0 + pin["media_id"] = "" + + url = media["url"] + text.nameext_from_url(url, pin) - if pin["extension"] == "m3u8": - url = "ytdl:" + url - pin["extension"] = "mp4" - pin["_ytdl_extra"] = {"protocol": "m3u8_native"} + if pin["extension"] == "m3u8": + url = "ytdl:" + url + pin["extension"] = "mp4" - yield Message.Url, url, pin + yield Message.Url, url, pin def metadata(self): """Return general metadata""" @@ -124,7 +135,8 @@ class PinterestBoardExtractor(PinterestExtractor): subcategory = "board" directory_fmt = ("{category}", "{board[owner][username]}", "{board[name]}") archive_fmt = "{board[id]}_{id}" - pattern = BASE_PATTERN + 
r"/(?!pin/)([^/?#&]+)/(?!_saved)([^/?#&]+)/?$" + pattern = (BASE_PATTERN + r"/(?!pin/)([^/?#&]+)" + "/(?!_saved|_created)([^/?#&]+)/?$") test = ( ("https://www.pinterest.com/g1952849/test-/", { "pattern": r"https://i\.pinimg\.com/originals/", @@ -192,6 +204,28 @@ class PinterestUserExtractor(PinterestExtractor): yield Message.Queue, self.root + url, board +class PinterestCreatedExtractor(PinterestExtractor): + """Extractor for a user's created pins""" + subcategory = "created" + directory_fmt = ("{category}", "{user}") + pattern = BASE_PATTERN + r"/(?!pin/)([^/?#&]+)/_created/?$" + test = ("https://www.pinterest.com/amazon/_created", { + "pattern": r"https://i\.pinimg\.com/originals/[0-9a-f]{2}" + r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg", + "count": 10, + }) + + def __init__(self, match): + PinterestExtractor.__init__(self, match) + self.user = text.unquote(match.group(1)) + + def metadata(self): + return {"user": self.user} + + def pins(self): + return self.api.user_activity_pins(self.user) + + class PinterestSectionExtractor(PinterestExtractor): """Extractor for board sections on pinterest.com""" subcategory = "section" @@ -385,6 +419,16 @@ class PinterestAPI(): options = {"board_id": board_id, "add_vase": True} return self._pagination("BoardRelatedPixieFeed", options) + def user_activity_pins(self, user): + """Yield pins created by 'user'""" + options = { + "exclude_add_pin_rep": True, + "field_set_key" : "grid_item", + "is_own_profile_pins": False, + "username" : user, + } + return self._pagination("UserActivityPins", options) + def search(self, query): """Yield pins from searches""" options = {"query": query, "scope": "pins", "rs": "typed"} diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 965391c..2af917d 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -8,6 +8,7 @@ from .common import Extractor, Message from .. 
import text +import itertools class SkebExtractor(Extractor): @@ -22,7 +23,6 @@ class SkebExtractor(Extractor): Extractor.__init__(self, match) self.user_name = match.group(1) self.thumbnails = self.config("thumbnails", False) - self.sent_requests = self.config("sent-requests", False) def items(self): for user_name, post_num in self.posts(): @@ -35,18 +35,18 @@ class SkebExtractor(Extractor): def posts(self): """Return post number""" - def _pagination(self): - url = "{}/api/users/{}/works".format(self.root, self.user_name) - params = {"role": "creator", "sort": "date", "offset": 0} + def _pagination(self, url, params): headers = {"Referer": self.root, "Authorization": "Bearer null"} - do_requests = self.sent_requests + params["offset"] = 0 while True: posts = self.request(url, params=params, headers=headers).json() for post in posts: - post_num = post["path"].rpartition("/")[2] - user_name = post["path"].split("/")[1][1:] + parts = post["path"].split("/") + user_name = parts[1][1:] + post_num = parts[3] + if post["private"]: self.log.debug("Skipping @%s/%s (private)", user_name, post_num) @@ -54,13 +54,7 @@ class SkebExtractor(Extractor): yield user_name, post_num if len(posts) < 30: - if do_requests: - params["offset"] = 0 - params['role'] = "client" - do_requests = False - continue - else: - return + return params["offset"] += 30 def _get_post_data(self, user_name, post_num): @@ -134,6 +128,54 @@ class SkebPostExtractor(SkebExtractor): """Extractor for a single skeb post""" subcategory = "post" pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)" + test = ("https://skeb.jp/@kanade_cocotte/works/38", { + "count": 2, + "keyword": { + "anonymous": False, + "body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ", + "client": { + "avatar_url": "https://pbs.twimg.com/profile_images" + "/1471184042791895042/f0DcWFGl.jpg", + "header_url": None, + "id": 1196514, + "name": "湊ラギ", + "screen_name": "minato_ragi", + }, + "completed_at": "2022-02-27T14:03:45.442Z", + "content_category": "preview", + "creator": { + "avatar_url": "https://pbs.twimg.com/profile_images" + "/1225470417063645184/P8_SiB0V.jpg", + "header_url": "https://pbs.twimg.com/profile_banners" + "/71243217/1647958329/1500x500", + "id": 159273, + "name": "イチノセ奏", + "screen_name": "kanade_cocotte", + }, + "date": "dt:2022-02-27 14:03:45", + "file_id": int, + "file_url": str, + "genre": "art", + "nsfw": False, + "original": { + "byte_size": int, + "duration": None, + "extension": "re:psd|png", + "frame_rate": None, + "height": 3727, + "is_movie": False, + "width": 2810, + }, + "post_num": "38", + "post_url": "https://skeb.jp/@kanade_cocotte/works/38", + "source_body": None, + "source_thanks": None, + "tags": list, + "thanks": None, + "translated_body": False, + "translated_thanks": None, + } + }) def __init__(self, match): SkebExtractor.__init__(self, match) @@ -146,7 +188,23 @@ class SkebPostExtractor(SkebExtractor): class SkebUserExtractor(SkebExtractor): """Extractor for all posts from a skeb user""" subcategory = "user" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)" + pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$" + test = ("https://skeb.jp/@kanade_cocotte", { + "pattern": r"https://skeb\.imgix\.net/uploads/origins/[\w-]+" + r"\?bg=%23fff&auto=format&txtfont=bold&txtshad=70" + r"&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150" + r"&txt=SAMPLE&w=800&s=\w+", + "range": "1-5", + }) def posts(self): - return self._pagination() + url = "{}/api/users/{}/works".format(self.root, self.user_name) + + params = {"role": "creator", 
"sort": "date"} + posts = self._pagination(url, params) + + if self.config("sent-requests", False): + params = {"role": "client", "sort": "date"} + posts = itertools.chain(posts, self._pagination(url, params)) + + return posts diff --git a/gallery_dl/extractor/telegraph.py b/gallery_dl/extractor/telegraph.py new file mode 100644 index 0000000..8e9bf2c --- /dev/null +++ b/gallery_dl/extractor/telegraph.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractor for https://telegra.ph/""" + +from .common import GalleryExtractor +from .. import text + + +class TelegraphGalleryExtractor(GalleryExtractor): + """Extractor for articles from telegra.ph""" + + category = "telegraph" + root = "https://telegra.ph" + directory_fmt = ("{category}", "{slug}") + filename_fmt = "{num_formatted}_{filename}.{extension}" + archive_fmt = "{slug}_{num}" + pattern = r"(?:https?://)(?:www\.)??telegra\.ph(/[^/?#]+)" + test = ( + ("https://telegra.ph/Telegraph-Test-03-28", { + "pattern": r"https://telegra\.ph/file/[0-9a-f]+\.png", + "keyword": { + "author": "mikf", + "caption": r"re:test|", + "count": 2, + "date": "dt:2022-03-28 16:01:36", + "description": "Just a test", + "post_url": "https://telegra.ph/Telegraph-Test-03-28", + "slug": "Telegraph-Test-03-28", + "title": "Telegra.ph Test", + }, + }), + ("https://telegra.ph/森-03-28", { + "pattern": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg", + "count": 1, + "keyword": { + "author": "&", + "caption": "kokiri", + "count": 1, + "date": "dt:2022-03-28 16:31:26", + "description": "コキリの森", + "extension": "jpg", + "filename": "3ea79d23b0dd0889f215a", + "num": 1, + "num_formatted": "1", + "post_url": "https://telegra.ph/森-03-28", + "slug": "森-03-28", + "title": '"森"', + "url": "https://telegra.ph/file/3ea79d23b0dd0889f215a.jpg", + }, + }), + ) + + def metadata(self, page): + extr = text.extract_from(page) + data = { + "title": text.unescape(extr( + 'property="og:title" content="', '"')), + "description": text.unescape(extr( + 'property="og:description" content="', '"')), + "date": text.parse_datetime(extr( + 'property="article:published_time" content="', '"'), + "%Y-%m-%dT%H:%M:%S%z"), + "author": text.unescape(extr( + 'property="article:author" content="', '"')), + "post_url": text.unescape(extr( + 'rel="canonical" href="', '"')), + } + data["slug"] = data["post_url"][19:] + return data + + def images(self, page): + figures = tuple(text.extract_iter(page, "<figure>", "</figure>")) + num_zeroes = len(str(len(figures))) + num = 0 + + result = [] + for figure in figures: + src, pos = text.extract(figure, 'src="', '"') + if src.startswith("/embed/"): + continue + caption, pos = text.extract(figure, "<figcaption>", "<", pos) + url = self.root + src + num += 1 + + result.append((url, { + "url" : url, + "caption" : text.unescape(caption), + "num" : num, + "num_formatted": str(num).zfill(num_zeroes), + })) + return result diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py index ec8ab35..355ca21 100644 --- a/gallery_dl/extractor/twibooru.py +++ b/gallery_dl/extractor/twibooru.py @@ -36,8 +36,9 @@ class TwibooruExtractor(BooruExtractor): post["date"] = text.parse_datetime( post["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ") - name, sep, rest = post["name"].rpartition(".") - post["filename"] = name if sep else rest + if "name" in post: + name, sep, rest = 
post["name"].rpartition(".") + post["filename"] = name if sep else rest class TwibooruPostExtractor(TwibooruExtractor): diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 6d51834..4c46170 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -15,7 +15,7 @@ import json BASE_PATTERN = ( r"(?:https?://)?(?:www\.|mobile\.)?" - r"(?:twitter\.com|nitter\.net)" + r"(?:(?:fx)?twitter\.com|nitter\.net)" ) @@ -217,23 +217,24 @@ class TwitterExtractor(Extractor): if "legacy" in tweet: tweet = tweet["legacy"] + tget = tweet.get entities = tweet["entities"] tdata = { "tweet_id" : text.parse_int(tweet["id_str"]), "retweet_id" : text.parse_int( - tweet.get("retweeted_status_id_str")), + tget("retweeted_status_id_str")), "quote_id" : text.parse_int( - tweet.get("quoted_status_id_str")), + tget("quoted_status_id_str")), "reply_id" : text.parse_int( - tweet.get("in_reply_to_status_id_str")), + tget("in_reply_to_status_id_str")), "date" : text.parse_datetime( tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"), "user" : user, "lang" : tweet["lang"], - "favorite_count": tweet["favorite_count"], - "quote_count" : tweet["quote_count"], - "reply_count" : tweet["reply_count"], - "retweet_count" : tweet["retweet_count"], + "favorite_count": tget("favorite_count"), + "quote_count" : tget("quote_count"), + "reply_count" : tget("reply_count"), + "retweet_count" : tget("retweet_count"), } hashtags = entities.get("hashtags") @@ -248,7 +249,7 @@ class TwitterExtractor(Extractor): "nick": u["name"], } for u in mentions] - content = tweet["full_text"] + content = tget("full_text") or tget("text") or "" urls = entities.get("urls") if urls: for url in urls: @@ -269,33 +270,36 @@ class TwitterExtractor(Extractor): return tdata def _transform_user(self, user): + uid = user.get("rest_id") or user["id_str"] + try: - return self._user_cache[user.get("rest_id") or user["id_str"]] + return self._user_cache[uid] except KeyError: pass - uid = user.get("rest_id") or user["id_str"] if "legacy" in user: user = user["legacy"] + + uget = user.get entities = user["entities"] self._user_cache[uid] = udata = { "id" : text.parse_int(uid), "name" : user["screen_name"], "nick" : user["name"], - "location" : user["location"], + "location" : uget("location"), "date" : text.parse_datetime( - user["created_at"], "%a %b %d %H:%M:%S %z %Y"), - "verified" : user.get("verified", False), - "profile_banner" : user.get("profile_banner_url", ""), - "profile_image" : user.get( + uget("created_at"), "%a %b %d %H:%M:%S %z %Y"), + "verified" : uget("verified", False), + "profile_banner" : uget("profile_banner_url", ""), + "profile_image" : uget( "profile_image_url_https", "").replace("_normal.", "."), - "favourites_count": user["favourites_count"], - "followers_count" : user["followers_count"], - "friends_count" : user["friends_count"], - "listed_count" : user["listed_count"], - "media_count" : user["media_count"], - "statuses_count" : user["statuses_count"], + "favourites_count": uget("favourites_count"), + "followers_count" : uget("followers_count"), + "friends_count" : uget("friends_count"), + "listed_count" : uget("listed_count"), + "media_count" : uget("media_count"), + "statuses_count" : uget("statuses_count"), } descr = user["description"] @@ -653,6 +657,11 @@ class TwitterTweetExtractor(TwitterExtractor): ("https://twitter.com/i/web/status/1486373748911575046", { "count": 4, }), + # age-restricted (#2354) + ("https://twitter.com/mightbecursed/status/1492954264909479936", { + "options": 
(("syndication", True),), + "count": 1, + }), ) def __init__(self, match): @@ -770,6 +779,7 @@ class TwitterAPI(): } self._nsfw_warning = True + self._syndication = extractor.config("syndication") self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode self._user = None @@ -1153,9 +1163,10 @@ class TwitterAPI(): elif esw("conversationthread-"): tweets.extend(entry["content"]["items"]) elif esw("tombstone-"): - self._report_tombstone( - entry, - entry["content"]["itemContent"]["tombstoneInfo"]) + item = entry["content"]["itemContent"] + item["tweet_results"] = \ + {"result": {"tombstone": item["tombstoneInfo"]}} + tweets.append(entry) elif esw("cursor-bottom-"): cursor = entry["content"] if not cursor.get("stopOnEmptyResponse", True): @@ -1168,8 +1179,10 @@ class TwitterAPI(): tweet = ((entry.get("content") or entry["item"]) ["itemContent"]["tweet_results"]["result"]) if "tombstone" in tweet: - self._report_tombstone(entry, tweet["tombstone"]) - continue + tweet = self._process_tombstone( + entry, tweet["tombstone"]) + if not tweet: + continue if "tweet" in tweet: tweet = tweet["tweet"] legacy = tweet["legacy"] @@ -1259,10 +1272,45 @@ class TwitterAPI(): return variables["cursor"] = cursor - def _report_tombstone(self, entry, tombstone): + def _process_tombstone(self, entry, tombstone): text = (tombstone.get("richText") or tombstone["text"])["text"] - if text.startswith("Age-restricted") and self._nsfw_warning: - self.extractor.log.warning(text) - self._nsfw_warning = False - self.extractor.log.debug( - "Skipping %s (%s)", entry["entryId"].rpartition("-")[2], text) + tweet_id = entry["entryId"].rpartition("-")[2] + + if text.startswith("Age-restricted"): + if self._syndication: + return self._syndication_tweet(tweet_id) + elif self._nsfw_warning: + self._nsfw_warning = False + self.extractor.log.warning('"%s"', text) + + self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text) + + def _syndication_tweet(self, tweet_id): + tweet = self.extractor.request( + "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json() + + tweet["user"]["description"] = "" + tweet["user"]["entities"] = {"description": {}} + + if "video" in tweet: + video = tweet["video"] + del video["variants"][:-1] + video["variants"][0]["url"] = video["variants"][0]["src"] + tweet["extended_entities"] = {"media": [{ + "video_info" : video, + "original_info": {"width" : 0, "height": 0}, + }]} + elif "photos" in tweet: + for p in tweet["photos"]: + p["media_url_https"] = p["url"] + p["original_info"] = { + "width" : p["width"], + "height": p["height"], + } + tweet["extended_entities"] = {"media": tweet["photos"]} + + return { + "rest_id": tweet["id_str"], + "legacy" : tweet, + "user" : tweet["user"], + } diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py index 2405dc3..6036322 100644 --- a/gallery_dl/extractor/unsplash.py +++ b/gallery_dl/extractor/unsplash.py @@ -193,7 +193,7 @@ class UnsplashSearchExtractor(UnsplashExtractor): """Extractor for unsplash search results""" subcategory = "search" pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?" 
- test = ("https://unsplash.com/s/photos/nature", { + test = ("https://unsplash.com/s/photos/hair-style", { "pattern": r"https://images\.unsplash\.com/((flagged/)?photo-\d+-\w+" r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", "range": "1-30", @@ -206,7 +206,7 @@ class UnsplashSearchExtractor(UnsplashExtractor): def photos(self): url = self.root + "/napi/search/photos" - params = {"query": text.unquote(self.item)} + params = {"query": text.unquote(self.item.replace('-', ' '))} if self.query: params.update(text.parse_query(self.query)) return self._pagination(url, params, True) diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index c2b4d99..27d5e40 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -43,6 +43,8 @@ def parse(format_string, default=None): cls = ExpressionFormatter elif kind == "M": cls = ModuleFormatter + elif kind == "F": + cls = FStringFormatter formatter = _CACHE[key] = cls(format_string, default) return formatter @@ -206,6 +208,13 @@ class ModuleFormatter(): self.format_map = getattr(module, function_name) +class FStringFormatter(): + """Generate text by evaluaring an f-string literal""" + + def __init__(self, fstring, default=None): + self.format_map = util.compile_expression("f'''" + fstring + "'''") + + def parse_field_name(field_name): first, rest = _string.formatter_field_name_split(field_name) funcs = [] @@ -245,7 +254,7 @@ def parse_format_spec(format_spec, conversion): "C": string.capwords, "j": json.dumps, "t": str.strip, - "T": util.to_timestamp, + "T": util.datetime_to_timestamp_string, "d": text.parse_timestamp, "U": text.unescape, "S": util.to_string, diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 3eebf0b..044369a 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -389,8 +389,10 @@ class DownloadJob(Job): def initialize(self, kwdict=None): """Delayed initialization of PathFormat, etc.""" - cfg = self.extractor.config - pathfmt = self.pathfmt = path.PathFormat(self.extractor) + extr = self.extractor + cfg = extr.config + + pathfmt = self.pathfmt = path.PathFormat(extr) if kwdict: pathfmt.set_directory(kwdict) @@ -403,17 +405,18 @@ class DownloadJob(Job): archive = cfg("archive") if archive: archive = util.expand_path(archive) + archive_format = (cfg("archive-prefix", extr.category) + + cfg("archive-format", extr.archive_fmt)) try: if "{" in archive: archive = formatter.parse(archive).format_map(kwdict) - self.archive = util.DownloadArchive(archive, self.extractor) + self.archive = util.DownloadArchive(archive, archive_format) except Exception as exc: - self.extractor.log.warning( + extr.log.warning( "Failed to open download archive at '%s' ('%s: %s')", archive, exc.__class__.__name__, exc) else: - self.extractor.log.debug( - "Using download archive '%s'", archive) + extr.log.debug("Using download archive '%s'", archive) skip = cfg("skip", True) if skip: @@ -435,7 +438,7 @@ class DownloadJob(Job): if self.archive: self.archive.check = pathfmt.exists - postprocessors = self.extractor.config_accumulate("postprocessors") + postprocessors = extr.config_accumulate("postprocessors") if postprocessors: self.hooks = collections.defaultdict(list) pp_log = self.get_logger("postprocessor") @@ -453,7 +456,7 @@ class DownloadJob(Job): clist = pp_dict.get("blacklist") negate = 
True if clist and not util.build_extractor_filter( - clist, negate)(self.extractor): + clist, negate)(extr): continue name = pp_dict.get("name") @@ -471,8 +474,7 @@ class DownloadJob(Job): pp_list.append(pp_obj) if pp_list: - self.extractor.log.debug( - "Active postprocessor modules: %s", pp_list) + extr.log.debug("Active postprocessor modules: %s", pp_list) if "init" in self.hooks: for callback in self.hooks["init"]: callback(pathfmt) @@ -530,6 +532,10 @@ class SimulationJob(DownloadJob): class KeywordJob(Job): """Print available keywords""" + def __init__(self, url, parent=None): + Job.__init__(self, url, parent) + self.private = config.get(("output",), "private") + def handle_url(self, url, kwdict): print("\nKeywords for filenames and --filter:") print("------------------------------------") @@ -567,21 +573,20 @@ class KeywordJob(Job): KeywordJob(extr or url, self).run() raise exception.StopExtraction() - @staticmethod - def print_kwdict(kwdict, prefix=""): + def print_kwdict(self, kwdict, prefix=""): """Print key-value pairs in 'kwdict' with formatting""" suffix = "]" if prefix else "" for key, value in sorted(kwdict.items()): - if key[0] == "_": + if key[0] == "_" and not self.private: continue key = prefix + key + suffix if isinstance(value, dict): - KeywordJob.print_kwdict(value, key + "[") + self.print_kwdict(value, key + "[") elif isinstance(value, list): if value and isinstance(value[0], dict): - KeywordJob.print_kwdict(value[0], key + "[][") + self.print_kwdict(value[0], key + "[][") else: print(key, "[]", sep="") for val in value: diff --git a/gallery_dl/option.py b/gallery_dl/option.py index e1ada09..782063d 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -401,6 +401,18 @@ def build_parser(): help="Convert Pixiv Ugoira to WebM in VP9 lossless mode", ) postprocessor.add_argument( + "--ugoira-conv-copy", + dest="postprocessors", action="append_const", const={ + "name" : "ugoira", + "extension" : "mkv", + "ffmpeg-args" : ("-c:v", "copy"), + "ffmpeg-twopass" : False, + "repeat-last-frame": False, + "whitelist" : ("pixiv", "danbooru"), + }, + help="Convert Pixiv Ugoira to MKV without re-encoding any frames", + ) + postprocessor.add_argument( "--write-metadata", dest="postprocessors", action="append_const", const="metadata", diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index e776888..5e8f3e9 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -59,9 +59,35 @@ class MetadataPP(PostProcessor): events = events.split(",") job.register_hooks({event: self.run for event in events}, options) + archive = options.get("archive") + if archive: + extr = job.extractor + archive = util.expand_path(archive) + archive_format = ( + options.get("archive-prefix", extr.category) + + options.get("archive-format", "_MD_" + extr.archive_fmt)) + try: + if "{" in archive: + archive = formatter.parse(archive).format_map( + job.pathfmt.kwdict) + self.archive = util.DownloadArchive( + archive, archive_format, "_archive_metadata") + except Exception as exc: + self.log.warning( + "Failed to open download archive at '%s' ('%s: %s')", + archive, exc.__class__.__name__, exc) + else: + self.log.debug("Using download archive '%s'", archive) + else: + self.archive = None + self.mtime = options.get("mtime") def run(self, pathfmt): + archive = self.archive + if archive and archive.check(pathfmt.kwdict): + return + directory = self._directory(pathfmt) path = directory + self._filename(pathfmt) @@ -73,6 +99,9 @@ class 
MetadataPP(PostProcessor): with open(path, "w", encoding="utf-8") as fp: self.write(fp, pathfmt.kwdict) + if archive: + archive.add(pathfmt.kwdict) + if self.mtime: mtime = pathfmt.kwdict.get("_mtime") if mtime: diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py index 098984a..3f8d90a 100644 --- a/gallery_dl/postprocessor/mtime.py +++ b/gallery_dl/postprocessor/mtime.py @@ -9,7 +9,8 @@ """Use metadata as file modification time""" from .common import PostProcessor -from ..text import parse_int +from .. import text, util +from datetime import datetime class MtimePP(PostProcessor): @@ -27,8 +28,11 @@ class MtimePP(PostProcessor): def run(self, pathfmt): mtime = pathfmt.kwdict.get(self.key) - ts = getattr(mtime, "timestamp", None) - pathfmt.kwdict["_mtime"] = ts() if ts else parse_int(mtime) + pathfmt.kwdict["_mtime"] = ( + util.datetime_to_timestamp(mtime) + if isinstance(mtime, datetime) else + text.parse_int(mtime) + ) __postprocessor__ = MtimePP diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index e5bdebc..c5477d2 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,12 +10,20 @@ from .common import PostProcessor from .. import util -import collections import subprocess import tempfile import zipfile +import shutil import os +try: + from math import gcd +except ImportError: + def gcd(a, b): + while b: + a, b = b, a % b + return a + class UgoiraPP(PostProcessor): @@ -27,19 +35,37 @@ class UgoiraPP(PostProcessor): self.output = options.get("ffmpeg-output", True) self.delete = not options.get("keep-files", False) self.repeat = options.get("repeat-last-frame", True) + self.mtime = options.get("mtime") ffmpeg = options.get("ffmpeg-location") self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg" + mkvmerge = options.get("mkvmerge-location") + self.mkvmerge = util.expand_path(mkvmerge) if mkvmerge else "mkvmerge" + + demuxer = options.get("ffmpeg-demuxer") + if demuxer is None or demuxer == "auto": + if self.extension in ("webm", "mkv") and ( + mkvmerge or shutil.which("mkvmerge")): + demuxer = "mkvmerge" + else: + demuxer = "concat" if util.WINDOWS else "image2" + + if demuxer == "mkvmerge": + self._process = self._process_mkvmerge + self._finalize = self._finalize_mkvmerge + elif demuxer == "image2": + self._process = self._process_image2 + self._finalize = None + else: + self._process = self._process_concat + self._finalize = None + self.log.debug("using %s demuxer", demuxer) + rate = options.get("framerate", "auto") if rate != "auto": self.calculate_framerate = lambda _: (None, rate) - if options.get("ffmpeg-demuxer") == "image2": - self._process = self._image2 - else: - self._process = self._concat - if options.get("libx264-prevent-odd", True): # get last video-codec argument vcodec = None @@ -88,13 +114,12 @@ class UgoiraPP(PostProcessor): return # process frames and collect command-line arguments - args = self._process(tempdir) + pathfmt.set_extension(self.extension) + args = self._process(pathfmt, tempdir) if self.args: args += self.args - self.log.debug("ffmpeg args: %s", args) # invoke ffmpeg - pathfmt.set_extension(self.extension) try: if self.twopass: if "-f" not in self.args: @@ -105,48 +130,61 @@ class 
UgoiraPP(PostProcessor): else: args.append(pathfmt.realpath) self._exec(args) + if self._finalize: + self._finalize(pathfmt, tempdir) except OSError as exc: print() self.log.error("Unable to invoke FFmpeg (%s: %s)", exc.__class__.__name__, exc) pathfmt.realpath = pathfmt.temppath else: + if self.mtime: + mtime = pathfmt.kwdict.get("_mtime") + if mtime: + util.set_mtime(pathfmt.realpath, mtime) if self.delete: pathfmt.delete = True else: pathfmt.set_extension("zip") - def _concat(self, path): - ffconcat = path + "/ffconcat.txt" - - content = ["ffconcat version 1.0"] - append = content.append - for frame in self._frames: - append("file '{}'\nduration {}".format( - frame["file"], frame["delay"] / 1000)) - if self.repeat: - append("file '{}'".format(frame["file"])) - append("") - - with open(ffconcat, "w") as file: - file.write("\n".join(content)) + def _exec(self, args): + self.log.debug(args) + out = None if self.output else subprocess.DEVNULL + return subprocess.Popen(args, stdout=out, stderr=out).wait() + def _process_concat(self, pathfmt, tempdir): rate_in, rate_out = self.calculate_framerate(self._frames) args = [self.ffmpeg, "-f", "concat"] if rate_in: args += ("-r", str(rate_in)) - args += ("-i", ffconcat) + args += ("-i", self._write_ffmpeg_concat(tempdir)) if rate_out: args += ("-r", str(rate_out)) return args - def _image2(self, path): - path += "/" + def _process_image2(self, pathfmt, tempdir): + tempdir += "/" + frames = self._frames + + # add extra frame if necessary + if self.repeat and not self._delay_is_uniform(frames): + last = frames[-1] + delay_gcd = self._delay_gcd(frames) + if last["delay"] - delay_gcd > 0: + last["delay"] -= delay_gcd + + self.log.debug("non-uniform delays; inserting extra frame") + last_copy = last.copy() + frames.append(last_copy) + name, _, ext = last_copy["file"].rpartition(".") + last_copy["file"] = "{:>06}.{}".format(int(name)+1, ext) + shutil.copyfile(tempdir + last["file"], + tempdir + last_copy["file"]) # adjust frame mtime values ts = 0 - for frame in self._frames: - os.utime(path + frame["file"], ns=(ts, ts)) + for frame in frames: + os.utime(tempdir + frame["file"], ns=(ts, ts)) ts += frame["delay"] * 1000000 return [ @@ -155,18 +193,90 @@ class UgoiraPP(PostProcessor): "-ts_from_file", "2", "-pattern_type", "sequence", "-i", "{}%06d.{}".format( - path.replace("%", "%%"), frame["file"].rpartition(".")[2]), + tempdir.replace("%", "%%"), + frame["file"].rpartition(".")[2] + ), ] - def _exec(self, args): - out = None if self.output else subprocess.DEVNULL - return subprocess.Popen(args, stdout=out, stderr=out).wait() + def _process_mkvmerge(self, pathfmt, tempdir): + self._realpath = pathfmt.realpath + pathfmt.realpath = tempdir + "/temp." 
+ self.extension + + return [ + self.ffmpeg, + "-f", "image2", + "-pattern_type", "sequence", + "-i", "{}/%06d.{}".format( + tempdir.replace("%", "%%"), + self._frames[0]["file"].rpartition(".")[2] + ), + ] + + def _finalize_mkvmerge(self, pathfmt, tempdir): + args = [ + self.mkvmerge, + "-o", self._realpath, + "--timecodes", "0:" + self._write_mkvmerge_timecodes(tempdir), + ] + if self.extension == "webm": + args.append("--webm") + args += ("=", pathfmt.realpath) + + pathfmt.realpath = self._realpath + self._exec(args) + + def _write_ffmpeg_concat(self, tempdir): + content = ["ffconcat version 1.0"] + append = content.append + + for frame in self._frames: + append("file '{}'\nduration {}".format( + frame["file"], frame["delay"] / 1000)) + if self.repeat: + append("file '{}'".format(frame["file"])) + append("") + + ffconcat = tempdir + "/ffconcat.txt" + with open(ffconcat, "w") as file: + file.write("\n".join(content)) + return ffconcat + + def _write_mkvmerge_timecodes(self, tempdir): + content = ["# timecode format v2"] + append = content.append + + delay_sum = 0 + for frame in self._frames: + append(str(delay_sum)) + delay_sum += frame["delay"] + append(str(delay_sum)) + append("") + + timecodes = tempdir + "/timecodes.tc" + with open(timecodes, "w") as file: + file.write("\n".join(content)) + return timecodes + + def calculate_framerate(self, frames): + uniform = self._delay_is_uniform(frames) + if uniform: + return ("1000/{}".format(frames[0]["delay"]), None) + return (None, "1000/{}".format(self._delay_gcd(frames))) + + @staticmethod + def _delay_gcd(frames): + result = frames[0]["delay"] + for f in frames: + result = gcd(result, f["delay"]) + return result @staticmethod - def calculate_framerate(framelist): - counter = collections.Counter(frame["delay"] for frame in framelist) - fps = "1000/{}".format(min(counter)) - return (fps, None) if len(counter) == 1 else (None, fps) + def _delay_is_uniform(frames): + delay = frames[0]["delay"] + for f in frames: + if f["delay"] != delay: + return False + return True __postprocessor__ = UgoiraPP diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 92d1620..e8af358 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -71,6 +71,20 @@ def unique_sequence(iterable): yield element +def contains(values, elements, separator=" "): + """Returns True if at least one of 'elements' is contained in 'values'""" + if isinstance(values, str): + values = values.split(separator) + + if not isinstance(elements, (tuple, list)): + return elements in values + + for e in elements: + if e in values: + return True + return False + + def raises(cls): """Returns a function that raises 'cls' as exception""" def wrap(*args): @@ -173,8 +187,13 @@ def to_string(value): return str(value) -def to_timestamp(dt): - """Convert naive datetime to UTC timestamp string""" +def datetime_to_timestamp(dt): + """Convert naive UTC datetime to timestamp""" + return (dt - EPOCH) / SECOND + + +def datetime_to_timestamp_string(dt): + """Convert naive UTC datetime to timestamp string""" try: return str((dt - EPOCH) // SECOND) except Exception: @@ -289,12 +308,12 @@ def load_cookiestxt(fp): for line in fp: - line = line.lstrip() + line = line.lstrip(" ") # strip '#HttpOnly_' if line.startswith("#HttpOnly_"): line = line[10:] # ignore empty lines and comments - if not line or line[0] in ("#", "$"): + if not line or line[0] in ("#", "$", "\n"): continue # strip trailing '\n' if line[-1] == "\n": @@ -326,6 +345,9 @@ def save_cookiestxt(fp, cookies): fp.write("# Netscape HTTP Cookie 
File\n\n") for cookie in cookies: + if not cookie.domain: + continue + if cookie.value is None: name = "" value = cookie.name @@ -421,6 +443,7 @@ WINDOWS = (os.name == "nt") SENTINEL = object() SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"} GLOBALS = { + "contains" : contains, "parse_int": text.parse_int, "urlsplit" : urllib.parse.urlsplit, "datetime" : datetime.datetime, @@ -669,11 +692,14 @@ class ExtendedUrl(): class DownloadArchive(): - def __init__(self, path, extractor): + def __init__(self, path, format_string, cache_key="_archive_key"): con = sqlite3.connect(path, timeout=60, check_same_thread=False) con.isolation_level = None + self.close = con.close self.cursor = con.cursor() + self.keygen = format_string.format_map + self._cache_key = cache_key try: self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " @@ -682,20 +708,16 @@ class DownloadArchive(): # fallback for missing WITHOUT ROWID support (#553) self.cursor.execute("CREATE TABLE IF NOT EXISTS archive " "(entry PRIMARY KEY)") - self.keygen = ( - extractor.config("archive-prefix", extractor.category) + - extractor.config("archive-format", extractor.archive_fmt) - ).format_map def check(self, kwdict): """Return True if the item described by 'kwdict' exists in archive""" - key = kwdict["_archive_key"] = self.keygen(kwdict) + key = kwdict[self._cache_key] = self.keygen(kwdict) self.cursor.execute( "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,)) return self.cursor.fetchone() def add(self, kwdict): """Add item described by 'kwdict' to archive""" - key = kwdict.get("_archive_key") or self.keygen(kwdict) + key = kwdict.get(self._cache_key) or self.keygen(kwdict) self.cursor.execute( "INSERT OR IGNORE INTO archive VALUES (?)", (key,)) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 54c81aa..fe9a0f8 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.21.0" +__version__ = "1.21.1" diff --git a/test/test_cookies.py b/test/test_cookies.py index 0657456..188b54c 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -180,14 +180,14 @@ class TestCookieUtils(unittest.TestCase): extr._cookiejar.set("a", "1", expires=now+100) with mock.patch.object(log, "warning") as mw: - self.assertFalse(extr._check_cookies(("a",))) + self.assertTrue(extr._check_cookies(("a",))) self.assertEqual(mw.call_count, 1) self.assertEqual(mw.call_args[0], ( "Cookie '%s' will expire in less than %s hour%s", "a", 1, "")) extr._cookiejar.set("a", "1", expires=now+100+7200) with mock.patch.object(log, "warning") as mw: - self.assertFalse(extr._check_cookies(("a",))) + self.assertTrue(extr._check_cookies(("a",))) self.assertEqual(mw.call_count, 1) self.assertEqual(mw.call_args[0], ( "Cookie '%s' will expire in less than %s hour%s", "a", 3, "s")) diff --git a/test/test_formatter.py b/test/test_formatter.py index 8464b1b..4cce8a3 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -232,6 +232,14 @@ class TestFormatter(unittest.TestCase): self._run_test("\fE name * 2 + ' ' + a", "{}{} {}".format( self.kwdict["name"], self.kwdict["name"], self.kwdict["a"])) + @unittest.skipIf(sys.hexversion < 0x3060000, "no fstring support") + def test_fstring(self): + self._run_test("\fF {a}", self.kwdict["a"]) + self._run_test("\fF {name}{name} {a}", "{}{} {}".format( + self.kwdict["name"], self.kwdict["name"], self.kwdict["a"])) + self._run_test("\fF foo-'\"{a.upper()}\"'-bar", + """foo-'"{}"'-bar""".format(self.kwdict["a"].upper())) + def test_module(self): with tempfile.TemporaryDirectory() as tmpdirname: path = os.path.join(tmpdirname, "testmod.py") diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 84d2747..e23cfa2 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -16,7 +16,7 @@ import logging import zipfile import tempfile import collections -from datetime import datetime, timezone as tz +from datetime import datetime sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from gallery_dl import extractor, output, path # noqa E402 @@ -345,7 +345,7 @@ class MtimeTest(BasePostprocessorTest): self.assertEqual(pp.key, "date") def test_mtime_datetime(self): - self._create(None, {"date": datetime(1980, 1, 1, tzinfo=tz.utc)}) + self._create(None, {"date": datetime(1980, 1, 1)}) self._trigger() self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800) diff --git a/test/test_util.py b/test/test_util.py index ce403a8..3cf3d68 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2015-2021 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -189,6 +189,10 @@ class TestCookiesTxt(unittest.TestCase): [self._cookie("name", "", ".example.org")], ) _assert( + "\tTRUE\t/\tTRUE\t\tname\t", + [self._cookie("name", "", "")], + ) + _assert( "# Netscape HTTP Cookie File\n" "\n" "# default\n" @@ -241,6 +245,8 @@ class TestCookiesTxt(unittest.TestCase): "n4", "" , "www.example.org", False, "/", False), 
self._cookie( "n5", "v5", "www.example.org", False, "/path", False, 100), + self._cookie( + "n6", "v6", "", False), ], "# Netscape HTTP Cookie File\n" "\n" @@ -313,6 +319,27 @@ class TestOther(unittest.TestCase): self.assertSequenceEqual( list(util.unique_sequence([1, 2, 1, 3, 2, 1])), [1, 2, 1, 3, 2, 1]) + def test_contains(self): + c = [1, "2", 3, 4, "5", "foo"] + self.assertTrue(util.contains(c, 1)) + self.assertTrue(util.contains(c, "foo")) + self.assertTrue(util.contains(c, [1, 3, "5"])) + self.assertTrue(util.contains(c, ["a", "b", "5"])) + self.assertFalse(util.contains(c, "bar")) + self.assertFalse(util.contains(c, [2, 5, "bar"])) + + s = "1 2 3 asd qwe y(+)c f(+)(-) bar" + self.assertTrue(util.contains(s, "y(+)c")) + self.assertTrue(util.contains(s, ["asd", "qwe", "yxc"])) + self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"])) + self.assertFalse(util.contains(s, "tag1")) + self.assertFalse(util.contains(s, ["tag1", "tag2", "tag3"])) + + s = "1, 2, 3, asd, qwe, y(+)c, f(+)(-), bar" + self.assertTrue(util.contains(s, "y(+)c", ", ")) + self.assertTrue(util.contains(s, ["sdf", "dfg", "qwe"], ", ")) + self.assertFalse(util.contains(s, "tag1", ", ")) + def test_raises(self): func = util.raises(Exception) with self.assertRaises(Exception): @@ -531,7 +558,16 @@ class TestOther(unittest.TestCase): self.assertEqual(f(["a", "b", "c"]), "a, b, c") self.assertEqual(f([1, 2, 3]), "1, 2, 3") - def test_to_timestamp(self, f=util.to_timestamp): + def test_datetime_to_timestamp(self, f=util.datetime_to_timestamp): + self.assertEqual(f(util.EPOCH), 0.0) + self.assertEqual(f(datetime.datetime(2010, 1, 1)), 1262304000.0) + self.assertEqual(f(datetime.datetime(2010, 1, 1, 0, 0, 0, 128000)), + 1262304000.128000) + with self.assertRaises(TypeError): + f(None) + + def test_datetime_to_timestamp_string( + self, f=util.datetime_to_timestamp_string): self.assertEqual(f(util.EPOCH), "0") self.assertEqual(f(datetime.datetime(2010, 1, 1)), "1262304000") self.assertEqual(f(None), "")
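
For reference, the two user-facing additions exercised by test_contains and test_fstring above can be used roughly as follows; this is a minimal sketch with made-up metadata values ("title", "tags"), not code from the patch:

from gallery_dl import formatter, util

# contains(values, elements, separator=" "): True if any element occurs
# in 'values'; string values are split on the separator first.
print(util.contains("tag1 tag2 tag3", ["foo", "tag2"]))  # True
print(util.contains([1, "2", 3], 5))                     # False

# A "\fF " prefix selects the new FStringFormatter, which evaluates the
# remainder as a Python f-string against the metadata dict.
fmt = formatter.parse("\fF {title.upper()}_{len(tags)}")
print(fmt.format_map({"title": "sunset", "tags": ["a", "b"]}))  # SUNSET_2

Because contains() is registered in GLOBALS, it should also be available in filter statements, e.g. --filter "contains(tags, ('cat', 'dog'))", assuming the extractor provides a tags field.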

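Likewise, a sketch of the reworked DownloadArchive interface from util.py: the key format string is now supplied by the caller (DownloadJob assembles it from the archive-prefix and archive-format options, and the metadata postprocessor reuses the class with its own "_archive_metadata" cache key). The path and format string below are illustrative only:

from gallery_dl import util

# New signature: DownloadArchive(path, format_string, cache_key="_archive_key")
archive = util.DownloadArchive("/tmp/demo-archive.sqlite3", "demo{id}")
kwdict = {"id": 12345}

print(archive.check(kwdict))  # falsy: key "demo12345" not recorded yet
archive.add(kwdict)           # INSERT OR IGNORE the generated key
print(archive.check(kwdict))  # truthy: entry exists on later runs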