diff options
| author | 2022-07-16 00:39:30 -0400 | |
|---|---|---|
| committer | 2022-07-16 00:39:30 -0400 | |
| commit | 5d92ac909e26a3373506cc4fce1dd465ea649942 (patch) | |
| tree | 2be8b9ea40d04c87b375fbe3b36dce641dce5c3a | |
| parent | 278a4319a00485410c1af8f0788a539f34c42696 (diff) | |
| parent | ae2a0f5622beaa6f402526f8a7b939419283a090 (diff) | |
Update upstream source from tag 'upstream/1.22.4'
Update to upstream version '1.22.4'
with Debian dir 740824d4b4f6e11c0a3280aa45c483d7116cf56d
67 files changed, 778 insertions, 376 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 403149e..be9a4f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +## 1.22.4 - 2022-07-15 +### Additions +- [instagram] add `pinned` metadata field ([#2752](https://github.com/mikf/gallery-dl/issues/2752)) +- [itaku] categorize sections by group ([#1842](https://github.com/mikf/gallery-dl/issues/1842)) +- [khinsider] extract `platform` metadata +- [tumblr] support `/blog/view` URLs ([#2760](https://github.com/mikf/gallery-dl/issues/2760)) +- [twitter] implement `strategy` option ([#2712](https://github.com/mikf/gallery-dl/issues/2712)) +- [twitter] add `count` metadata field ([#2741](https://github.com/mikf/gallery-dl/issues/2741)) +- [formatter] implement `O` format specifier ([#2736](https://github.com/mikf/gallery-dl/issues/2736)) +- [postprocessor:mtime] add `value` option ([#2739](https://github.com/mikf/gallery-dl/issues/2739)) +- add `--no-postprocessors` command-line option ([#2725](https://github.com/mikf/gallery-dl/issues/2725)) +- implement `format-separator` option ([#2737](https://github.com/mikf/gallery-dl/issues/2737)) +### Changes +- [pinterest] handle section pins with separate extractors ([#2684](https://github.com/mikf/gallery-dl/issues/2684)) +- [postprocessor:ugoira] enable `mtime` by default ([#2714](https://github.com/mikf/gallery-dl/issues/2714)) +### Fixes +- [bunkr] fix extraction ([#2732](https://github.com/mikf/gallery-dl/issues/2732)) +- [hentaifoundry] fix metadata extraction +- [itaku] fix user caching ([#1842](https://github.com/mikf/gallery-dl/issues/1842)) +- [itaku] fix `date` parsing +- [kemonoparty] ensure all files have an `extension` ([#2740](https://github.com/mikf/gallery-dl/issues/2740)) +- [komikcast] update domain +- [mangakakalot] update domain +- [newgrounds] only attempt to login if necessary ([#2715](https://github.com/mikf/gallery-dl/issues/2715)) +- [newgrounds] prevent exception on empty results ([#2727](https://github.com/mikf/gallery-dl/issues/2727)) +- [nozomi] reduce memory consumption during searches ([#2754](https://github.com/mikf/gallery-dl/issues/2754)) +- [pixiv] fix default `background` filenames +- [sankaku] rewrite file URLs to s.sankakucomplex.com ([#2746](https://github.com/mikf/gallery-dl/issues/2746)) +- [slideshare] fix `description` extraction +- [twitter] ignore previously seen Tweets ([#2712](https://github.com/mikf/gallery-dl/issues/2712)) +- [twitter] unescape HTML entities in `content` ([#2757](https://github.com/mikf/gallery-dl/issues/2757)) +- [weibo] handle invalid or broken status objects +- [postprocessor:zip] ensure target directory exists ([#2758](https://github.com/mikf/gallery-dl/issues/2758)) +- make `brotli` an *optional* dependency ([#2716](https://github.com/mikf/gallery-dl/issues/2716)) +- limit path length for `--write-pages` output on Windows ([#2733](https://github.com/mikf/gallery-dl/issues/2733)) +### Removals +- [foolfuuka] remove archive.wakarimasen.moe + ## 1.22.3 - 2022-06-28 ### Changes - [twitter] revert strategy changes for user URLs ([#2712](https://github.com/mikf/gallery-dl/issues/2712), [#2710](https://github.com/mikf/gallery-dl/issues/2710)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.22.3 +Version: 1.22.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -58,6 +58,7 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - yt-dlp_ or youtube-dl_: Video downloads - PySocks_: SOCKS proxy support +- brotli_ or brotlicffi_: Brotli compression support Installation @@ -98,8 +99,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -365,6 +366,8 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _PySocks: https://pypi.org/project/PySocks/ +.. _brotli: https://github.com/google/brotli +.. _brotlicffi: https://github.com/python-hyper/brotlicffi .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth @@ -25,6 +25,7 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - yt-dlp_ or youtube-dl_: Video downloads - PySocks_: SOCKS proxy support +- brotli_ or brotlicffi_: Brotli compression support Installation @@ -65,8 +66,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -332,6 +333,8 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _PySocks: https://pypi.org/project/PySocks/ +.. _brotli: https://github.com/google/brotli +.. _brotlicffi: https://github.com/python-hyper/brotlicffi .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index 7b8d3aa..5e46dc5 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -41,6 +41,7 @@ _arguments -C -S \ --no-skip'[Do not skip downloads; overwrite existing files]' \ --no-mtime'[Do not set file modification times according to Last-Modified HTTP response headers]' \ --no-download'[Do not download any files]' \ +--no-postprocessors'[Do not run any post processors]' \ --no-check-certificate'[Disable HTTPS certificate validation]' \ {-c,--config}'[Additional configuration files]':'<file>':_files \ --config-yaml'[==SUPPRESS==]':'<file>':_files \ diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl index 7424e41..40280d5 100644 --- a/data/completion/gallery-dl +++ b/data/completion/gallery-dl @@ -10,7 +10,7 @@ _gallery_dl() elif [[ "${prev}" =~ ^()$ ]]; then COMPREPLY=( $(compgen -d -- "${cur}") ) else - COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) + COMPREPLY=( $(compgen -W "--help --version --input-file --destination --directory --filename --proxy --source-address --clear-cache --cookies --cookies-from-browser --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --sleep-request --sleep-extractor --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-postprocessors --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --ugoira-conv-copy --write-metadata --write-info-json --write-infojson --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") ) fi } diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index 02769ee..587ff12 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -35,6 +35,7 @@ complete -c gallery-dl -l 'no-part' -d 'Do not use .part files' complete -c gallery-dl -l 'no-skip' -d 'Do not skip downloads; overwrite existing files' complete -c gallery-dl -l 'no-mtime' -d 'Do not set file modification times according to Last-Modified HTTP response headers' complete -c gallery-dl -l 'no-download' -d 'Do not download any files' +complete -c gallery-dl -l 'no-postprocessors' -d 'Do not run any post processors' complete -c gallery-dl -l 'no-check-certificate' -d 'Disable HTTPS certificate validation' complete -c gallery-dl -r -F -s 'c' -l 'config' -d 'Additional configuration files' complete -c gallery-dl -r -F -l 'config-yaml' -d '==SUPPRESS==' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 2c8757d..751d470 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2022-06-28" "1.22.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2022-07-15" "1.22.4" "gallery-dl Manual" .\" disable hyphenation .nh @@ -125,6 +125,9 @@ Do not set file modification times according to Last-Modified HTTP response head .B "\-\-no\-download" Do not download any files .TP +.B "\-\-no\-postprocessors" +Do not run any post processors +.TP .B "\-\-no\-check\-certificate" Disable HTTPS certificate validation .TP diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 1139e2e..39550ad 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2022-06-28" "1.22.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2022-07-15" "1.22.4" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -2700,6 +2700,27 @@ If this value is \f[I]"original"\f[], metadata for these files will be taken from the original Tweets, not the Retweets. +.SS extractor.twitter.timeline.strategy +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Description:" 4 +Controls the strategy / tweet source used for user URLs +(\f[I]https://twitter.com/USER\f[]). + +.br +* \f[I]"tweets"\f[]: \f[I]/tweets\f[] timeline + search +.br +* \f[I]"media"\f[]: \f[I]/media\f[] timeline + search +.br +* \f[I]"with_replies"\f[]: \f[I]/with_replies\f[] timeline + search +.br +* \f[I]"auto"\f[]: \f[I]"tweets"\f[] or \f[I]"media"\f[], depending on \f[I]retweets\f[] and \f[I]text-tweets\f[] settings + + .SS extractor.twitter.text-tweets .IP "Type:" 6 \f[I]bool\f[] @@ -2726,6 +2747,17 @@ and appropriate \f[I]filename\f[]. Extract \f[I]TwitPic\f[] embeds. +.SS extractor.twitter.unique +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Ignore previously seen Tweets. + + .SS extractor.twitter.users .IP "Type:" 6 \f[I]string\f[] @@ -3866,6 +3898,28 @@ Name of the metadata field whose value should be used. This value must either be a UNIX timestamp or a \f[I]datetime\f[] object. +Note: This option gets ignored if \f[I]mtime.value\f[] is set. + + +.SS mtime.value +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]null\f[] + +.IP "Example:" 4 +.br +* "{status[date]}" +.br +* "{content[0:6]:R22/2022/D%Y%m%d/}" + +.IP "Description:" 4 +A \f[I]format string\f[] whose value should be used. + +The resulting value must either be a UNIX timestamp or a +\f[I]datetime\f[] object. + .SS ugoira.extension .IP "Type:" 6 @@ -4012,7 +4066,7 @@ to reduce an odd width/height by 1 pixel and make them even. \f[I]bool\f[] .IP "Default:" 9 -\f[I]false\f[] +\f[I]true\f[] .IP "Description:" 4 Set modification times of generated ugoira aniomations. @@ -4108,6 +4162,21 @@ Set this option to \f[I]null\f[] or an invalid path to disable this cache. +.SS format-separator +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"/"\f[] + +.IP "Description:" 4 +Character(s) used as argument separator in format string +\f[I]format specifiers\f[]. + +For example, setting this option to \f[I]"#"\f[] would allow a replacement +operation to be \f[I]Rold#new#\f[] instead of the default \f[I]Rold/new/\f[] + + .SS signals-ignore .IP "Type:" 6 \f[I]list\f[] of \f[I]strings\f[] diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 78550b5..1492653 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -285,8 +285,10 @@ "quoted": false, "replies": true, "retweets": false, + "strategy": null, "text-tweets": false, "twitpic": false, + "unique": true, "users": "timeline", "videos": true }, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 5eb7939..1e1d74d 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.22.3 +Version: 1.22.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -58,6 +58,7 @@ Optional - FFmpeg_: Pixiv Ugoira to WebM conversion - yt-dlp_ or youtube-dl_: Video downloads - PySocks_: SOCKS proxy support +- brotli_ or brotlicffi_: Brotli compression support Installation @@ -98,8 +99,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml @@ -365,6 +366,8 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with .. _yt-dlp: https://github.com/yt-dlp/yt-dlp .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _PySocks: https://pypi.org/project/PySocks/ +.. _brotli: https://github.com/google/brotli +.. _brotlicffi: https://github.com/python-hyper/brotlicffi .. _pyOpenSSL: https://pyopenssl.org/ .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index eb62cb3..b323e38 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -57,6 +57,7 @@ gallery_dl/extractor/bcy.py gallery_dl/extractor/behance.py gallery_dl/extractor/blogger.py gallery_dl/extractor/booru.py +gallery_dl/extractor/bunkr.py gallery_dl/extractor/comicvine.py gallery_dl/extractor/common.py gallery_dl/extractor/cyberdrop.py diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 9e27417..04ea54c 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -177,6 +177,12 @@ def main(): extractor.modules = modules extractor._module_iter = iter(modules) + # format string separator + separator = config.get((), "format-separator") + if separator: + from . import formatter + formatter._SEPARATOR = separator + # loglevels output.configure_logging(args.loglevel) if args.loglevel >= logging.ERROR: diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index 38b2d5a..f5125ee 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2021 Mike Fährmann +# Copyright 2017-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,7 +13,7 @@ from .. import text class _2chanThreadExtractor(Extractor): - """Extractor for images from threads on www.2chan.net""" + """Extractor for 2chan threads""" category = "2chan" subcategory = "thread" directory_fmt = ("{category}", "{board_name}", "{thread}") @@ -21,9 +21,24 @@ class _2chanThreadExtractor(Extractor): archive_fmt = "{board}_{thread}_{tim}" url_fmt = "https://{server}.2chan.net/{board}/src/{filename}" pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/]+)/res/(\d+)" - test = ("http://dec.2chan.net/70/res/4752.htm", { - "url": "f49aa31340e9a3429226af24e19e01f5b819ca1f", - "keyword": "44599c21b248e79692b2eb2da12699bd0ed5640a", + test = ("https://dec.2chan.net/70/res/14565.htm", { + "pattern": r"https://dec\.2chan\.net/70/src/\d{13}\.jpg", + "count": ">= 3", + "keyword": { + "board": "70", + "board_name": "新板提案", + "com": str, + "fsize": r"re:\d+", + "name": "名無し", + "no": r"re:1[45]\d\d\d", + "now": r"re:22/../..\(.\)..:..:..", + "post": "無題", + "server": "dec", + "thread": "14565", + "tim": r"re:^\d{13}$", + "time": r"re:^\d{10}$", + "title": "ヒロアカ板" + }, }) def __init__(self, match): diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 88ceaeb..b2ae963 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -261,7 +261,7 @@ class _500pxImageExtractor(_500pxExtractor): "tags": list, "taken_at": "2017-05-04T17:36:51+00:00", "times_viewed": int, - "url": "/photo/222049255/Queen-Of-Coasts-by-Olesya-Nabieva", + "url": "/photo/222049255/Queen-Of-Coasts-by-Alice-Nabieva", "user": dict, "user_id": 12847235, "votes_count": int, diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py index 9232f88..5d260b9 100644 --- a/gallery_dl/extractor/8kun.py +++ b/gallery_dl/extractor/8kun.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020 Mike Fährmann +# Copyright 2020-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -26,10 +26,10 @@ class _8kunThreadExtractor(Extractor): "count": ">= 8", }), # old-style file URLs (#1101) - ("https://8kun.top/d/res/13258.html", { - "pattern": r"https://media\.8kun\.top/d/src/\d+(-\d)?\.\w+", - "range": "1-20", - }), + # ("https://8kun.top/d/res/13258.html", { + # "pattern": r"https://media\.8kun\.top/d/src/\d+(-\d)?\.\w+", + # "range": "1-20", + # }), ) def __init__(self, match): diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index 3cd9c3a..fe57412 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2021 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -55,7 +55,7 @@ class _8musesAlbumExtractor(Extractor): }, }), # custom sorting - ("https://www.8muses.com/comics/album/Fakku-Comics/9?sort=az", { + ("https://www.8muses.com/comics/album/Fakku-Comics/11?sort=az", { "count": ">= 70", "keyword": {"name": r"re:^[R-Zr-z]"}, }), diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index e273f84..70cebb3 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -25,6 +25,7 @@ modules = [ "bcy", "behance", "blogger", + "bunkr", "comicvine", "cyberdrop", "danbooru", diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index e686c70..19b9d97 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -147,14 +147,15 @@ class ArtstationUserExtractor(ArtstationExtractor): r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?" r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$") test = ( - ("https://www.artstation.com/gaerikim/", { + ("https://www.artstation.com/sungchoi/", { "pattern": r"https://\w+\.artstation\.com/p/assets/images" r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+", - "count": ">= 6", + "range": "1-10", + "count": ">= 10", }), - ("https://www.artstation.com/gaerikim/albums/all/"), - ("https://gaerikim.artstation.com/"), - ("https://gaerikim.artstation.com/projects/"), + ("https://www.artstation.com/sungchoi/albums/all/"), + ("https://sungchoi.artstation.com/"), + ("https://sungchoi.artstation.com/projects/"), ) def projects(self): @@ -400,7 +401,7 @@ class ArtstationFollowingExtractor(ArtstationExtractor): subcategory = "following" pattern = (r"(?:https?://)?(?:www\.)?artstation\.com" r"/(?!artwork|projects|search)([^/?#]+)/following") - test = ("https://www.artstation.com/gaerikim/following", { + test = ("https://www.artstation.com/sungchoi/following", { "pattern": ArtstationUserExtractor.pattern, "count": ">= 50", }) diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index eef87f9..21ca991 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -136,9 +136,9 @@ class BloggerPostExtractor(BloggerExtractor): "pattern": r"https://.+\.googlevideo\.com/videoplayback", }), # image URLs with width/height (#1061) - ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", { - "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png", - }), + # ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", { + # "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png", + # }), # new image domain (#2204) (("https://randomthingsthroughmyletterbox.blogspot.com/2022/01" "/bitter-flowers-by-gunnar-staalesen-blog.html"), { diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py new file mode 100644 index 0000000..9904d0a --- /dev/null +++ b/gallery_dl/extractor/bunkr.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- + +# Copyright 2022 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://bunkr.is/""" + +from .lolisafe import LolisafeAlbumExtractor +from .. import text +import json + + +class BunkrAlbumExtractor(LolisafeAlbumExtractor): + """Extractor for bunkr.is albums""" + category = "bunkr" + root = "https://app.bunkr.is" + pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)" + test = ( + ("https://app.bunkr.is/a/Lktg9Keq", { + "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png", + "content": "0c8768055e4e20e7c7259608b67799171b691140", + "keyword": { + "album_id": "Lktg9Keq", + "album_name": 'test テスト "&>', + "count": 1, + "filename": 'test-テスト-"&>-QjgneIQv', + "id": "QjgneIQv", + "name": 'test-テスト-"&>', + "num": int, + }, + }), + # mp4 (#2239) + ("https://bunkr.is/a/ptRHaCn2", { + "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4", + "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471", + }), + ("https://bunkr.to/a/Lktg9Keq"), + ) + + def fetch_album(self, album_id): + if "//app." in self.root: + return self._fetch_album_api(album_id) + else: + return self._fetch_album_site(album_id) + + def _fetch_album_api(self, album_id): + files, data = LolisafeAlbumExtractor.fetch_album(self, album_id) + + for file in files: + url = file["file"] + if url.endswith(".mp4"): + file["file"] = url.replace( + "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1) + else: + file["_fallback"] = (url.replace("//cdn.", "//cdn3.", 1),) + + return files, data + + def _fetch_album_site(self, album_id): + url = self.root + "/a/" + self.album_id + + try: + data = json.loads(text.extract( + self.request(url).text, + 'id="__NEXT_DATA__" type="application/json">', '<')[0]) + props = data["props"]["pageProps"] + album = props["album"] + files = props["files"] + except Exception as exc: + self.log.debug(exc) + self.root = self.root.replace("bunkr", "app.bunkr", 1) + return self._fetch_album_api(album_id) + + for file in files: + name = file["name"] + if name.endswith(".mp4"): + file["file"] = "https://media-files.bunkr.is/" + name + else: + file["file"] = file["cdn"] + "/" + name + + return files, { + "album_id" : self.album_id, + "album_name" : text.unescape(album["name"]), + "description": text.unescape(album["description"]), + "count" : len(files), + } diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 5c5e29e..6ccae7f 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -8,6 +8,7 @@ """Common classes and constants used by extractor modules.""" +import os import re import ssl import time @@ -224,7 +225,9 @@ class Extractor(): headers.clear() ssl_options = ssl_ciphers = 0 - browser = self.config("browser") or self.browser + browser = self.config("browser") + if browser is None: + browser = self.browser if browser and isinstance(browser, str): browser, _, platform = browser.lower().partition(":") @@ -259,6 +262,10 @@ class Extractor(): "rv:102.0) Gecko/20100101 Firefox/102.0")) headers["Accept"] = "*/*" headers["Accept-Language"] = "en-US,en;q=0.5" + + if BROTLI: + headers["Accept-Encoding"] = "gzip, deflate, br" + else: headers["Accept-Encoding"] = "gzip, deflate" custom_headers = self.config("headers") @@ -473,11 +480,16 @@ class Extractor(): fname = "{:>02}_{}".format( Extractor._dump_index, - Extractor._dump_sanitize('_', response.url) - )[:250] + Extractor._dump_sanitize('_', response.url), + ) + + if util.WINDOWS: + path = os.path.abspath(fname)[:255] + else: + path = fname[:251] try: - with open(fname + ".dump", 'wb') as fp: + with open(path + ".txt", 'wb') as fp: util.dump_response( response, fp, headers=(self._write_pages == "all")) except Exception as e: @@ -718,7 +730,7 @@ HTTP_HEADERS = { ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9," "image/avif,image/webp,*/*;q=0.8"), ("Accept-Language", "en-US,en;q=0.5"), - ("Accept-Encoding", "gzip, deflate, br"), + ("Accept-Encoding", None), ("Referer", None), ("DNT", "1"), ("Connection", "keep-alive"), @@ -736,7 +748,7 @@ HTTP_HEADERS = { ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9," "image/webp,image/apng,*/*;q=0.8"), ("Referer", None), - ("Accept-Encoding", "gzip, deflate"), + ("Accept-Encoding", None), ("Accept-Language", "en-US,en;q=0.9"), ("Cookie", None), ), @@ -783,6 +795,13 @@ SSL_CIPHERS = { } +# detect brotli support +try: + BROTLI = requests.packages.urllib3.response.brotli is not None +except AttributeError: + BROTLI = False + + # Undo automatic pyOpenSSL injection by requests pyopenssl = config.get((), "pyopenssl", False) if not pyopenssl: diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index f21817e..ec0db68 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -181,7 +181,8 @@ class DanbooruTagExtractor(DanbooruExtractor): # 'external' option (#1747) ("https://danbooru.donmai.us/posts?tags=pixiv_id%3A1476533", { "options": (("external", True),), - "pattern": r"http://img16.pixiv.net/img/takaraakihito/1476533.jpg", + "pattern": r"https://i\.pximg\.net/img-original/img" + r"/2008/08/28/02/35/48/1476533_p0\.jpg", }), ("https://e621.net/posts?tags=anry", { "url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba", diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 70bee52..39ae484 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -671,12 +671,12 @@ class DeviantartCollectionExtractor(DeviantartExtractor): test = ( (("https://www.deviantart.com/pencilshadings/favourites" "/70595441/3D-Favorites"), { - "count": ">= 20", + "count": ">= 15", "options": (("original", False),), }), (("https://www.deviantart.com/pencilshadings/favourites" "/F050486B-CB62-3C66-87FB-1105A7F6379F/3D Favorites"), { - "count": ">= 20", + "count": ">= 15", "options": (("original", False),), }), ("https://pencilshadings.deviantart.com" @@ -876,8 +876,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), # GIF (#242) (("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), { - "pattern": (r"https://images-wixmp-\w+\.wixmp\.com" - r"/f/[^/]+/[^.]+\.gif\?token="), + "pattern": r"https://wixmp-\w+\.wixmp\.com/f/03fd2413-efe9-4e5c-" + r"8734-2b72605b3fbb/dcxbsnb-1bbf0b38-42af-4070-8878-" + r"f30961955bec\.gif\?token=ey...", }), # Flash animation with GIF preview (#1731) ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", { diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 36b89f7..2720691 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "date": "dt:2018-03-18 20:15:00", "eh_category": "Non-H", "expunged": False, - "favorites": "21", + "favorites": r"re:^[12]\d$", "filecount": "4", "filesize": 1488978, "gid": 1200119, diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 093113d..34b52ef 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -100,10 +100,6 @@ BASE_PATTERN = FoolfuukaExtractor.update({ "root": "https://thebarchive.com", "pattern": r"thebarchive\.com", }, - "wakarimasen": { - "root": "https://archive.wakarimasen.moe", - "pattern": r"archive\.wakarimasen\.moe", - }, }) @@ -149,9 +145,6 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor): ("https://thebarchive.com/b/thread/739772332/", { "url": "e8b18001307d130d67db31740ce57c8561b5d80c", }), - ("https://archive.wakarimasen.moe/a/thread/223157648/", { - "url": "fef0758d2eb81b1ba783051fd5ec491d70107a78", - }), ) def __init__(self, match): @@ -190,7 +183,6 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/c/"), ("https://rbt.asia/g/"), ("https://thebarchive.com/b/"), - ("https://archive.wakarimasen.moe/a/"), ) def __init__(self, match): @@ -234,7 +226,6 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/_/search/text/test/"), ("https://rbt.asia/_/search/text/test/"), ("https://thebarchive.com/_/search/text/test/"), - ("https://archive.wakarimasen.moe/a/search/text/test/"), ) def __init__(self, match): @@ -300,7 +291,6 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor): ("https://archive.nyafuu.org/c/gallery/7"), ("https://rbt.asia/g/gallery/8"), ("https://thebarchive.com/b/gallery/9"), - ("https://archive.wakarimasen.moe/a/gallery/10"), ) def __init__(self, match): diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index bf9c983..35a3448 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -149,7 +149,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor): pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)" test = ( ("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", { - "content": "622e80be3f496672c44aab5c47fbc6941c61bc79", + "content": "5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c", "pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg", "count": 2, }), diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 691cefb..0741451 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -84,12 +84,12 @@ class HentaifoundryExtractor(Extractor): .replace("\r\n", "\n"), "", "")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "</div>"), "title='", "'")], - "media" : text.unescape(extr("Media</b></td>\t\t<td>", "<")), "date" : text.parse_datetime(extr("datetime='", "'")), - "views" : text.parse_int(extr("Views</b></td>\t\t<td>", "<")), + "views" : text.parse_int(extr(">Views</span>", "<")), + "score" : text.parse_int(extr(">Vote Score</span>", "<")), + "media" : text.unescape(extr(">Media</span>", "<").strip()), "tags" : text.split_html(extr( - "<td><b>Keywords</b></td>", "</tr>"))[::2], - "score" : text.parse_int(extr('Score</b></td>\t\t<td>', '<')), + ">Tags </span>", "</div>")), } return text.nameext_from_url(data["src"], data) @@ -292,7 +292,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor): "media" : "Other digital art", "ratings": ["Sexual content", "Contains female nudity"], "score" : int, - "tags" : ["kancolle", "kantai", "collection", "shimakaze"], + "tags" : ["collection", "kancolle", "kantai", "shimakaze"], "title" : "shimakaze", "user" : "Tenpura", "views" : int, diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py index c423b18..bf9e464 100644 --- a/gallery_dl/extractor/hentaihand.py +++ b/gallery_dl/extractor/hentaihand.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2020-2021 Mike Fährmann +# Copyright 2020-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -22,12 +22,12 @@ class HentaihandGalleryExtractor(GalleryExtractor): (("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-" "no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-" "no-railgun-english"), { - "pattern": r"https://cdn.hentaihand.com/.*/images/360468/\d+.jpg$", + "pattern": r"https://cdn.hentaihand.com/.*/images/37387/\d+.jpg$", "count": 50, "keyword": { "artists" : ["Takumi Na Muchi"], "date" : "dt:2014-06-28 00:00:00", - "gallery_id": 360468, + "gallery_id": 37387, "lang" : "en", "language" : "English", "parodies" : ["Toaru Kagaku No Railgun"], diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 7922e84..b1c0e9e 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2021 Mike Fährmann +# Copyright 2016-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -36,13 +36,13 @@ class ImagefapGalleryExtractor(ImagefapExtractor): test = ( ("https://www.imagefap.com/pictures/7102714", { - "pattern": r"https://cdnh\.imagefap\.com" + "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", "keyword": "2ba96e84c2952c4750e9fa94a3f2b1f965cec2f3", "content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab", }), ("https://www.imagefap.com/gallery/5486966", { - "pattern": r"https://cdnh\.imagefap\.com" + "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", "keyword": "3e24eace5b09639b881ebd393165862feb46adde", }), @@ -107,7 +107,7 @@ class ImagefapImageExtractor(ImagefapExtractor): pattern = BASE_PATTERN + r"/photo/(\d+)" test = ( ("https://www.imagefap.com/photo/1369341772/", { - "pattern": r"https://cdnh\.imagefap\.com" + "pattern": r"https://cdnh?\.imagefap\.com" r"/images/full/\d+/\d+/\d+\.jpg", "keyword": "8894e45f7262020d8d66ce59917315def1fc475b", }), @@ -156,10 +156,10 @@ class ImagefapUserExtractor(ImagefapExtractor): r"|usergallery\.php\?userid=(\d+))") test = ( ("https://www.imagefap.com/profile/LucyRae/galleries", { - "url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd", + "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a", }), ("https://www.imagefap.com/usergallery.php?userid=1862791", { - "url": "d941aa906f56a75972a7a5283030eb9a8d27a4fd", + "url": "822cb6cbb6f474ca2d0f58d1d6d253bc2338937a", }), ("https://www.imagefap.com/profile.php?user=LucyRae"), ("https://beta.imagefap.com/profile.php?user=LucyRae"), diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 31f5b32..4a2c3bb 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -186,11 +186,17 @@ class InstagramExtractor(Extractor): media = next(self._media_by_id(post["id"])) return self._parse_post_api(media) + pinned = post.get("pinned_for_users", ()) + if pinned: + for index, user in enumerate(pinned): + pinned[index] = int(user["id"]) + owner = post["owner"] data = { "typename" : typename, "date" : text.parse_timestamp(post["taken_at_timestamp"]), "likes" : post["edge_media_preview_like"]["count"], + "pinned" : pinned, "owner_id" : owner["id"], "username" : owner.get("username"), "fullname" : owner.get("full_name"), @@ -263,6 +269,7 @@ class InstagramExtractor(Extractor): "post_id" : post["pk"], "post_shortcode": post["code"], "likes": post["like_count"], + "pinned": post.get("timeline_pinned_user_ids", ()), } caption = post["caption"] diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index dfe4b53..6b2cf4c 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -34,11 +34,19 @@ class ItakuExtractor(Extractor): for post in self.posts(): post["date"] = text.parse_datetime( - post["date_added"], "%Y-%m-%dT%H:%M:%S.%f") + post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ") for category, tags in post.pop("categorized_tags").items(): post["tags_" + category.lower()] = [t["name"] for t in tags] post["tags"] = [t["name"] for t in post["tags"]] - post["sections"] = [s["title"] for s in post["sections"]] + + sections = [] + for s in post["sections"]: + group = s["group"] + if group: + sections.append(group["title"] + "/" + s["title"]) + else: + sections.append(s["title"]) + post["sections"] = sections if post["video"] and self.videos: url = post["video"]["video"] @@ -79,12 +87,13 @@ class ItakuImageExtractor(ItakuExtractor): "is_blacklisted": False }, "can_reshare": True, + "date": "dt:2022-05-05 19:21:17", "date_added": "2022-05-05T19:21:17.674148Z", "date_edited": "2022-05-25T14:37:46.220612Z", "description": "sketch from drawpile", "extension": "png", "filename": "220504_oUNIAFT", - "hotness_score": 11507.4691939, + "hotness_score": float, "id": 100471, "image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs" "/220504_oUNIAFT.png", @@ -102,7 +111,7 @@ class ItakuImageExtractor(ItakuExtractor): "owner_displayname": "Piku", "owner_username": "piku", "reshared_by_you": False, - "sections": ["Miku"], + "sections": ["Fanart/Miku"], "tags": list, "tags_character": ["hatsune_miku"], "tags_copyright": ["vocaloid"], @@ -152,10 +161,10 @@ class ItakuAPI(): return self._pagination(endpoint, params, self.image) def image(self, image_id): - endpoint = "/galleries/images/" + str(image_id) + endpoint = "/galleries/images/{}/".format(image_id) return self._call(endpoint) - @memcache() + @memcache(keyarg=1) def user(self, username): return self._call("/user_profiles/{}/".format(username)) diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 7287c38..f1eb79f 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -96,12 +96,14 @@ class KemonopartyExtractor(Extractor): post["num"] += 1 post["_http_headers"] = headers + text.nameext_from_url(file.get("name", url), post) + if not post["extension"]: + post["extension"] = text.ext_from_url(url) + if url[0] == "/": url = self.root + "/data" + url elif url.startswith(self.root): url = self.root + "/data" + url[20:] - - text.nameext_from_url(file.get("name", url), post) yield Message.Url, url, post def login(self): @@ -377,12 +379,15 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor): for post["num"], file in enumerate(files, 1): post["type"] = file["type"] url = file["path"] + + text.nameext_from_url(file.get("name", url), post) + if not post["extension"]: + post["extension"] = text.ext_from_url(url) + if url[0] == "/": url = self.root + "/data" + url elif url.startswith(self.root): url = self.root + "/data" + url[20:] - - text.nameext_from_url(file["name"], post) yield Message.Url, url, post def posts(self): diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py index e7827b1..d2e9d88 100644 --- a/gallery_dl/extractor/khinsider.py +++ b/gallery_dl/extractor/khinsider.py @@ -30,7 +30,8 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor): "album": { "count": 1, "date": "Sep 18th, 2016", - "name": "Horizon Riders (Wii)", + "name": "Horizon Riders", + "platform": "Wii", "size": 26214400, "type": "Gamerip", }, @@ -60,6 +61,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor): extr = text.extract_from(page) return {"album": { "name" : text.unescape(extr("<h2>", "<")), + "platform": extr("Platforms: <a", "<").rpartition(">")[2], "count": text.parse_int(extr("Number of Files: <b>", "<")), "size" : text.parse_bytes(extr("Total Filesize: <b>", "<")[:-1]), "date" : extr("Date Added: <b>", "<"), diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py index 21ed3c7..1187fd6 100644 --- a/gallery_dl/extractor/komikcast.py +++ b/gallery_dl/extractor/komikcast.py @@ -1,22 +1,24 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract manga-chapters and entire manga from https://komikcast.com/""" +"""Extractors for https://komikcast.me/""" from .common import ChapterExtractor, MangaExtractor from .. import text import re +BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:me|com)" + class KomikcastBase(): """Base class for komikcast extractors""" category = "komikcast" - root = "https://komikcast.com" + root = "https://komikcast.me" @staticmethod def parse_chapter_string(chapter_string, data=None): @@ -44,18 +46,18 @@ class KomikcastBase(): class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): - """Extractor for manga-chapters from komikcast.com""" - pattern = r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?#]+/)" + """Extractor for manga-chapters from komikcast.me""" + pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)" test = ( - (("https://komikcast.com/chapter/" - "apotheosis-chapter-02-2-bahasa-indonesia/"), { - "url": "f6b43fbc027697749b3ea1c14931c83f878d7936", + (("https://komikcast.me/chapter" + "/apotheosis-chapter-02-2-bahasa-indonesia/"), { + "url": "74eca5c9b27b896816497f9b2d847f2a1fcfc209", "keyword": "f3938e1aff9ad1f302f52447e9781b21f6da26d4", }), - (("https://komikcast.com/chapter/" - "solo-spell-caster-chapter-37-bahasa-indonesia/"), { - "url": "c3d30de6c796ff6ff36eb86e2e6fa2f8add8e829", - "keyword": "ed8a0ff73098776988bf66fb700381a2c748f910", + (("https://komikcast.me/chapter" + "/soul-land-ii-chapter-300-1-bahasa-indonesia/"), { + "url": "243a5250e210b40d17217e83b7547cefea5638bd", + "keyword": "cb646cfed3d45105bd645ab38b2e9f7d8c436436", }), ) @@ -74,16 +76,15 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): class KomikcastMangaExtractor(KomikcastBase, MangaExtractor): - """Extractor for manga from komikcast.com""" + """Extractor for manga from komikcast.me""" chapterclass = KomikcastChapterExtractor - pattern = (r"(?:https?://)?(?:www\.)?komikcast\.com" - r"(/(?:komik/)?[^/?#]+)/?$") + pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$" test = ( - ("https://komikcast.com/komik/090-eko-to-issho/", { - "url": "dc798d107697d1f2309b14ca24ca9dba30c6600f", + ("https://komikcast.me/komik/090-eko-to-issho/", { + "url": "08204f0a703ec5272121abcf0632ecacba1e588f", "keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1", }), - ("https://komikcast.com/tonari-no-kashiwagi-san/"), + ("https://komikcast.me/tonari-no-kashiwagi-san/"), ) def chapters(self, page): diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py index 2aea44c..7c6ef69 100644 --- a/gallery_dl/extractor/lolisafe.py +++ b/gallery_dl/extractor/lolisafe.py @@ -20,10 +20,6 @@ class LolisafeExtractor(BaseExtractor): BASE_PATTERN = LolisafeExtractor.update({ - "bunkr": { - "root": "https://app.bunkr.is", - "pattern": r"(?:app\.)?bunkr\.(?:is|to)", - }, "zzzz" : { "root": "https://zz.ht", "pattern": r"zz\.(?:ht|fo)", @@ -35,25 +31,6 @@ class LolisafeAlbumExtractor(LolisafeExtractor): subcategory = "album" pattern = BASE_PATTERN + "/a/([^/?#]+)" test = ( - ("https://app.bunkr.is/a/Lktg9Keq", { - "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png", - "content": "0c8768055e4e20e7c7259608b67799171b691140", - "keyword": { - "album_id": "Lktg9Keq", - "album_name": 'test テスト "&>', - "count": 1, - "filename": 'test-テスト-"&>-QjgneIQv', - "id": "QjgneIQv", - "name": 'test-テスト-"&>', - "num": int, - }, - }), - # mp4 (#2239) - ("https://bunkr.is/a/ptRHaCn2", { - "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4", - "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471", - }), - ("https://bunkr.to/a/Lktg9Keq"), ("https://zz.ht/a/lop7W6EZ", { "pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png", "count": 2, @@ -71,11 +48,7 @@ class LolisafeAlbumExtractor(LolisafeExtractor): domain = self.config("domain") if domain is None or domain == "auto": - if self.category == "bunkr": - self.root = "https://app.bunkr.is" - else: - self.root = text.root_from_url(match.group(0)) - + self.root = text.root_from_url(match.group(0)) else: self.root = text.ensure_http_scheme(domain) @@ -89,10 +62,6 @@ class LolisafeAlbumExtractor(LolisafeExtractor): data["_fallback"] = file["_fallback"] text.nameext_from_url(url, data) data["name"], sep, data["id"] = data["filename"].rpartition("-") - - if data["extension"] == "mp4": - url = url.replace( - "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1) yield Message.Url, url, data def fetch_album(self, album_id): diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 461c92d..531aef4 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -95,9 +95,9 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor): r"(/manga/[^/?#]+/?)(?:#.*)?$") test = ( ("https://www.mangahere.cc/manga/aria/", { - "url": "dc7f8954efbe87d9fd670c54e5edb5230c01f767", - "keyword": "864524eed2dc6a73e366f6ba400b80d894f99b5a", - "count": 69, + "url": "9c2e54ec42e9a87ad53096c328b33c90750af3e4", + "keyword": "71503c682c5d0c277a50409a8c5fd78e871e3d69", + "count": 71, }), ("https://www.mangahere.cc/manga/hiyokoi/#50", { "url": "654850570aa03825cd57e2ae2904af489602c523", diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py index c71b003..ba55ac1 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2020 Jake Mannens -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -13,30 +13,27 @@ from .common import ChapterExtractor, MangaExtractor from .. import text import re +BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv" + class MangakakalotBase(): """Base class for mangakakalot extractors""" category = "mangakakalot" - root = "https://ww.mangakakalot.tv" + root = "https://ww3.mangakakalot.tv" class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): """Extractor for manga chapters from mangakakalot.tv""" - pattern = (r"(?:https?://)?(?:www?\.)?mangakakalot\.tv" - r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)") + pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)" test = ( - ("https://ww.mangakakalot.tv/chapter/manga-hl984546/chapter-6", { + ("https://ww3.mangakakalot.tv/chapter/manga-jk986845/chapter-34.2", { "pattern": r"https://cm\.blazefast\.co" r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg", - "keyword": "e9646a76a210f1eb4a71b4134664814c99d65d48", - "count": 14, - }), - (("https://mangakakalot.tv/chapter" - "/hatarakanai_futari_the_jobless_siblings/chapter_20.1"), { - "keyword": "14c430737ff600b26a3811815905f34dd6a6c8c6", - "content": "b3eb1f139caef98d9dcd8ba6a5ee146a13deebc4", - "count": 2, + "keyword": "0f1586ff52f0f9cbbb25306ae64ab718f8a6a633", + "count": 9, }), + ("https://mangakakalot.tv/chapter" + "/hatarakanai_futari_the_jobless_siblings/chapter_20.1"), ) def __init__(self, match): @@ -78,16 +75,13 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor): """Extractor for manga from mangakakalot.tv""" chapterclass = MangakakalotChapterExtractor - pattern = (r"(?:https?://)?(?:www?\.)?mangakakalot\.tv" - r"(/manga/[^/?#]+)") + pattern = BASE_PATTERN + r"(/manga/[^/?#]+)" test = ( - ("https://ww.mangakakalot.tv/manga/lk921810", { - "url": "654d040c17728c9c8756fce7092b084e8dcf67d2", - }), - ("https://mangakakalot.tv/manga/manga-jk986845", { + ("https://ww3.mangakakalot.tv/manga/manga-jk986845", { "pattern": MangakakalotChapterExtractor.pattern, "count": ">= 30", }), + ("https://mangakakalot.tv/manga/lk921810"), ) def chapters(self, page): diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py index 833d18e..3444a7a 100644 --- a/gallery_dl/extractor/manganelo.py +++ b/gallery_dl/extractor/manganelo.py @@ -21,16 +21,12 @@ class ManganeloChapterExtractor(ChapterExtractor): pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)" test = ( ("https://readmanganato.com/manga-gn983696/chapter-23", { - "pattern": r"https://s\d+\.\w+\.com/mangakakalot/g\d+/gq921227/" - r"vol3_chapter_23_24_yen/\d+\.jpg", - "keyword": "3748087cf41abc97f991530e6fd53b291490d6d0", + "pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23" + r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg", + "keyword": "2c5cd59342f149375df9bcb50aa416b4d04a43cf", "count": 25, }), - ("https://manganelo.com/chapter/gamers/chapter_15", { - "keyword": "8f59f88d516247011fe122e05746c27e203c8191", - "content": "fbec629c71f66b246bfa0604204407c0d1c8ae38", - "count": 39, - }), + ("https://manganelo.com/chapter/gamers/chapter_15"), ("https://manganelo.com/chapter/gq921227/chapter_23"), ) @@ -81,14 +77,11 @@ class ManganeloMangaExtractor(MangaExtractor): chapterclass = ManganeloChapterExtractor pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$" test = ( - ("https://manganato.com/manga-gu983703", { - "pattern": ManganeloChapterExtractor.pattern, - "count": ">= 70", - }), - ("https://manganelo.com/manga/read_otome_no_teikoku", { + ("https://readmanganato.com/manga-gn983696", { "pattern": ManganeloChapterExtractor.pattern, - "count": ">= 40", + "count": ">= 25", }), + ("https://manganelo.com/manga/read_otome_no_teikoku"), ("https://manganelo.com/manga/ol921234/"), ) diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index a883b91..a28a966 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2021 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -121,8 +121,8 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor): r"(/manga/[^/?#]+)/?$") test = ( ("https://mangapark.net/manga/aria", { - "url": "b8f7db2f581404753c4af37af66c049a41273b94", - "keyword": "2c0d28efaf84fcfe62932b6931ef3c3987cd48c0", + "url": "51c6d82aed5c3c78e0d3f980b09a998e6a2a83ee", + "keyword": "cabc60cf2efa82749d27ac92c495945961e4b73c", }), ("https://mangapark.me/manga/aria"), ("https://mangapark.com/manga/aria"), diff --git a/gallery_dl/extractor/myhentaigallery.py b/gallery_dl/extractor/myhentaigallery.py index 4a43d57..da0f589 100644 --- a/gallery_dl/extractor/myhentaigallery.py +++ b/gallery_dl/extractor/myhentaigallery.py @@ -4,7 +4,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract hentai-gallery from https://myhentaigallery.com/""" +"""Extractors for https://myhentaigallery.com/""" from .common import GalleryExtractor from .. import text, exception @@ -18,8 +18,8 @@ class MyhentaigalleryGalleryExtractor(GalleryExtractor): r"/gallery/(?:thumbnails|show)/(\d+)") test = ( ("https://myhentaigallery.com/gallery/thumbnails/16247", { - "pattern": r"https://images.myhentaigrid.com/imagesgallery/images" - r"/[^/]+/original/\d+\.jpg", + "pattern": r"https://images.myhentaicomics\.com/imagesgallery" + r"/images/[^/]+/original/\d+\.jpg", "keyword": { "artist" : list, "count" : 11, diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index e9fde97..d9ab336 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -72,6 +72,8 @@ class NewgroundsExtractor(Extractor): """Return general metadata""" def login(self): + if self._check_cookies(self.cookienames): + return username, password = self._get_auth_info() if username: self._update_cookies(self._login_impl(username, password)) @@ -81,10 +83,13 @@ class NewgroundsExtractor(Extractor): self.log.info("Logging in as %s", username) url = self.root + "/passport/" - page = self.request(url).text - headers = {"Origin": self.root, "Referer": url} + response = self.request(url) + if response.history and response.url.endswith("/social"): + return self.session.cookies - url = text.urljoin(self.root, text.extract(page, 'action="', '"')[0]) + headers = {"Origin": self.root, "Referer": url} + url = text.urljoin(self.root, text.extract( + response.text, 'action="', '"')[0]) data = { "username": username, "password": password, @@ -260,7 +265,11 @@ class NewgroundsExtractor(Extractor): msg = ", ".join(text.unescape(e) for e in data["errors"]) raise exception.StopExtraction(msg) - for year, items in data["items"].items(): + items = data.get("items") + if not items: + return + + for year, items in items.items(): for item in items: page_url = text.extract(item, 'href="', '"')[0] if page_url[0] == "/": diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index 7d7c3f8..713330d 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -193,25 +193,28 @@ class NozomiSearchExtractor(NozomiExtractor): return {"search_tags": self.tags} def posts(self): - index = None - result = set() + result = None + positive = [] + negative = [] def nozomi(path): url = "https://j.nozomi.la/" + path + ".nozomi" return decode_nozomi(self.request(url).content) for tag in self.tags: - tag = tag.replace("/", "") - if tag[0] == "-": - if not index: - index = set(nozomi("index")) - items = index.difference(nozomi("nozomi/" + tag[1:])) - else: - items = nozomi("nozomi/" + tag) + (negative if tag[0] == "-" else positive).append( + tag.replace("/", "")) - if result: - result.intersection_update(items) + for tag in positive: + ids = nozomi("nozomi/" + tag) + if result is None: + result = set(ids) else: - result.update(items) + result.intersection_update(ids) + + if result is None: + result = set(nozomi("index")) + for tag in negative: + result.difference_update(nozomi("nozomi/" + tag[1:])) - return sorted(result, reverse=True) + return sorted(result, reverse=True) if result else () diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py index 951b34d..fba1312 100644 --- a/gallery_dl/extractor/philomena.py +++ b/gallery_dl/extractor/philomena.py @@ -118,11 +118,11 @@ class PhilomenaPostExtractor(PhilomenaExtractor): "source_url": "https://www.deviantart.com/speccysy/art" "/Afternoon-Flight-215193985", "spoilered": False, - "tag_count": 42, + "tag_count": int, "tag_ids": list, "tags": list, "thumbnails_generated": True, - "updated_at": "2021-09-30T20:04:01Z", + "updated_at": "2022-04-25T09:30:57Z", "uploader": "Clover the Clever", "uploader_id": 211188, "upvotes": int, diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index 94de983..b03d6f8 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -108,7 +108,7 @@ class PiczelImageExtractor(PiczelExtractor): pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)" test = ("https://piczel.tv/gallery/image/7807", { "pattern": r"https://(\w+\.)?piczel\.tv/static/uploads/gallery_image" - r"/32920/image/7807/25737334-Lulena\.png", + r"/32920/image/7807/1532236438-Lulena\.png", "content": "df9a053a24234474a19bce2b7e27e0dec23bff87", "keyword": { "created_at": "2018-07-22T05:13:58.000Z", diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 2079b73..f786be6 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -35,6 +35,12 @@ class PinterestExtractor(Extractor): yield Message.Directory, data for pin in self.pins(): + + if isinstance(pin, tuple): + url, data = pin + yield Message.Queue, url, data + continue + pin.update(data) carousel_data = pin.get("carousel_data") @@ -170,14 +176,17 @@ class PinterestBoardExtractor(PinterestExtractor): def pins(self): board = self.board + pins = self.api.board_pins(board["id"]) if board["section_count"] and self.config("sections", True): - pins = [self.api.board_pins(board["id"])] - for section in self.api.board_sections(board["id"]): - pins.append(self.api.board_section_pins(section["id"])) - return itertools.chain.from_iterable(pins) - else: - return self.api.board_pins(board["id"]) + base = "{}/{}/{}/id:".format( + self.root, board["owner"]["username"], board["name"]) + data = {"_extractor": PinterestSectionExtractor} + sections = [(base + section["id"], data) + for section in self.api.board_sections(board["id"])] + pins = itertools.chain(pins, sections) + + return pins class PinterestUserExtractor(PinterestExtractor): @@ -245,8 +254,12 @@ class PinterestSectionExtractor(PinterestExtractor): self.section = None def metadata(self): - section = self.section = self.api.board_section( - self.user, self.board_slug, self.section_slug) + if self.section_slug.startswith("id:"): + section = self.section = self.api.board_section( + self.section_slug[3:]) + else: + section = self.section = self.api.board_section_by_name( + self.user, self.board_slug, self.section_slug) section.pop("preview_pins", None) return {"board": section.pop("board"), "section": section} @@ -398,8 +411,13 @@ class PinterestAPI(): options = {"board_id": board_id} return self._pagination("BoardFeed", options) - def board_section(self, user, board_slug, section_slug): + def board_section(self, section_id): """Yield a specific board section""" + options = {"section_id": section_id} + return self._call("BoardSection", options)["resource_response"]["data"] + + def board_section_by_name(self, user, board_slug, section_slug): + """Yield a board section by name""" options = {"board_slug": board_slug, "section_slug": section_slug, "username": user} return self._call("BoardSection", options)["resource_response"]["data"] diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index f19e008..a589760 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -238,7 +238,7 @@ class PixivAvatarExtractor(PixivExtractor): class PixivBackgroundExtractor(PixivExtractor): """Extractor for pixiv background banners""" subcategory = "background" - filename_fmt = "background{date?_//:%Y-%m-%d}.{extension}" + filename_fmt = "background{date:?_//%Y-%m-%d}.{extension}" archive_fmt = "background_{user[id]}_{date}" pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net" r"/(?:en/)?users/(\d+)/background") diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index a477424..c924e0a 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -50,8 +50,9 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): subcategory = "issue" pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)" test = ("https://readcomiconline.li/Comic/W-i-t-c-h/Issue-130?id=22289", { - "url": "30d29c5afc65043bfd384c010257ec2d0ecbafa6", + "pattern": r"https://2\.bp\.blogspot\.com/[\w-]+=s0\?.+", "keyword": "2d9ec81ce1b11fac06ebf96ce33cdbfca0e85eb5", + "count": 36, }) def __init__(self, match): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 855833a..2ce7f6c 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2021 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -49,6 +49,8 @@ class SankakuExtractor(BooruExtractor): self.log.warning( "Login required to download 'contentious_content' posts") SankakuExtractor._warning = False + elif url[8] == "v": + url = "https://s.sankakucomplex.com" + url[url.index("/", 8):] return url @staticmethod @@ -79,8 +81,8 @@ class SankakuTagExtractor(SankakuExtractor): test = ( ("https://sankaku.app/?tags=bonocho", { "count": 5, - "pattern": r"https://v\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" - r"/[^/]{32}\.\w+\?e=\d+&expires=\d+&m=[^&#]+", + "pattern": r"https://s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" + r"/[^/]{32}\.\w+\?e=\d+&(expires=\d+&)?m=[^&#]+", }), ("https://beta.sankakucomplex.com/?tags=bonocho"), ("https://chan.sankakucomplex.com/?tags=bonocho"), @@ -160,7 +162,7 @@ class SankakuPostExtractor(SankakuExtractor): }), # 'contentious_content' ("https://sankaku.app/post/show/21418978", { - "pattern": r"https://v\.sankakucomplex\.com" + "pattern": r"https://s\.sankakucomplex\.com" r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg", }), # empty tags (#1617) diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 2ecb4b6..6dfc907 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -134,12 +134,12 @@ class SkebPostExtractor(SkebExtractor): "anonymous": False, "body": "re:はじめまして。私はYouTubeにてVTuberとして活動をしている湊ラ", "client": { - "avatar_url": "https://pbs.twimg.com/profile_images" - "/1537488326697287680/yNUbLDgC.jpg", - "header_url": "https://pbs.twimg.com/profile_banners" - "/1375007870291300358/1655744756/1500x500", + "avatar_url": r"re:https://pbs.twimg.com/profile_images" + r"/\d+/\w+\.jpg", + "header_url": r"re:https://pbs.twimg.com/profile_banners" + r"/1375007870291300358/\d+/1500x500", "id": 1196514, - "name": "湊ラギ♦️🎀Vtuber🎀次回6/23予定", + "name": str, "screen_name": "minato_ragi", }, "completed_at": "2022-02-27T14:03:45.442Z", diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py index 7b5982a..ae4e2e8 100644 --- a/gallery_dl/extractor/slickpic.py +++ b/gallery_dl/extractor/slickpic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -36,12 +36,15 @@ class SlickpicAlbumExtractor(SlickpicExtractor): pattern = BASE_PATTERN + r"/albums/([^/?#]+)" test = ( ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", { - "url": "58bd94ebc80fd906e9879826970b408d54c6da07", - "keyword": "54a9d6f9e42ae43c644aa9316186fb9d9955fe53", + "pattern": r"https://stored-cf\.slickpic\.com/NDk5MjNmYTc1MzU0MQ,," + r"/20160807/\w+/p/o/JSBFSS-\d+\.jpg", + "keyword": "c37c4ce9c54c09abc6abdf295855d46f11529cbf", + "count": 102, }), ("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", { "range": "34", - "content": ("cec6630e659dc72db1ee1a9a6f3b525189261988", + "content": ("52b5a310587de1048030ab13a912f6a3a9cc7dab", + "cec6630e659dc72db1ee1a9a6f3b525189261988", "6f81e1e74c6cd6db36844e7211eef8e7cd30055d", "22e83645fc242bc3584eca7ec982c8a53a4d8a44"), }), diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index 557c9fb..b0b8f3b 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -79,10 +79,9 @@ class SlidesharePresentationExtractor(GalleryExtractor): views = extr('content="UserPageVisits:', '"') if descr.endswith("…"): - alt_descr = extr( - 'id="slideshow-description-text" class="notranslate">', '</p>') + alt_descr = extr('id="slideshow-description-text"', '</p>') if alt_descr: - descr = text.remove_html(alt_descr).strip() + descr = text.remove_html(alt_descr.partition(">")[2]).strip() return { "user": self.user, diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py index bdf6036..98e914e 100644 --- a/gallery_dl/extractor/smugmug.py +++ b/gallery_dl/extractor/smugmug.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.smugmug.com/""" +"""Extractors for https://www.smugmug.com/""" from .common import Extractor, Message from .. import text, oauth, exception @@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor): test = ( ("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", { "url": "e6408fd2c64e721fd146130dceb56a971ceb4259", - "keyword": "b15af021186b7234cebcac758d2a4fd8462f9912", + "keyword": "460a773f5addadd3e216bda346fc524fe4eedc52", "content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0", }), # video ("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", { "url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee", - "keyword": "f6967cc5a46c3e130a4f8de7c5c971f72e07fe61", + "keyword": "eb74e5cf6780d5152ab8f11b431ec1b17fa8f69b", }), ) diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index fbe641d..ded7fd1 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://www.tumblr.com/""" +"""Extractors for https://www.tumblr.com/""" from .common import Extractor, Message from .. import text, oauth, exception @@ -35,7 +35,10 @@ POST_TYPES = frozenset(( BASE_PATTERN = ( r"(?:tumblr:(?:https?://)?([^/]+)|" - r"(?:https?://)?([\w-]+\.tumblr\.com))") + r"(?:https?://)?" + r"(?:www\.tumblr\.com/blog/(?:view/)?([\w-]+)|" + r"([\w-]+\.tumblr\.com)))" +) class TumblrExtractor(Extractor): @@ -48,9 +51,14 @@ class TumblrExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.blog = match.group(1) or match.group(2) - self.api = TumblrAPI(self) + name = match.group(2) + if name: + self.blog = name + ".tumblr.com" + else: + self.blog = match.group(1) or match.group(3) + + self.api = TumblrAPI(self) self.types = self._setup_posttypes() self.avatar = self.config("avatar", False) self.inline = self.config("inline", True) @@ -232,6 +240,8 @@ class TumblrUserExtractor(TumblrExtractor): ("https://demo.tumblr.com/archive"), ("tumblr:http://www.b-authentique.com/"), ("tumblr:www.b-authentique.com"), + ("https://www.tumblr.com/blog/view/smarties-art"), + ("https://www.tumblr.com/blog/smarties-art"), ) def posts(self): @@ -241,7 +251,7 @@ class TumblrUserExtractor(TumblrExtractor): class TumblrPostExtractor(TumblrExtractor): """Extractor for images from a single post on tumblr""" subcategory = "post" - pattern = BASE_PATTERN + r"/(?:post|image)/(\d+)" + pattern = BASE_PATTERN + r"/(?:post/|image/)?(\d+)" test = ( ("http://demo.tumblr.com/post/459265350", { "pattern": (r"https://\d+\.media\.tumblr\.com" @@ -273,11 +283,12 @@ class TumblrPostExtractor(TumblrExtractor): "exception": exception.NotFoundError, # HTML response (#297) }), ("http://demo.tumblr.com/image/459265350"), + ("https://www.tumblr.com/blog/view/smarties-art/686047436641353728"), ) def __init__(self, match): TumblrExtractor.__init__(self, match) - self.post_id = match.group(3) + self.post_id = match.group(4) self.reblogs = True self.date_min = 0 @@ -293,14 +304,18 @@ class TumblrTagExtractor(TumblrExtractor): """Extractor for images from a tumblr-user by tag""" subcategory = "tag" pattern = BASE_PATTERN + r"/tagged/([^/?#]+)" - test = ("http://demo.tumblr.com/tagged/Times%20Square", { - "pattern": (r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg"), - "count": 1, - }) + test = ( + ("http://demo.tumblr.com/tagged/Times%20Square", { + "pattern": r"https://\d+\.media\.tumblr\.com" + r"/tumblr_[^/_]+_1280.jpg", + "count": 1, + }), + ("https://www.tumblr.com/blog/view/smarties-art/tagged/undertale"), + ) def __init__(self, match): TumblrExtractor.__init__(self, match) - self.tag = text.unquote(match.group(3).replace("-", " ")) + self.tag = text.unquote(match.group(4).replace("-", " ")) def posts(self): return self.api.posts(self.blog, {"tag": self.tag}) @@ -312,9 +327,12 @@ class TumblrLikesExtractor(TumblrExtractor): directory_fmt = ("{category}", "{blog_name}", "likes") archive_fmt = "f_{blog[name]}_{id}_{num}" pattern = BASE_PATTERN + r"/likes" - test = ("http://mikf123.tumblr.com/likes", { - "count": 1, - }) + test = ( + ("http://mikf123.tumblr.com/likes", { + "count": 1, + }), + ("https://www.tumblr.com/blog/view/mikf123/likes"), + ) def posts(self): return self.api.likes(self.blog) diff --git a/gallery_dl/extractor/twibooru.py b/gallery_dl/extractor/twibooru.py index 355ca21..93fa039 100644 --- a/gallery_dl/extractor/twibooru.py +++ b/gallery_dl/extractor/twibooru.py @@ -83,7 +83,7 @@ class TwibooruPostExtractor(TwibooruExtractor): "tag_ids": list, "tags": list, "thumbnails_generated": True, - "updated_at": "2022-02-03T15:49:07.110Z", + "updated_at": "2022-05-13T00:43:19.791Z", "upvotes": int, "view_url": "https://cdn.twibooru.org/img/2020/7/8/1/full.png", "width": 576, diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index a0d6194..36b4806 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -64,6 +64,11 @@ class TwitterExtractor(Extractor): tweets = self._expand_tweets(self.tweets()) self.tweets = lambda : tweets + if self.config("unique", True): + seen_tweets = set() + else: + seen_tweets = None + for tweet in self.tweets(): if "legacy" in tweet: @@ -71,6 +76,11 @@ class TwitterExtractor(Extractor): else: data = tweet + if seen_tweets is not None: + if data["id_str"] in seen_tweets: + continue + seen_tweets.add(data["id_str"]) + if not self.retweets and "retweeted_status_id_str" in data: self.log.debug("Skipping %s (retweet)", data["id_str"]) continue @@ -100,6 +110,7 @@ class TwitterExtractor(Extractor): tdata = self._transform_tweet(tweet) tdata.update(metadata) + tdata["count"] = len(files) yield Message.Directory, tdata for tdata["num"], file in enumerate(files, 1): file.update(tdata) @@ -259,7 +270,7 @@ class TwitterExtractor(Extractor): "nick": u["name"], } for u in mentions] - content = tget("full_text") or tget("text") or "" + content = text.unescape(tget("full_text") or tget("text") or "") urls = entities.get("urls") if urls: for url in urls: @@ -440,12 +451,9 @@ class TwitterTimelineExtractor(TwitterExtractor): self.user = "id:" + user_id def tweets(self): - tweets = (self.api.user_tweets if self.retweets else - self.api.user_media) - # yield initial batch of (media) tweets tweet = None - for tweet in tweets(self.user): + for tweet in self._select_tweet_source()(self.user): yield tweet if tweet is None: @@ -476,6 +484,19 @@ class TwitterTimelineExtractor(TwitterExtractor): # yield search results starting from last tweet id yield from self.api.search_adaptive(query) + def _select_tweet_source(self): + strategy = self.config("strategy") + if strategy is None or strategy == "auto": + if self.retweets or self.textonly: + return self.api.user_tweets + else: + return self.api.user_media + if strategy == "tweets": + return self.api.user_tweets + if strategy == "with_replies": + return self.api.user_tweets_and_replies + return self.api.user_media + class TwitterTweetsExtractor(TwitterExtractor): """Extractor for Tweets from a user's Tweets timeline""" diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index 9278242..756384b 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -119,7 +119,8 @@ class VscoUserExtractor(VscoExtractor): pattern = BASE_PATTERN + r"(?:/gallery|/images(?:/\d+)?)?/?(?:$|[?#])" test = ( ("https://vsco.co/missuri/gallery", { - "pattern": r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/vsco\w+", + "pattern": r"https://image(-aws.+)?\.vsco\.co" + r"/[0-9a-f/]+/[\w-]+\.\w+", "range": "1-80", "count": 80, }), @@ -150,7 +151,7 @@ class VscoCollectionExtractor(VscoExtractor): archive_fmt = "c_{user}_{id}" pattern = BASE_PATTERN + r"/collection/" test = ("https://vsco.co/vsco/collection/1", { - "pattern": r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/vsco\w+\.\w+", + "pattern": r"https://image(-aws.+)?\.vsco\.co/[0-9a-f/]+/[\w-]+\.\w+", "range": "1-80", "count": 80, }) diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index e025a22..37eab24 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -58,7 +58,7 @@ class WallhavenSearchExtractor(WallhavenExtractor): (("https://wallhaven.cc/search?q=id%3A87" "&categories=111&purity=100&sorting=date_added&order=asc&page=3"), { "pattern": r"https://w.wallhaven.cc/full/\w\w/wallhaven-\w+\.\w+", - "count": "<= 20", + "count": "<= 30", }), ) diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 68871c8..bdbdc8c 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -310,7 +310,8 @@ class WeiboAlbumExtractor(WeiboExtractor): subcategory = "album" pattern = USER_PATTERN + r"\?tabtype=album" test = ("https://weibo.com/1758989602?tabtype=album", { - "pattern": r"https://wx\d+\.sinaimg\.cn/large/\w{32}\.(jpg|png|gif)", + "pattern": r"https://(wx\d+\.sinaimg\.cn/large/\w{32}\.(jpg|png|gif)" + r"|g\.us\.sinaimg\.cn/../\w+\.mp4)", "range": "1-3", "count": 3, }) @@ -324,7 +325,11 @@ class WeiboAlbumExtractor(WeiboExtractor): mid = image["mid"] if mid not in seen: seen.add(mid) - yield self._status_by_id(mid) + status = self._status_by_id(mid) + if status.get("ok") != 1: + self.log.debug("Skipping status %s (%s)", mid, status) + else: + yield status class WeiboStatusExtractor(WeiboExtractor): @@ -341,7 +346,7 @@ class WeiboStatusExtractor(WeiboExtractor): }), # unavailable video (#427) ("https://m.weibo.cn/status/4268682979207023", { - "exception": exception.HttpError, + "exception": exception.NotFoundError, }), # non-numeric status ID (#664) ("https://weibo.com/3314883543/Iy7fj4qVg"), @@ -366,7 +371,11 @@ class WeiboStatusExtractor(WeiboExtractor): ) def statuses(self): - return (self._status_by_id(self.user),) + status = self._status_by_id(self.user) + if status.get("ok") != 1: + self.log.debug(status) + raise exception.NotFoundError("status") + return (status,) @cache(maxage=356*86400) diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 107c8ed..bc4d837 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -10,6 +10,7 @@ import os import json +import time import string import _string import datetime @@ -17,17 +18,9 @@ import operator import functools from . import text, util -_CACHE = {} -_CONVERSIONS = None -_GLOBALS = { - "_env": lambda: os.environ, - "_lit": lambda: _literal, - "_now": datetime.datetime.now, -} - -def parse(format_string, default=None): - key = format_string, default +def parse(format_string, default=None, fmt=format): + key = format_string, default, fmt try: return _CACHE[key] @@ -48,7 +41,7 @@ def parse(format_string, default=None): elif kind == "F": cls = FStringFormatter - formatter = _CACHE[key] = cls(format_string, default) + formatter = _CACHE[key] = cls(format_string, default, fmt) return formatter @@ -95,8 +88,9 @@ class StringFormatter(): Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r") """ - def __init__(self, format_string, default=None): + def __init__(self, format_string, default=None, fmt=format): self.default = default + self.format = fmt self.result = [] self.fields = [] @@ -126,7 +120,7 @@ class StringFormatter(): return "".join(result) def _field_access(self, field_name, format_spec, conversion): - fmt = parse_format_spec(format_spec, conversion) + fmt = self._parse_format_spec(format_spec, conversion) if "|" in field_name: return self._apply_list([ @@ -184,27 +178,38 @@ class StringFormatter(): return fmt(obj) return wrap + def _parse_format_spec(self, format_spec, conversion): + fmt = _build_format_func(format_spec, self.format) + if not conversion: + return fmt + + conversion = _CONVERSIONS[conversion] + if fmt is self.format: + return conversion + else: + return lambda obj: fmt(conversion(obj)) + class TemplateFormatter(StringFormatter): """Read format_string from file""" - def __init__(self, path, default=None): + def __init__(self, path, default=None, fmt=format): with open(util.expand_path(path)) as fp: format_string = fp.read() - StringFormatter.__init__(self, format_string, default) + StringFormatter.__init__(self, format_string, default, fmt) class ExpressionFormatter(): """Generate text by evaluating a Python expression""" - def __init__(self, expression, default=None): + def __init__(self, expression, default=None, fmt=None): self.format_map = util.compile_expression(expression) class ModuleFormatter(): """Generate text by calling an external function""" - def __init__(self, function_spec, default=None): + def __init__(self, function_spec, default=None, fmt=None): module_name, _, function_name = function_spec.partition(":") module = __import__(module_name) self.format_map = getattr(module, function_name) @@ -213,7 +218,7 @@ class ModuleFormatter(): class FStringFormatter(): """Generate text by evaluaring an f-string literal""" - def __init__(self, fstring, default=None): + def __init__(self, fstring, default=None, fmt=None): self.format_map = util.compile_expression("f'''" + fstring + "'''") @@ -251,81 +256,37 @@ def _slice(indices): ) -def parse_format_spec(format_spec, conversion): - fmt = build_format_func(format_spec) - if not conversion: - return fmt - - global _CONVERSIONS - if _CONVERSIONS is None: - _CONVERSIONS = { - "l": str.lower, - "u": str.upper, - "c": str.capitalize, - "C": string.capwords, - "j": functools.partial(json.dumps, default=str), - "t": str.strip, - "T": util.datetime_to_timestamp_string, - "d": text.parse_timestamp, - "U": text.unescape, - "S": util.to_string, - "s": str, - "r": repr, - "a": ascii, - } - - conversion = _CONVERSIONS[conversion] - if fmt is format: - return conversion - else: - def chain(obj): - return fmt(conversion(obj)) - return chain +def _build_format_func(format_spec, default): + if format_spec: + return _FORMAT_SPECIFIERS.get( + format_spec[0], _default_format)(format_spec, default) + return default -def build_format_func(format_spec): - if format_spec: - fmt = format_spec[0] - if fmt == "?": - return _parse_optional(format_spec) - if fmt == "[": - return _parse_slice(format_spec) - if fmt == "L": - return _parse_maxlen(format_spec) - if fmt == "J": - return _parse_join(format_spec) - if fmt == "R": - return _parse_replace(format_spec) - if fmt == "D": - return _parse_datetime(format_spec) - return _default_format(format_spec) - return format - - -def _parse_optional(format_spec): - before, after, format_spec = format_spec.split("/", 2) +def _parse_optional(format_spec, default): + before, after, format_spec = format_spec.split(_SEPARATOR, 2) before = before[1:] - fmt = build_format_func(format_spec) + fmt = _build_format_func(format_spec, default) def optional(obj): return before + fmt(obj) + after if obj else "" return optional -def _parse_slice(format_spec): +def _parse_slice(format_spec, default): indices, _, format_spec = format_spec.partition("]") slice = _slice(indices[1:]) - fmt = build_format_func(format_spec) + fmt = _build_format_func(format_spec, default) def apply_slice(obj): return fmt(obj[slice]) return apply_slice -def _parse_maxlen(format_spec): - maxlen, replacement, format_spec = format_spec.split("/", 2) +def _parse_maxlen(format_spec, default): + maxlen, replacement, format_spec = format_spec.split(_SEPARATOR, 2) maxlen = text.parse_int(maxlen[1:]) - fmt = build_format_func(format_spec) + fmt = _build_format_func(format_spec, default) def mlen(obj): obj = fmt(obj) @@ -333,37 +294,58 @@ def _parse_maxlen(format_spec): return mlen -def _parse_join(format_spec): - separator, _, format_spec = format_spec.partition("/") +def _parse_join(format_spec, default): + separator, _, format_spec = format_spec.partition(_SEPARATOR) separator = separator[1:] - fmt = build_format_func(format_spec) + fmt = _build_format_func(format_spec, default) def join(obj): return fmt(separator.join(obj)) return join -def _parse_replace(format_spec): - old, new, format_spec = format_spec.split("/", 2) +def _parse_replace(format_spec, default): + old, new, format_spec = format_spec.split(_SEPARATOR, 2) old = old[1:] - fmt = build_format_func(format_spec) + fmt = _build_format_func(format_spec, default) def replace(obj): return fmt(obj.replace(old, new)) return replace -def _parse_datetime(format_spec): - dt_format, _, format_spec = format_spec.partition("/") +def _parse_datetime(format_spec, default): + dt_format, _, format_spec = format_spec.partition(_SEPARATOR) dt_format = dt_format[1:] - fmt = build_format_func(format_spec) + fmt = _build_format_func(format_spec, default) def dt(obj): return fmt(text.parse_datetime(obj, dt_format)) return dt -def _default_format(format_spec): +def _parse_offset(format_spec, default): + offset, _, format_spec = format_spec.partition(_SEPARATOR) + offset = offset[1:] + fmt = _build_format_func(format_spec, default) + + if not offset or offset == "local": + is_dst = time.daylight and time.localtime().tm_isdst > 0 + offset = -(time.altzone if is_dst else time.timezone) + else: + hours, _, minutes = offset.partition(":") + offset = 3600 * int(hours) + if minutes: + offset += 60 * (int(minutes) if offset > 0 else -int(minutes)) + + offset = datetime.timedelta(seconds=offset) + + def off(obj): + return fmt(obj + offset) + return off + + +def _default_format(format_spec, default): def wrap(obj): return format(obj, format_spec) return wrap @@ -379,3 +361,35 @@ class Literal(): _literal = Literal() + +_CACHE = {} +_SEPARATOR = "/" +_GLOBALS = { + "_env": lambda: os.environ, + "_lit": lambda: _literal, + "_now": datetime.datetime.now, +} +_CONVERSIONS = { + "l": str.lower, + "u": str.upper, + "c": str.capitalize, + "C": string.capwords, + "j": functools.partial(json.dumps, default=str), + "t": str.strip, + "T": util.datetime_to_timestamp_string, + "d": text.parse_timestamp, + "U": text.unescape, + "S": util.to_string, + "s": str, + "r": repr, + "a": ascii, +} +_FORMAT_SPECIFIERS = { + "?": _parse_optional, + "[": _parse_slice, + "D": _parse_datetime, + "L": _parse_maxlen, + "J": _parse_join, + "O": _parse_offset, + "R": _parse_replace, +} diff --git a/gallery_dl/job.py b/gallery_dl/job.py index a0adffb..9636bef 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -439,6 +439,9 @@ class DownloadJob(Job): if self.archive: self.archive.check = pathfmt.exists + if not cfg("postprocess", True): + return + postprocessors = extr.config_accumulate("postprocessors") if postprocessors: self.hooks = collections.defaultdict(list) diff --git a/gallery_dl/option.py b/gallery_dl/option.py index bd61b74..37247a7 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2021 Mike Fährmann +# Copyright 2017-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -284,6 +284,11 @@ def build_parser(): help=("Do not download any files") ) downloader.add_argument( + "--no-postprocessors", + dest="postprocess", nargs=0, action=ConfigConstAction, const=False, + help=("Do not run any post processors") + ) + downloader.add_argument( "--no-check-certificate", dest="verify", nargs=0, action=ConfigConstAction, const=False, help="Disable HTTPS certificate validation", diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py index 3f8d90a..ea61b7b 100644 --- a/gallery_dl/postprocessor/mtime.py +++ b/gallery_dl/postprocessor/mtime.py @@ -9,7 +9,7 @@ """Use metadata as file modification time""" from .common import PostProcessor -from .. import text, util +from .. import text, util, formatter from datetime import datetime @@ -17,7 +17,12 @@ class MtimePP(PostProcessor): def __init__(self, job, options): PostProcessor.__init__(self, job) - self.key = options.get("key", "date") + value = options.get("value") + if value: + self._get = formatter.parse(value, None, util.identity).format_map + else: + key = options.get("key", "date") + self._get = lambda kwdict: kwdict.get(key) events = options.get("event") if events is None: @@ -27,7 +32,7 @@ class MtimePP(PostProcessor): job.register_hooks({event: self.run for event in events}, options) def run(self, pathfmt): - mtime = pathfmt.kwdict.get(self.key) + mtime = self._get(pathfmt.kwdict) pathfmt.kwdict["_mtime"] = ( util.datetime_to_timestamp(mtime) if isinstance(mtime, datetime) else diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index 0b4c259..98c8246 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -35,7 +35,7 @@ class UgoiraPP(PostProcessor): self.output = options.get("ffmpeg-output", True) self.delete = not options.get("keep-files", False) self.repeat = options.get("repeat-last-frame", True) - self.mtime = options.get("mtime") + self.mtime = options.get("mtime", True) ffmpeg = options.get("ffmpeg-location") self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg" diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index 1c4bd03..ff97add 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2021 Mike Fährmann +# Copyright 2018-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -11,6 +11,7 @@ from .common import PostProcessor from .. import util import zipfile +import os class ZipPP(PostProcessor): @@ -34,30 +35,38 @@ class ZipPP(PostProcessor): algorithm = "store" self.zfile = None - self.path = job.pathfmt.realdirectory - self.args = (self.path[:-1] + ext, "a", + self.path = job.pathfmt.realdirectory[:-1] + self.args = (self.path + ext, "a", self.COMPRESSION_ALGORITHMS[algorithm], True) job.register_hooks({ - "file": - self.write_safe if options.get("mode") == "safe" else self.write, + "file": (self.write_safe if options.get("mode") == "safe" else + self.write_fast), }, options) job.hooks["finalize"].append(self.finalize) - def write(self, pathfmt, zfile=None): + def open(self): + try: + return zipfile.ZipFile(*self.args) + except FileNotFoundError: + os.makedirs(os.path.dirname(self.path)) + return zipfile.ZipFile(*self.args) + + def write(self, pathfmt, zfile): # 'NameToInfo' is not officially documented, but it's available # for all supported Python versions and using it directly is a lot # faster than calling getinfo() - if zfile is None: - if self.zfile is None: - self.zfile = zipfile.ZipFile(*self.args) - zfile = self.zfile if pathfmt.filename not in zfile.NameToInfo: zfile.write(pathfmt.temppath, pathfmt.filename) pathfmt.delete = self.delete + def write_fast(self, pathfmt): + if self.zfile is None: + self.zfile = self.open() + self.write(pathfmt, self.zfile) + def write_safe(self, pathfmt): - with zipfile.ZipFile(*self.args) as zfile: + with self.open() as zfile: self.write(pathfmt, zfile) def finalize(self, pathfmt, status): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 8ac7384..76f879c 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.22.3" +__version__ = "1.22.4" diff --git a/test/test_formatter.py b/test/test_formatter.py index 5b8ca0a..aec091a 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -9,12 +9,13 @@ import os import sys +import time import unittest import datetime import tempfile sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from gallery_dl import formatter # noqa E402 +from gallery_dl import formatter, text, util # noqa E402 class TestFormatter(unittest.TestCase): @@ -98,6 +99,14 @@ class TestFormatter(unittest.TestCase): self._run_test("{missing[key]}", replacement, default) self._run_test("{missing:?a//}", "a" + default, default) + def test_fmt_func(self): + self._run_test("{t}" , self.kwdict["t"] , None, int) + self._run_test("{t}" , self.kwdict["t"] , None, util.identity) + self._run_test("{dt}", self.kwdict["dt"], None, util.identity) + self._run_test("{ds}", self.kwdict["dt"], None, text.parse_datetime) + self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z}", self.kwdict["dt"], + None, util.identity) + def test_alternative(self): self._run_test("{a|z}" , "hElLo wOrLd") self._run_test("{z|a}" , "hElLo wOrLd") @@ -184,6 +193,31 @@ class TestFormatter(unittest.TestCase): self._run_test("{ds:D%Y}", "2010-01-01T01:00:00+0100") self._run_test("{l:D%Y}", "None") + def test_offset(self): + self._run_test("{dt:O 01:00}", "2010-01-01 01:00:00") + self._run_test("{dt:O+02:00}", "2010-01-01 02:00:00") + self._run_test("{dt:O-03:45}", "2009-12-31 20:15:00") + + self._run_test("{dt:O12}", "2010-01-01 12:00:00") + self._run_test("{dt:O-24}", "2009-12-31 00:00:00") + + self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z/O1}", "2010-01-01 01:00:00") + self._run_test("{t!d:O2}", "2010-01-01 02:00:00") + + orig_daylight = time.daylight + orig_timezone = time.timezone + orig_altzone = time.altzone + try: + time.daylight = False + time.timezone = -3600 + self._run_test("{dt:O}", "2010-01-01 01:00:00") + time.timezone = 7200 + self._run_test("{dt:Olocal}", "2009-12-31 22:00:00") + finally: + time.daylight = orig_daylight + time.timezone = orig_timezone + time.altzone = orig_altzone + def test_chain_special(self): # multiple replacements self._run_test("{a:Rh/C/RE/e/RL/l/}", "Cello wOrld") @@ -202,6 +236,26 @@ class TestFormatter(unittest.TestCase): # parse and format datetime self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z/%Y%m%d}", "20100101") + def test_separator(self): + orig_separator = formatter._SEPARATOR + try: + formatter._SEPARATOR = "|" + self._run_test("{a:Rh|C|RE|e|RL|l|}", "Cello wOrld") + self._run_test("{d[b]!s:R1|Q|R2|A|R0|Y|}", "Y") + + formatter._SEPARATOR = "##" + self._run_test("{l:J-##Rb##E##}", "a-E-c") + self._run_test("{l:J-##[1:-1]}", "-b-") + + formatter._SEPARATOR = "\0" + self._run_test("{d[a]:?<\0>\0L1\0too long\0}", "<too long>") + self._run_test("{d[c]:?<\0>\0L5\0too long\0}", "") + + formatter._SEPARATOR = "?" + self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z?%Y%m%d}", "20100101") + finally: + formatter._SEPARATOR = orig_separator + def test_globals_env(self): os.environ["FORMATTER_TEST"] = value = self.kwdict["a"] @@ -316,8 +370,8 @@ def noarg(): with self.assertRaises(TypeError): self.assertEqual(fmt3.format_map(self.kwdict), "") - def _run_test(self, format_string, result, default=None): - fmt = formatter.parse(format_string, default) + def _run_test(self, format_string, result, default=None, fmt=format): + fmt = formatter.parse(format_string, default, fmt) output = fmt.format_map(self.kwdict) self.assertEqual(output, result, format_string) diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index c382c0e..7a216bb 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -350,10 +350,6 @@ class MetadataTest(BasePostprocessorTest): class MtimeTest(BasePostprocessorTest): - def test_mtime_default(self): - pp = self._create() - self.assertEqual(pp.key, "date") - def test_mtime_datetime(self): self._create(None, {"date": datetime(1980, 1, 1)}) self._trigger() @@ -364,31 +360,36 @@ class MtimeTest(BasePostprocessorTest): self._trigger() self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800) - def test_mtime_custom(self): + def test_mtime_key(self): self._create({"key": "foo"}, {"foo": 315532800}) self._trigger() self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800) + def test_mtime_value(self): + self._create({"value": "{foo}"}, {"foo": 315532800}) + self._trigger() + self.assertEqual(self.pathfmt.kwdict["_mtime"], 315532800) + class ZipTest(BasePostprocessorTest): def test_zip_default(self): pp = self._create() - self.assertEqual(self.job.hooks["file"][0], pp.write) - self.assertEqual(pp.path, self.pathfmt.realdirectory) + self.assertEqual(self.job.hooks["file"][0], pp.write_fast) + self.assertEqual(pp.path, self.pathfmt.realdirectory[:-1]) self.assertEqual(pp.delete, True) self.assertEqual(pp.args, ( - pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True, + pp.path + ".zip", "a", zipfile.ZIP_STORED, True, )) self.assertTrue(pp.args[0].endswith("/test.zip")) def test_zip_safe(self): pp = self._create({"mode": "safe"}) self.assertEqual(self.job.hooks["file"][0], pp.write_safe) - self.assertEqual(pp.path, self.pathfmt.realdirectory) + self.assertEqual(pp.path, self.pathfmt.realdirectory[:-1]) self.assertEqual(pp.delete, True) self.assertEqual(pp.args, ( - pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True, + pp.path + ".zip", "a", zipfile.ZIP_STORED, True, )) self.assertTrue(pp.args[0].endswith("/test.zip")) @@ -400,7 +401,7 @@ class ZipTest(BasePostprocessorTest): }) self.assertEqual(pp.delete, False) self.assertEqual(pp.args, ( - pp.path[:-1] + ".cbz", "a", zipfile.ZIP_DEFLATED, True, + pp.path + ".cbz", "a", zipfile.ZIP_DEFLATED, True, )) self.assertTrue(pp.args[0].endswith("/test.cbz")) @@ -439,9 +440,9 @@ class ZipTest(BasePostprocessorTest): with zipfile.ZipFile(pp.zfile.filename) as file: nti = file.NameToInfo self.assertEqual(len(pp.zfile.NameToInfo), 3) - self.assertIn("file0.ext", pp.zfile.NameToInfo) - self.assertIn("file1.ext", pp.zfile.NameToInfo) - self.assertIn("file2.ext", pp.zfile.NameToInfo) + self.assertIn("file0.ext", nti) + self.assertIn("file1.ext", nti) + self.assertIn("file2.ext", nti) os.unlink(pp.zfile.filename) |
