| author | 2025-08-16 07:00:40 -0400 |
|---|---|
| committer | 2025-08-16 07:00:40 -0400 |
| commit | 22e8d9823eb9fb802c926fb03a5fdccbea26f878 (patch) |
| tree | d399937a3bf139d386b8f5df2fc646b751c14719 |
| parent | 0839cde5064bd6000162ee23b8445b99afe10068 (diff) |
| parent | 3d18761f620a294ea6c5bff13c5994b93b29f3ed (diff) |
Update upstream source from tag 'upstream/1.30.3'
Update to upstream version '1.30.3'
with Debian dir cbd3490f51b0ee3f2e172965318cd079b856367d
65 files changed, 1573 insertions, 838 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 159ff0d..1bdbcc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,71 @@ -## 1.30.2 - 2025-07-27 +## 1.30.3 - 2025-08-15 ### Extractors #### Additions -- [itaku] add `posts` & `bookmarks` extractors ([#7707](https://github.com/mikf/gallery-dl/issues/7707)) +- [booth] add support ([#7920](https://github.com/mikf/gallery-dl/issues/7920)) +- [civitai] add `collection` & `user-collections` extractors ([#8005](https://github.com/mikf/gallery-dl/issues/8005)) +- [facebook] add `info` extractor ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) +- [facebook] add `albums` extractor ([#7848](https://github.com/mikf/gallery-dl/issues/7848)) +- [imgdrive] add `image` extractor ([#7976](https://github.com/mikf/gallery-dl/issues/7976)) +- [imgtaxi] add `image` extractor ([#8019](https://github.com/mikf/gallery-dl/issues/8019)) +- [imgwallet] add `image` extractor ([#8021](https://github.com/mikf/gallery-dl/issues/8021)) +- [picstate] add `image` extractor ([#7946](https://github.com/mikf/gallery-dl/issues/7946)) +- [silverpic] add `image` extractor ([#8020](https://github.com/mikf/gallery-dl/issues/8020)) +- [tumblr] add `following` & `followers` extractors ([#8018](https://github.com/mikf/gallery-dl/issues/8018)) +- [xasiat] add support ([#4161](https://github.com/mikf/gallery-dl/issues/4161) [#5929](https://github.com/mikf/gallery-dl/issues/5929) [#7934](https://github.com/mikf/gallery-dl/issues/7934)) #### Fixes -- [kemono] support new `kemono.cr` domain ([#7902](https://github.com/mikf/gallery-dl/issues/7902) [#7909](https://github.com/mikf/gallery-dl/issues/7909) [#7911](https://github.com/mikf/gallery-dl/issues/7911) [#7913](https://github.com/mikf/gallery-dl/issues/7913) [#7904](https://github.com/mikf/gallery-dl/issues/7904)) -- [coomer] support new `coomer.st` domain ([#7907](https://github.com/mikf/gallery-dl/issues/7907) [#7909](https://github.com/mikf/gallery-dl/issues/7909) [#7911](https://github.com/mikf/gallery-dl/issues/7911) [#7904](https://github.com/mikf/gallery-dl/issues/7904)) -### Post Processors -- [exec] use `False` as `start_new_session` default to avoid a `TypeError` ([#7899](https://github.com/mikf/gallery-dl/issues/7899)) +- [blogger] fix video extraction ([#7892](https://github.com/mikf/gallery-dl/issues/7892)) +- [comick] handle chapters without chapter data ([#7972](https://github.com/mikf/gallery-dl/issues/7972)) +- [comick] handle volume-only chapters ([#8043](https://github.com/mikf/gallery-dl/issues/8043)) +- [comick] fix exception when filtering by translation group ([#8045](https://github.com/mikf/gallery-dl/issues/8045)) +- [deviantart:tiptap] fix `KeyError: 'attrs'` ([#7929](https://github.com/mikf/gallery-dl/issues/7929)) +- [everia] fix image extraction ([#7973](https://github.com/mikf/gallery-dl/issues/7973) [#7977](https://github.com/mikf/gallery-dl/issues/7977)) +- [facebook] fix `avatar` extraction for empty profiles ([#7962](https://github.com/mikf/gallery-dl/issues/7962)) +- [facebook] handle profiles without photos or `set_id` ([#7962](https://github.com/mikf/gallery-dl/issues/7962)) +- [fappic] rewrite thumbnail URLs ([#8013](https://github.com/mikf/gallery-dl/issues/8013)) +- [idolcomplex] update to new domain and interface ([#7559](https://github.com/mikf/gallery-dl/issues/7559) [#8009](https://github.com/mikf/gallery-dl/issues/8009)) +- [kemono][coomer] fix extraction ([#8028](https://github.com/mikf/gallery-dl/issues/8028) [#8031](https://github.com/mikf/gallery-dl/issues/8031)) +- [kemono] 
update `/creators` endpoint ([#8039](https://github.com/mikf/gallery-dl/issues/8039) [#8040](https://github.com/mikf/gallery-dl/issues/8040)) +- [kemono] don't set error status for posts without comments ([#7961](https://github.com/mikf/gallery-dl/issues/7961)) +- [pixiv] fix `IndexError` for unviewable works ([#7940](https://github.com/mikf/gallery-dl/issues/7940)) +- [pixiv] fix artworks downloads when using expired cookies ([#7987](https://github.com/mikf/gallery-dl/issues/7987)) +- [scrolller] fix NSFW subreddit pagination ([#7945](https://github.com/mikf/gallery-dl/issues/7945)) +- [twitter] fix potential `UnboundLocalError` when `videos` are disabled ([#7932](https://github.com/mikf/gallery-dl/issues/7932)) +- [vsco] disable TLS 1.2 cipher suites by default ([#7984](https://github.com/mikf/gallery-dl/issues/7984) [#7986](https://github.com/mikf/gallery-dl/issues/7986)) +- [wikimedia:wiki] fix `AttributeError: 'subcategories'` ([#7931](https://github.com/mikf/gallery-dl/issues/7931)) +#### Improvements +- [aibooru] support `general.aibooru.online` & `aibooru.download` +- [comick] add `lang` option ([#7938](https://github.com/mikf/gallery-dl/issues/7938)) +- [hentaifoundry] add `descriptions` option ([#7952](https://github.com/mikf/gallery-dl/issues/7952)) +- [facebook] raise `AuthRequired` for profiles requiring cookies ([#7962](https://github.com/mikf/gallery-dl/issues/7962)) +- [instagram] warn about lower quality image downloads ([#7921](https://github.com/mikf/gallery-dl/issues/7921)) +- [kemono] support `"endpoint": "posts+"` for full metadata ([#8028](https://github.com/mikf/gallery-dl/issues/8028)) +- [misskey] support `misskey.art` ([#7923](https://github.com/mikf/gallery-dl/issues/7923)) +- [motherless] detect `404`/`File not found` pages +- [pixiv] detect suspended/deleted accounts ([#7990](https://github.com/mikf/gallery-dl/issues/7990)) +- [pixiv] improve API error messages +- [pixiv] remove redundant cookies initialization code +- [scrolller] limit `title` length in default filenames +- [skeb] implement `include` option ([#6558](https://github.com/mikf/gallery-dl/issues/6558) [#7267](https://github.com/mikf/gallery-dl/issues/7267)) +- [vk] update default `archive_fmt` ([#8030](https://github.com/mikf/gallery-dl/issues/8030)) +#### Metadata +- [cien] provide `author[id]` metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) +- [dankefuerslesen] extract more metadata ([#7915](https://github.com/mikf/gallery-dl/issues/7915)) +- [dankefuerslesen:manga] fix metadata being overwritten +- [facebook] ensure numeric `user_id` values ([#7953](https://github.com/mikf/gallery-dl/issues/7953)) +- [facebook:set] fix/improve `user_id` extraction ([#7848](https://github.com/mikf/gallery-dl/issues/7848)) +- [fappic] fix `filename` values +#### Common +- [common] implement `"user-agent": "@BROWSER"` ([#7947](https://github.com/mikf/gallery-dl/issues/7947)) +- [common] improve error message for non-Netscape cookie files ([#8014](https://github.com/mikf/gallery-dl/issues/8014)) +### Downloaders +- [ytdl] don't overwrite existing `filename` data ([#7964](https://github.com/mikf/gallery-dl/issues/7964)) ### Miscellaneous -- [tests/postprocessor] fix `TypeError` when logging an error ([#6582](https://github.com/mikf/gallery-dl/issues/6582)) +- [docs/configuration] improve `client-id` & `api-key` instructions +- [docs/formatting] update and improve +- [job] apply `extension-map` to `SimulationJob` results ([#7954](https://github.com/mikf/gallery-dl/issues/7954)) +- [job] improve URL 
`scheme` extraction performance +- [job] split collected DataJob results +- [path] implement `path-convert` option ([#493](https://github.com/mikf/gallery-dl/issues/493) [#6582](https://github.com/mikf/gallery-dl/issues/6582)) +- [scripts] improve and extend `init`, `generate_test_result`, and `pyprint` +- extend `-A`/`--abort` & `"skip": "abort"` functionality ([#7891](https://github.com/mikf/gallery-dl/issues/7891)) +- use more f-strings ([#7671](https://github.com/mikf/gallery-dl/issues/7671)) @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.30.2 +Version: 1.30.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -37,6 +37,7 @@ Requires-Dist: yt-dlp; extra == "video" Provides-Extra: extra Requires-Dist: requests[socks]; extra == "extra" Requires-Dist: yt-dlp[default]; extra == "extra" +Requires-Dist: jinja2; extra == "extra" Requires-Dist: pyyaml; extra == "extra" Requires-Dist: toml; python_version < "3.11" and extra == "extra" Requires-Dist: truststore; python_version >= "3.10" and extra == "extra" @@ -138,9 +139,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.bin>`__ Nightly Builds @@ -79,9 +79,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.bin>`__ Nightly Builds diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl index f0d654e..07cfcd9 100644 --- a/data/completion/_gallery-dl +++ b/data/completion/_gallery-dl @@ -72,8 +72,8 @@ _arguments -s -S \ {-C,--cookies}'[File to load additional cookies from]':'<file>':_files \ --cookies-export'[Export session cookies to FILE]':'<file>':_files \ --cookies-from-browser'[Name of the browser to load cookies from, with optional domain prefixed with '\''/'\'', keyring name prefixed with '\''+'\'', profile prefixed with '\'':'\'', and container prefixed with '\''::'\'' ('\''none'\'' for no container (default), '\''all'\'' for all containers)]':'<browser[/domain][+keyring][:profile][::container]>' \ -{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \ -{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \ +{-A,--abort}'[Stop current extractor(s) after N consecutive file downloads were skipped. 
Specify a TARGET to set how many levels to ascend or to which subcategory to jump to. Examples: '\''-A 3'\'', '\''-A 3:2'\'', '\''-A 3:manga'\'']':'<n[:target]>' \ +{-T,--terminate}'[Stop current & parent extractors and proceed with the next input URL after N consecutive file downloads were skipped]':'<n>' \ --filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'<size>' \ --filesize-max'[Do not download files larger than SIZE (e.g. 500k or 2.5M)]':'<size>' \ --download-archive'[Record successfully downloaded files in FILE and skip downloading any file already in it]':'<file>':_files \ diff --git a/data/completion/gallery-dl.fish b/data/completion/gallery-dl.fish index 8eb427a..b7e4fe4 100644 --- a/data/completion/gallery-dl.fish +++ b/data/completion/gallery-dl.fish @@ -67,8 +67,8 @@ complete -c gallery-dl -l 'netrc' -d 'Enable .netrc authentication data' complete -c gallery-dl -r -F -s 'C' -l 'cookies' -d 'File to load additional cookies from' complete -c gallery-dl -r -F -l 'cookies-export' -d 'Export session cookies to FILE' complete -c gallery-dl -x -l 'cookies-from-browser' -d 'Name of the browser to load cookies from, with optional domain prefixed with "/", keyring name prefixed with "+", profile prefixed with ":", and container prefixed with "::" ("none" for no container (default), "all" for all containers)' -complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor run after N consecutive file downloads were skipped' -complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current and parent extractor runs after N consecutive file downloads were skipped' +complete -c gallery-dl -x -s 'A' -l 'abort' -d 'Stop current extractor(s) after N consecutive file downloads were skipped. Specify a TARGET to set how many levels to ascend or to which subcategory to jump to. Examples: "-A 3", "-A 3:2", "-A 3:manga"' +complete -c gallery-dl -x -s 'T' -l 'terminate' -d 'Stop current & parent extractors and proceed with the next input URL after N consecutive file downloads were skipped' complete -c gallery-dl -x -l 'filesize-min' -d 'Do not download files smaller than SIZE (e.g. 500k or 2.5M)' complete -c gallery-dl -x -l 'filesize-max' -d 'Do not download files larger than SIZE (e.g. 500k or 2.5M)' complete -c gallery-dl -r -F -l 'download-archive' -d 'Record successfully downloaded files in FILE and skip downloading any file already in it' diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 4979279..39b88a4 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2025-07-27" "1.30.2" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2025-08-15" "1.30.3" "gallery-dl Manual" .\" disable hyphenation .nh @@ -218,11 +218,11 @@ Export session cookies to FILE .B "\-\-cookies\-from\-browser" \f[I]BROWSER[/DOMAIN][+KEYRING][:PROFILE][::CONTAINER]\f[] Name of the browser to load cookies from, with optional domain prefixed with '/', keyring name prefixed with '+', profile prefixed with ':', and container prefixed with '::' ('none' for no container (default), 'all' for all containers) .TP -.B "\-A, \-\-abort" \f[I]N\f[] -Stop current extractor run after N consecutive file downloads were skipped +.B "\-A, \-\-abort" \f[I]N[:TARGET]\f[] +Stop current extractor(s) after N consecutive file downloads were skipped. Specify a TARGET to set how many levels to ascend or to which subcategory to jump to. 
Examples: '-A 3', '-A 3:2', '-A 3:manga' .TP .B "\-T, \-\-terminate" \f[I]N\f[] -Stop current and parent extractor runs after N consecutive file downloads were skipped +Stop current & parent extractors and proceed with the next input URL after N consecutive file downloads were skipped .TP .B "\-\-filesize\-min" \f[I]SIZE\f[] Do not download files smaller than SIZE (e.g. 500k or 2.5M) diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 12eea08..d33a147 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2025-07-27" "1.30.2" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2025-08-15" "1.30.3" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -330,6 +330,22 @@ depending on the local operating system * \f[I]"windows"\f[]: \f[I]". "\f[] +.SS extractor.*.path-convert +.IP "Type:" 6 +\f[I]Conversion(s)\f[] + +.IP "Example:" 4 +.br +* "g" +.br +* "Wl" + +.IP "Description:" 4 +\f[I]Conversion(s)\f[] to apply to each path segment after +\f[I]path-restrict\f[] +replacements. + + .SS extractor.*.path-extended .IP "Type:" 6 \f[I]bool\f[] @@ -371,36 +387,59 @@ A JSON \f[I]object\f[] mapping filename extensions to their replacements. .IP "Default:" 9 \f[I]true\f[] +.IP "Example:" 4 +.br +* "abort:5" +.br +* "abort:5:2" +.br +* "abort:5:manga" +.br +* "terminate:3" + .IP "Description:" 4 Controls the behavior when downloading files that have been downloaded before, i.e. a file with the same filename already exists or its ID is in a \f[I]download archive\f[]. -.br -* \f[I]true\f[]: Skip downloads -.br -* \f[I]false\f[]: Overwrite already existing files +\f[I]true\f[] +Skip downloads +\f[I]false\f[] +Overwrite already existing files +\f[I]"abort"\f[] +Stop the current extractor +\f[I]"abort:N"\f[] +Skip downloads and +stop the current extractor after \f[I]N\f[] consecutive skips +\f[I]"abort:N:L"\f[] +Skip downloads and .br -* \f[I]"abort"\f[]: Stop the current extractor run +stop the current extractor after \f[I]N\f[] consecutive skips +Ascend \f[I]L\f[] levels in the extractor hierarchy .br -* \f[I]"abort:N"\f[]: Skip downloads and stop the current extractor run -after \f[I]N\f[] consecutive skips - +\f[I]"abort:N:SC"\f[] +Skip downloads and .br -* \f[I]"terminate"\f[]: Stop the current extractor run, including parent extractors +stop the current extractor after \f[I]N\f[] consecutive skips +Ascend to an extractor with subcategory \f[I]SC\f[] in the extractor hierarchy .br -* \f[I]"terminate:N"\f[]: Skip downloads and stop the current extractor run, -including parent extractors, after \f[I]N\f[] consecutive skips -.br -* \f[I]"exit"\f[]: Exit the program altogether -.br -* \f[I]"exit:N"\f[]: Skip downloads and exit the program +\f[I]"terminate"\f[] +Stop the current extractor, including parent extractors +\f[I]"terminate:N"\f[] +Skip downloads and +stop the current extractor, including parent extractors, after \f[I]N\f[] consecutive skips -.br -* \f[I]"enumerate"\f[]: Add an enumeration index to the beginning of the +\f[I]"exit"\f[] +Exit the program altogether +\f[I]"exit:N"\f[] +Skip downloads and +exit the program after \f[I]N\f[] consecutive skips + +\f[I]"enumerate"\f[] +Add an enumeration index to the beginning of the filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.) @@ -457,6 +496,7 @@ response before \f[I]retrying\f[] the request. 
* \f[I]"0.5-1.5"\f[] \f[I]ao3\f[], \f[I]arcalive\f[], +\f[I]booth\f[], \f[I]civitai\f[], \f[I][Danbooru]\f[], \f[I][E621]\f[], @@ -498,7 +538,6 @@ response before \f[I]retrying\f[] the request. * \f[I]"3.0-6.0"\f[] \f[I]bilibili\f[], \f[I]exhentai\f[], -\f[I]idolcomplex\f[], \f[I][reactor]\f[], \f[I]readcomiconline\f[] .br @@ -831,12 +870,23 @@ or a \f[I]list\f[] with IP and explicit port number as elements. .br * \f[I]"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:LATEST) Gecko/20100101 Firefox/LATEST"\f[]: otherwise +.IP "Example:" 4 +.br +* "curl/8.14.1" +.br +* "browser" +.br +* "@chrome" + .IP "Description:" 4 User-Agent header value used for HTTP requests. Setting this value to \f[I]"browser"\f[] will try to automatically detect and use the \f[I]User-Agent\f[] header of the system's default browser. +Setting this value to \f[I]"@BROWSER"\f[], e.g. \f[I]"@chrome"\f[], will try to automatically detect +and use the \f[I]User-Agent\f[] header of this installed browser. + .SS extractor.*.browser .IP "Type:" 6 @@ -969,7 +1019,7 @@ to use these browser's default ciphers. .IP "Default:" 9 .br -* \f[I]false\f[]: \f[I]artstation\f[], \f[I]behance\f[] +* \f[I]false\f[]: \f[I]artstation\f[], \f[I]behance\f[], \f[I]vsco\f[] .br * \f[I]true\f[]: otherwise @@ -2062,6 +2112,24 @@ Possibly available formats are * \f[I]tiny\f[] (144p) +.SS extractor.booth.strategy +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"webpage"\f[] + +.IP "Description:" 4 +Selects how to handle and extract file URLs. + +\f[I]"webpage"\f[] +Retrieve the full HTML page +and extract file URLs from it +\f[I]"fallback"\f[] +Use \f[I]fallback\f[] URLs +to guess each file's correct filename extension + + .SS extractor.bunkr.endpoint .IP "Type:" 6 \f[I]string\f[] @@ -2175,6 +2243,8 @@ Possible values are * \f[I]"user-images"\f[] .br * \f[I]"user-videos"\f[] +.br +* \f[I]"user-collections"\f[] It is possible to use \f[I]"all"\f[] instead of listing all values separately. @@ -2293,6 +2363,26 @@ Use \f[I]+\f[] as first character to add the given options to the \f[I]quality\f[] ones. +.SS extractor.comick.lang +.IP "Type:" 6 +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] + +.IP "Example:" 4 +.br +* "en" +.br +* "fr,it,pl" +.br +* ["fr", "it", "pl"] + +.IP "Description:" 4 +\f[I]ISO 639-1\f[] language codes +to filter chapters by. + + .SS extractor.cyberdrop.domain .IP "Type:" 6 \f[I]string\f[] @@ -3092,9 +3182,13 @@ when processing a user profile. Supported values are .br +* \f[I]info\f[] +.br * \f[I]avatar\f[] .br * \f[I]photos\f[] +.br +* \f[I]albums\f[] It is possible to use \f[I]"all"\f[] instead of listing all values separately. @@ -3485,6 +3579,22 @@ to attempt to fetch the current value used by gofile. Recursively download files from subfolders. +.SS extractor.hentaifoundry.descriptions +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"text"\f[] + +.IP "Description:" 4 +Controls the format of \f[I]description\f[] metadata fields. + +.br +* \f[I]"text"\f[]: Plain text with HTML tags removed +.br +* \f[I]"html"\f[]: Raw HTML content + + .SS extractor.hentaifoundry.include .IP "Type:" 6 .br @@ -3937,29 +4047,11 @@ Extract a user's announcements as \f[I]announcements\f[] metadata. .IP "Description:" 4 API endpoint to use for retrieving creator posts. -\f[I]"legacy"\f[] -Use the results from -.br -\f[I]/v1/{service}/user/{creator_id}/posts-legacy\f[] -Provides less metadata, but is more reliable at returning all posts. -.br -Supports filtering results by \f[I]tag\f[] query parameter. 
-.br -\f[I]"legacy+"\f[] -Use the results from -.br -\f[I]/v1/{service}/user/{creator_id}/posts-legacy\f[] -to retrieve post IDs -and one request to -.br -\f[I]/v1/{service}/user/{creator_id}/post/{post_id}\f[] -to get a full set of metadata for each. -\f[I]"posts"\f[] -Use the results from -.br -\f[I]/v1/{service}/user/{creator_id}\f[] -Provides more metadata, but might not return a creator's first/last posts. -.br +\f[I]"posts"\f[] \f[I] \f[I]"legacy"\f[] +Provides only limited metadata. +\f[I]"posts+"\f[] \f[] \f[I]"legacy+"\f[] +Provides full metadata, +but requires an additional API request for each post. .SS extractor.kemono.favorites @@ -5497,6 +5589,40 @@ Download animated images as \f[I].gif\f[] instead of \f[I].webp\f[] Download article images. +.SS extractor.skeb.include +.IP "Type:" 6 +.br +* \f[I]string\f[] +.br +* \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +.br +* \f[I]["works", "sentrequests"]\f[] +if \f[I]sent-requests\f[] are enabled +.br +* \f[I]["works"]\f[] otherwise + +.IP "Example:" 4 +.br +* "works,sentrequests" +.br +* ["works", "sentrequests"] + +.IP "Description:" 4 +A (comma-separated) list of subcategories to include +when processing a user profile. + +Possible values are + +.br +* \f[I]"works"\f[] +.br +* \f[I]"sentrequests"\f[] + +It is possible to use \f[I]"all"\f[] instead of listing all values separately. + + .SS extractor.skeb.sent-requests .IP "Type:" 6 \f[I]bool\f[] @@ -9589,9 +9715,9 @@ section of your account's preferences .br * select "installed app" .br -* set \f[I]http://localhost:6414/\f[] as "redirect uri" +* set "redirect uri" to http://localhost:6414/ .br -* solve the "I'm not a robot" reCAPTCHA if needed +* solve the "I'm not a robot" challenge if needed .br * click "create app" @@ -9620,11 +9746,21 @@ new \f[I]client-id\f[] (\f[I]gallery-dl oauth:reddit\f[]) .br * login and \f[I]Apply for an API Key\f[] .br -* use a random name and description, -set "Type" to "Application", "Platform" to "All", -and "Use" to "Non-Commercial" +* fill out the form: + +.br +* choose a random name and description +.br +* set "Type" to "Application" +.br +* set "Platform" to "All" +.br +* set "Use" to "Non-Commercial" +.br +* tick the two checkboxes at the bottom .br -* fill out the two checkboxes at the bottom and click "Apply" +* click "Apply" + .br * copy \f[I]API Key\f[] and \f[I]API Secret\f[] and put them in your configuration file @@ -9642,11 +9778,19 @@ as \f[I]"api-key"\f[] and \f[I]"api-secret"\f[] .br * click "Register application" .br -* fill out the form: use a random name and description, set -https://example.org/ as "Application Website" and "Default -callback URL" +* fill out the form: + +.br +* choose a random name and description .br -* solve Google's "I'm not a robot" challenge and click "Register" +* set "Application Website" to https://example.org/ +.br +* set "Default callback URL" to https://example.org/ +.br +* solve the "I'm not a robot" challenge +.br +* click "Register" + .br * click "Show secret key" (below "OAuth Consumer Key") .br diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 6541030..97b5564 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -63,6 +63,7 @@ "path-replace" : "_", "path-remove" : "\\u0000-\\u001f\\u007f", "path-strip" : "auto", + "path-convert" : null, "path-extended": true, "metadata-extractor": null, @@ -182,6 +183,12 @@ "metadata": false, "videos" : true }, + "booth": + { + "sleep-request": "0.5-1.5", + + "strategy": "webpage" + }, "bunkr": { "endpoint": "/api/_001_v2", @@ 
-205,6 +212,10 @@ "quality" : "original=true", "quality-videos": "quality=100" }, + "comick": + { + "lang": "" + }, "coomer": { "username": "", @@ -360,6 +371,7 @@ }, "hentaifoundry": { + "descriptions": "text", "include": ["pictures"] }, "hitomi": @@ -370,8 +382,9 @@ { "username": "", "password": "", - "referer" : false, - "sleep-request": "3.0-6.0" + + "refresh" : false, + "tags" : false }, "imagechest": { @@ -632,8 +645,8 @@ "username": "", "password": "", - "refresh" : false, - "tags" : false + "refresh" : false, + "tags" : false }, "sankakucomplex": { @@ -663,6 +676,7 @@ "skeb": { "article" : false, + "include" : ["works"], "sent-requests": false, "thumbnails" : false, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 550241f..6787cc9 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: gallery_dl -Version: 1.30.2 +Version: 1.30.3 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Download-URL: https://github.com/mikf/gallery-dl/releases/latest @@ -37,6 +37,7 @@ Requires-Dist: yt-dlp; extra == "video" Provides-Extra: extra Requires-Dist: requests[socks]; extra == "extra" Requires-Dist: yt-dlp[default]; extra == "extra" +Requires-Dist: jinja2; extra == "extra" Requires-Dist: pyyaml; extra == "extra" Requires-Dist: toml; python_version < "3.11" and extra == "extra" Requires-Dist: truststore; python_version >= "3.10" and extra == "extra" @@ -138,9 +139,9 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.exe>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.exe>`__ (Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__) -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.2/gallery-dl.bin>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.30.3/gallery-dl.bin>`__ Nightly Builds diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 8ae28f6..d4427ab 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -71,6 +71,7 @@ gallery_dl/extractor/blogger.py gallery_dl/extractor/bluesky.py gallery_dl/extractor/booru.py gallery_dl/extractor/boosty.py +gallery_dl/extractor/booth.py gallery_dl/extractor/bunkr.py gallery_dl/extractor/catbox.py gallery_dl/extractor/chevereto.py @@ -260,6 +261,7 @@ gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py gallery_dl/extractor/wikifeet.py gallery_dl/extractor/wikimedia.py +gallery_dl/extractor/xasiat.py gallery_dl/extractor/xfolio.py gallery_dl/extractor/xhamster.py gallery_dl/extractor/xvideos.py diff --git a/gallery_dl.egg-info/requires.txt b/gallery_dl.egg-info/requires.txt index 531a762..c4b9769 100644 --- a/gallery_dl.egg-info/requires.txt +++ b/gallery_dl.egg-info/requires.txt @@ -3,6 +3,7 @@ requests>=2.11.0 [extra] requests[socks] yt-dlp[default] +jinja2 pyyaml [extra:python_version < "3.11"] diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py index 9ab61e5..fdcb6d0 100644 --- a/gallery_dl/__init__.py +++ b/gallery_dl/__init__.py @@ -48,7 +48,7 @@ def main(): if filename == "/O": filename = "{filename}.{extension}" elif filename.startswith("\\f"): - filename = "\f" + filename[2:] + 
filename = f"\f{filename[2:]}" config.set((), "filename", filename) if args.directory is not None: config.set((), "base-directory", args.directory) @@ -56,9 +56,9 @@ def main(): if args.postprocessors: config.set((), "postprocessors", args.postprocessors) if args.abort: - config.set((), "skip", "abort:" + str(args.abort)) + config.set((), "skip", f"abort:{args.abort}") if args.terminate: - config.set((), "skip", "terminate:" + str(args.terminate)) + config.set((), "skip", f"terminate:{args.terminate}") if args.cookies_from_browser: browser, _, profile = args.cookies_from_browser.partition(":") browser, _, keyring = browser.partition("+") diff --git a/gallery_dl/archive.py b/gallery_dl/archive.py index edecb10..3df5011 100644 --- a/gallery_dl/archive.py +++ b/gallery_dl/archive.py @@ -41,7 +41,7 @@ def connect(path, prefix, format, def sanitize(name): - return '"' + name.replace('"', "_") + '"' + return f'''"{name.replace('"', '_')}"''' class DownloadArchive(): @@ -68,25 +68,25 @@ class DownloadArchive(): table = "archive" if table is None else sanitize(table) self._stmt_select = ( - "SELECT 1 " - "FROM " + table + " " - "WHERE entry=? " - "LIMIT 1") + f"SELECT 1 " + f"FROM {table} " + f"WHERE entry=? " + f"LIMIT 1") self._stmt_insert = ( - "INSERT OR IGNORE INTO " + table + " " - "(entry) VALUES (?)") + f"INSERT OR IGNORE INTO {table} " + f"(entry) VALUES (?)") if pragma: for stmt in pragma: - cursor.execute("PRAGMA " + stmt) + cursor.execute(f"PRAGMA {stmt}") try: - cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " " - "(entry TEXT PRIMARY KEY) WITHOUT ROWID") + cursor.execute(f"CREATE TABLE IF NOT EXISTS {table} " + f"(entry TEXT PRIMARY KEY) WITHOUT ROWID") except self._sqlite3.OperationalError: # fallback for missing WITHOUT ROWID support (#553) - cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " " - "(entry TEXT PRIMARY KEY)") + cursor.execute(f"CREATE TABLE IF NOT EXISTS {table} " + f"(entry TEXT PRIMARY KEY)") def add(self, kwdict): """Add item described by 'kwdict' to archive""" @@ -156,18 +156,18 @@ class DownloadArchivePostgresql(): table = "archive" if table is None else sanitize(table) self._stmt_select = ( - "SELECT true " - "FROM " + table + " " - "WHERE entry=%s " - "LIMIT 1") + f"SELECT true " + f"FROM {table} " + f"WHERE entry=%s " + f"LIMIT 1") self._stmt_insert = ( - "INSERT INTO " + table + " (entry) " - "VALUES (%s) " - "ON CONFLICT DO NOTHING") + f"INSERT INTO {table} (entry) " + f"VALUES (%s) " + f"ON CONFLICT DO NOTHING") try: - cursor.execute("CREATE TABLE IF NOT EXISTS " + table + " " - "(entry TEXT PRIMARY KEY)") + cursor.execute(f"CREATE TABLE IF NOT EXISTS {table} " + f"(entry TEXT PRIMARY KEY)") con.commit() except Exception as exc: log.error("%s: %s when creating '%s' table: %s", diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 69a59ff..9659782 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -109,7 +109,11 @@ class YoutubeDLDownloader(DownloaderBase): def _download_video(self, ytdl_instance, pathfmt, info_dict): if "url" in info_dict: - text.nameext_from_url(info_dict["url"], pathfmt.kwdict) + if "filename" in pathfmt.kwdict: + pathfmt.kwdict["extension"] = \ + text.ext_from_url(info_dict["url"]) + else: + text.nameext_from_url(info_dict["url"], pathfmt.kwdict) formats = info_dict.get("requested_formats") if formats and not compatible_formats(formats): diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py index 5a52581..6adda0d 100644 --- a/gallery_dl/exception.py +++ 
b/gallery_dl/exception.py @@ -100,12 +100,17 @@ class AuthorizationError(ExtractionError): class AuthRequired(AuthorizationError): default = "Account credentials required" - def __init__(self, required=None, message=None): - if required and not message: - if isinstance(required, str): - message = f"{required} required" + def __init__(self, auth=None, resource="resource", message=None): + if auth: + if not isinstance(auth, str): + auth = " or ".join(auth) + if " " not in resource: + resource = "this " + resource + if message is None: + message = (f"{auth} needed to access {resource}") else: - message = f"{' or '.join(required)} required" + message = (f"{auth} needed to access {resource} " + f"('{message}')") AuthorizationError.__init__(self, message) @@ -160,6 +165,22 @@ class ControlException(GalleryDLException): class StopExtraction(ControlException): """Stop data extraction""" + def __init__(self, target=None): + ControlException.__init__(self) + + if target is None: + self.target = None + self.depth = 1 + elif isinstance(target, int): + self.target = None + self.depth = target + elif target.isdecimal(): + self.target = None + self.depth = int(target) + else: + self.target = target + self.depth = 128 + class AbortExtraction(ExtractionError, ControlException): """Abort data extraction due to an error""" diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 688f0a0..70e79fe 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -35,6 +35,7 @@ modules = [ "blogger", "bluesky", "boosty", + "booth", "bunkr", "catbox", "chevereto", @@ -210,6 +211,7 @@ modules = [ "wikiart", "wikifeet", "wikimedia", + "xasiat", "xfolio", "xhamster", "xvideos", diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index 796d9d1..af43446 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -31,6 +31,11 @@ class BloggerExtractor(BaseExtractor): self.blog = self.root.rpartition("/")[2] self.videos = self.config("videos", True) + if self.videos: + self.findall_video = util.re( + r"""src=["'](https?://www\.blogger\.com""" + r"""/video\.g\?token=[^"']+)""").findall + def items(self): blog = self.api.blog_by_url("http://" + self.blog) blog["pages"] = blog["pages"]["totalItems"] @@ -43,8 +48,6 @@ class BloggerExtractor(BaseExtractor): r'blogger\.googleusercontent\.com/img|' r'lh\d+(?:-\w+)?\.googleusercontent\.com|' r'\d+\.bp\.blogspot\.com)/[^"]+)').findall - findall_video = util.re( - r'src="(https?://www\.blogger\.com/video\.g\?token=[^"]+)').findall metadata = self.metadata() for post in self.posts(blog): @@ -54,16 +57,10 @@ class BloggerExtractor(BaseExtractor): for idx, url in enumerate(files): files[idx] = original(url) - if self.videos and 'id="BLOG_video-' in content: - page = self.request(post["url"]).text - for url in findall_video(page): - page = self.request(url).text - video_config = util.json_loads(text.extr( - page, 'var VIDEO_CONFIG =', '\n')) - files.append(max( - video_config["streams"], - key=lambda x: x["format_id"], - )["play_url"]) + if self.videos and ( + 'id="BLOG_video-' in content or + 'class="BLOG_video_' in content): + self._extract_videos(files, post) post["author"] = post["author"]["displayName"] post["replies"] = post["replies"]["totalItems"] @@ -87,6 +84,27 @@ class BloggerExtractor(BaseExtractor): def metadata(self): """Return additional metadata""" + def _extract_videos(self, files, post): + url = f"https://{self.blog}/feeds/posts/default/{post['id']}" + params = 
{ + "alt" : "json", + "v" : "2", + "dynamicviews" : "1", + "rewriteforssl": "true", + } + + data = self.request_json(url, params=params) + html = data["entry"]["content"]["$t"] + + for url in self.findall_video(html): + page = self.request(url).text + video_config = util.json_loads(text.extr( + page, 'var VIDEO_CONFIG =', '\n')) + files.append(max( + video_config["streams"], + key=lambda x: x["format_id"], + )["play_url"]) + BASE_PATTERN = BloggerExtractor.update({ "blogspot": { diff --git a/gallery_dl/extractor/booth.py b/gallery_dl/extractor/booth.py new file mode 100644 index 0000000..0fcb1cb --- /dev/null +++ b/gallery_dl/extractor/booth.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fรคhrmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://booth.pm/""" + +from .common import Extractor, Message +from .. import text, util + + +class BoothExtractor(Extractor): + """Base class for booth extractors""" + category = "booth" + root = "https://booth.pm" + directory_fmt = ("{category}", "{shop[name]}", "{id} {name}") + filename_fmt = "{num:>02} {filename}.{extension}" + archive_fmt = "{id}_{filename}" + request_interval = (0.5, 1.5) + + def _init(self): + self.cookies.set("adult", "t", domain=".booth.pm") + + def items(self): + for item in self.shop_items(): + item["_extractor"] = BoothItemExtractor + yield Message.Queue, item["shop_item_url"], item + + def _pagination(self, url): + while True: + page = self.request(url).text + + for item in text.extract_iter(page, ' data-item="', '"'): + yield util.json_loads(text.unescape(item)) + + next = text.extr(page, 'rel="next" class="nav-item" href="', '"') + if not next: + break + url = self.root + next + + +class BoothItemExtractor(BoothExtractor): + subcategory = "item" + pattern = r"(?:https?://)?(?:[\w-]+\.)?booth\.pm/(?:\w\w/)?items/(\d+)" + example = "https://booth.pm/items/12345" + + def items(self): + url = f"{self.root}/ja/items/{self.groups[0]}" + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + "X-CSRF-Token": None, + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "Priority": "u=4", + } + + if self.config("strategy") == "fallback": + page = None + item = self.request_json(url + ".json", headers=headers) + else: + page = self.request(url).text + headers["X-CSRF-Token"] = text.extr( + page, 'name="csrf-token" content="', '"') + item = self.request_json( + url + ".json", headers=headers, interval=False) + + item["booth_category"] = item.pop("category", None) + item["date"] = text.parse_datetime( + item["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + item["tags"] = [t["name"] for t in item["tags"]] + + shop = item["shop"] + shop["id"] = text.parse_int(shop["thumbnail_url"].rsplit("/", 3)[1]) + + if files := self._extract_files(item, page): + item["count"] = len(files) + shop["uuid"] = files[0]["url"].split("/", 4)[3] + else: + item["count"] = 0 + shop["uuid"] = util.NONE + + yield Message.Directory, item + for num, file in enumerate(files, 1): + url = file["url"] + file["num"] = num + text.nameext_from_url(url, file) + yield Message.Url, url, {**item, **file} + + def _extract_files(self, item, page): + if page is None: + files = [] + for image in item.pop("images"): + url = image["original"].replace("_base_resized", "") + files.append({ + "url" : url, + "_fallback": 
_fallback(url), + }) + return files + + del item["images"] + return [{"url": url} + for url in text.extract_iter(page, 'data-origin="', '"')] + + +class BoothShopExtractor(BoothExtractor): + subcategory = "shop" + pattern = r"(?:https?://)?([\w-]+\.)booth\.pm/(?:\w\w/)?(?:items)?" + example = "https://SHOP.booth.pm/" + + def __init__(self, match): + self.root = text.root_from_url(match[0]) + BoothExtractor.__init__(self, match) + + def shop_items(self): + return self._pagination(f"{self.root}/items") + + +def _fallback(url): + base = url[:-3] + yield base + "jpeg" + yield base + "png" + yield base + "webp" diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py index 7dfe6b6..45e5dab 100644 --- a/gallery_dl/extractor/cien.py +++ b/gallery_dl/extractor/cien.py @@ -52,17 +52,19 @@ class CienArticleExtractor(CienExtractor): example = "https://ci-en.net/creator/123/article/12345" def items(self): - url = f"{self.root}/creator/{self.groups[0]}/article/{self.groups[1]}" + author_id, post_id = self.groups + url = f"{self.root}/creator/{author_id}/article/{post_id}" page = self.request(url, notfound="article").text files = self._extract_files(page) post = self._extract_jsonld(page)[0] post["post_url"] = url - post["post_id"] = text.parse_int(self.groups[1]) + post["post_id"] = text.parse_int(post_id) post["count"] = len(files) post["date"] = text.parse_datetime(post["datePublished"]) try: + post["author"]["id"] = text.parse_int(author_id) del post["publisher"] del post["sameAs"] except Exception: diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index dc5b777..fe3b7ed 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -200,7 +200,7 @@ class CivitaiExtractor(Extractor): if "Authorization" not in self.api.headers and \ not self.cookies.get( "__Secure-civitai-token", domain=".civitai.com"): - raise exception.AuthRequired(("'api-key'", "cookies")) + raise exception.AuthRequired(("api-key", "authenticated cookies")) def _parse_query(self, value): return text.parse_query_list( @@ -377,6 +377,28 @@ class CivitaiImageExtractor(CivitaiExtractor): return self.api.image(self.groups[0]) +class CivitaiCollectionExtractor(CivitaiExtractor): + subcategory = "collection" + directory_fmt = ("{category}", "{user_collection[username]}", + "collections", "{collection[id]}{collection[name]:? 
//}") + pattern = BASE_PATTERN + r"/collections/(\d+)" + example = "https://civitai.com/collections/12345" + + def images(self): + cid = int(self.groups[0]) + self.kwdict["collection"] = col = self.api.collection(cid) + self.kwdict["user_collection"] = col.pop("user", None) + + params = { + "collectionId" : cid, + "period" : "AllTime", + "sort" : "Newest", + "browsingLevel" : self.api.nsfw, + "include" : ("cosmetics",), + } + return self.api.images(params, defaults=False) + + class CivitaiPostExtractor(CivitaiExtractor): subcategory = "post" directory_fmt = ("{category}", "{username|user[username]}", "posts", @@ -461,6 +483,7 @@ class CivitaiUserExtractor(Dispatch, CivitaiExtractor): (CivitaiUserPostsExtractor , base + "posts"), (CivitaiUserImagesExtractor, base + "images"), (CivitaiUserVideosExtractor, base + "videos"), + (CivitaiUserCollectionsExtractor, base + "collections"), ), ("user-images", "user-videos")) @@ -529,6 +552,22 @@ class CivitaiUserVideosExtractor(CivitaiExtractor): images = CivitaiUserImagesExtractor.images +class CivitaiUserCollectionsExtractor(CivitaiExtractor): + subcategory = "user-collections" + pattern = USER_PATTERN + r"/collections/?(?:\?([^#]+))?" + example = "https://civitai.com/user/USER/collections" + + def items(self): + user, query = self.groups + params = self._parse_query(query) + params["userId"] = self.api.user(text.unquote(user))[0]["id"] + + base = f"{self.root}/collections/" + for collection in self.api.collections(params): + collection["_extractor"] = CivitaiCollectionExtractor + yield Message.Queue, f"{base}{collection['id']}", collection + + class CivitaiGeneratedExtractor(CivitaiExtractor): """Extractor for your generated files feed""" subcategory = "generated" @@ -635,7 +674,7 @@ class CivitaiTrpcAPI(): self.root = extractor.root + "/api/trpc/" self.headers = { "content-type" : "application/json", - "x-client-version": "5.0.920", + "x-client-version": "5.0.954", "x-client-date" : "", "x-client" : "web", "x-fingerprint" : "undefined", @@ -758,6 +797,23 @@ class CivitaiTrpcAPI(): params = self._type_params(params) return self._pagination(endpoint, params, meta) + def collection(self, collection_id): + endpoint = "collection.getById" + params = {"id": int(collection_id)} + return self._call(endpoint, params)["collection"] + + def collections(self, params, defaults=True): + endpoint = "collection.getInfinite" + + if defaults: + params = self._merge_params(params, { + "browsingLevel": self.nsfw, + "sort" : "Newest", + }) + + params = self._type_params(params) + return self._pagination(endpoint, params) + def user(self, username): endpoint = "user.getCreator" params = {"username": username} @@ -783,9 +839,8 @@ class CivitaiTrpcAPI(): params = {"input": util.json_dumps(input)} headers["x-client-date"] = str(int(time.time() * 1000)) - response = self.extractor.request(url, params=params, headers=headers) - - return response.json()["result"]["data"]["json"] + return self.extractor.request_json( + url, params=params, headers=headers)["result"]["data"]["json"] def _pagination(self, endpoint, params, meta=None): if "cursor" not in params: diff --git a/gallery_dl/extractor/comick.py b/gallery_dl/extractor/comick.py index 7ef4607..6c54156 100644 --- a/gallery_dl/extractor/comick.py +++ b/gallery_dl/extractor/comick.py @@ -20,83 +20,27 @@ class ComickBase(): category = "comick" root = "https://comick.io" - @memcache(keyarg=1) - def _manga_info(self, slug): - url = f"{self.root}/comic/{slug}" - page = self.request(url).text - data = 
self._extract_nextdata(page) - props = data["props"]["pageProps"] - comic = props["comic"] - - genre = [] - theme = [] - format = "" - for item in comic["md_comic_md_genres"]: - item = item["md_genres"] - group = item["group"] - if group == "Genre": - genre.append(item["name"]) - elif group == "Theme": - theme.append(item["name"]) - else: - format = item["name"] - - if mu := comic["mu_comics"]: - tags = [c["mu_categories"]["title"] - for c in mu["mu_comic_categories"]] - publisher = [p["mu_publishers"]["title"] - for p in mu["mu_comic_publishers"]] - else: - tags = publisher = () - - return { - "manga": comic["title"], - "manga_id": comic["id"], - "manga_hid": comic["hid"], - "manga_slug": slug, - "manga_titles": [t["title"] for t in comic["md_titles"]], - "artist": [a["name"] for a in props["artists"]], - "author": [a["name"] for a in props["authors"]], - "genre" : genre, - "theme" : theme, - "format": format, - "tags" : tags, - "publisher": publisher, - "published": text.parse_int(comic["year"]), - "description": comic["desc"], - "demographic": props["demographic"], - "origin": comic["iso639_1"], - "mature": props["matureContent"], - "rating": comic["content_rating"], - "rank" : comic["follow_rank"], - "score" : text.parse_float(comic["bayesian_rating"]), - "status": "Complete" if comic["status"] == 2 else "Ongoing", - "links" : comic["links"], - "_build_id": data["buildId"], - } - - def _chapter_info(self, manga, chstr): - slug = manga['manga_slug'] - url = (f"{self.root}/_next/data/{manga['_build_id']}" - f"/comic/{slug}/{chstr}.json") - params = {"slug": slug, "chapter": chstr} - return self.request_json(url, params=params)["pageProps"] - class ComickChapterExtractor(ComickBase, ChapterExtractor): """Extractor for comick.io manga chapters""" archive_fmt = "{chapter_hid}_{page}" - pattern = BASE_PATTERN + r"/comic/([\w-]+)/(\w+-chapter-[^/?#]+)" + pattern = (BASE_PATTERN + r"/comic/([\w-]+)" + r"/(\w+(?:-(?:chapter|volume)-[^/?#]+)?)") example = "https://comick.io/comic/MANGA/ID-chapter-123-en" def metadata(self, page): slug, chstr = self.groups - manga = self._manga_info(slug) - props = self._chapter_info(manga, chstr) + manga = _manga_info(self, slug) + props = _chapter_info(self, manga, chstr) ch = props["chapter"] self._images = ch["md_images"] - chapter, sep, minor = ch["chap"].partition(".") + + if chapter := ch["chap"]: + chapter, sep, minor = chapter.partition(".") + else: + chapter = 0 + sep = minor = "" return { **manga, @@ -133,19 +77,32 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): example = "https://comick.io/comic/MANGA" def items(self): - slug = self.groups[0] - manga = self._manga_info(slug) + manga = _manga_info(self, self.groups[0]) + slug = manga["manga_slug"] + _manga_info.update(slug, manga) for ch in self.chapters(manga): - url = (f"{self.root}/comic/{slug}" - f"/{ch['hid']}-chapter-{ch['chap']}-{ch['lang']}") - ch.update(manga) - chapter, sep, minor = ch["chap"].partition(".") - ch["chapter"] = text.parse_int(chapter) - ch["chapter_minor"] = sep + minor ch["_extractor"] = ComickChapterExtractor + if chapter := ch["chap"]: + url = (f"{self.root}/comic/{slug}" + f"/{ch['hid']}-chapter-{chapter}-{ch['lang']}") + chapter, sep, minor = chapter.partition(".") + ch["volume"] = text.parse_int(ch["vol"]) + ch["chapter"] = text.parse_int(chapter) + ch["chapter_minor"] = sep + minor + elif volume := ch["vol"]: + url = (f"{self.root}/comic/{slug}" + f"/{ch['hid']}-volume-{volume}-{ch['lang']}") + ch["volume"] = text.parse_int(volume) + ch["chapter"] = 0 + 
ch["chapter_minor"] = "" + else: + url = f"{self.root}/comic/{slug}/{ch['hid']}" + ch["volume"] = ch["chapter"] = 0 + ch["chapter_minor"] = "" + yield Message.Queue, url, ch def chapters(self, manga): @@ -160,8 +117,15 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): "Sec-Fetch-Site": "same-site", } - query = text.parse_query(query) - params = {"lang": query.get("lang") or None} + query = text.parse_query_list(query, ("lang",)) + + if (lang := query.get("lang")) or (lang := self.config("lang")): + if not isinstance(lang, str): + lang = ",".join(lang) + else: + lang = None + + params = {"lang": lang} params["page"] = page = text.parse_int(query.get("page"), 1) if date_order := query.get("date-order"): @@ -172,7 +136,7 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): params["chap-order"] = \ "0" if self.config("chapter-reverse", False) else "1" - group = query.get("group", None) + group = query.get("group") if group == "0": group = None @@ -190,9 +154,73 @@ class ComickMangaExtractor(ComickBase, MangaExtractor): yield from data["chapters"] else: for ch in data["chapters"]: - if group in ch["group_name"]: + if (groups := ch["group_name"]) and group in groups: yield ch if data["total"] <= limit * page: return params["page"] = page = page + 1 + + +@memcache(keyarg=1) +def _manga_info(self, slug): + url = f"{self.root}/comic/{slug}" + page = self.request(url).text + data = self._extract_nextdata(page) + props = data["props"]["pageProps"] + comic = props["comic"] + + genre = [] + theme = [] + format = "" + for item in comic["md_comic_md_genres"]: + item = item["md_genres"] + group = item["group"] + if group == "Genre": + genre.append(item["name"]) + elif group == "Theme": + theme.append(item["name"]) + else: + format = item["name"] + + if mu := comic["mu_comics"]: + tags = [c["mu_categories"]["title"] + for c in mu["mu_comic_categories"]] + publisher = [p["mu_publishers"]["title"] + for p in mu["mu_comic_publishers"]] + else: + tags = publisher = () + + return { + "manga": comic["title"], + "manga_id": comic["id"], + "manga_hid": comic["hid"], + "manga_slug": comic["slug"], + "manga_titles": [t["title"] for t in comic["md_titles"]], + "artist": [a["name"] for a in props["artists"]], + "author": [a["name"] for a in props["authors"]], + "genre" : genre, + "theme" : theme, + "format": format, + "tags" : tags, + "publisher": publisher, + "published": text.parse_int(comic["year"]), + "description": comic["desc"], + "demographic": props["demographic"], + "origin": comic["iso639_1"], + "mature": props["matureContent"], + "rating": comic["content_rating"], + "rank" : comic["follow_rank"], + "score" : text.parse_float(comic["bayesian_rating"]), + "status": "Complete" if comic["status"] == 2 else "Ongoing", + "links" : comic["links"], + "_build_id": data["buildId"], + } + + +def _chapter_info(self, manga, chstr): + slug = manga['manga_slug'] + url = (f"{self.root}/_next/data/{manga['_build_id']}" + f"/comic/{slug}/{chstr}.json") + params = {"slug": slug, "chapter": chstr} + return self.request_json(url, params=params)["pageProps"] diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index d46152b..1ee54de 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -142,9 +142,9 @@ class Extractor(): return values - def request(self, url, method="GET", session=None, - retries=None, retry_codes=None, encoding=None, - fatal=True, notfound=None, **kwargs): + def request(self, url, method="GET", session=None, fatal=True, + retries=None, 
retry_codes=None, interval=True, + encoding=None, notfound=None, **kwargs): if session is None: session = self.session if retries is None: @@ -170,7 +170,7 @@ class Extractor(): response = challenge = None tries = 1 - if self._interval: + if self._interval and interval: seconds = (self._interval() - (time.time() - Extractor.request_timestamp)) if seconds > 0.0: @@ -464,7 +464,9 @@ class Extractor(): if custom_ua is None or custom_ua == "auto": pass elif custom_ua == "browser": - headers["User-Agent"] = _browser_useragent() + headers["User-Agent"] = _browser_useragent(None) + elif custom_ua[0] == "@": + headers["User-Agent"] = _browser_useragent(custom_ua[1:]) elif self.useragent is Extractor.useragent and not self.browser or \ custom_ua is not config.get(("extractor",), "user-agent"): headers["User-Agent"] = custom_ua @@ -539,6 +541,10 @@ class Extractor(): try: with open(path) as fp: cookies = util.cookiestxt_load(fp) + except ValueError as exc: + self.log.warning("cookies: Invalid Netscape cookies.txt file " + "'%s' (%s: %s)", + cookies_source, exc.__class__.__name__, exc) except Exception as exc: self.log.warning("cookies: Failed to load '%s' (%s: %s)", cookies_source, exc.__class__.__name__, exc) @@ -1042,19 +1048,31 @@ def _build_requests_adapter( return adapter -@cache.cache(maxage=86400) -def _browser_useragent(): +@cache.cache(maxage=86400, keyarg=0) +def _browser_useragent(browser): """Get User-Agent header from default browser""" import webbrowser - import socket + try: + open = webbrowser.get(browser).open + except webbrowser.Error: + if not browser: + raise + import shutil + if not (browser := shutil.which(browser)): + raise + + def open(url): + util.Popen((browser, url), + start_new_session=False if util.WINDOWS else True) + import socket server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) server.bind(("127.0.0.1", 0)) server.listen(1) host, port = server.getsockname() - webbrowser.open(f"http://{host}:{port}/user-agent") + open(f"http://{host}:{port}/user-agent") client = server.accept()[0] server.close() diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index ff071c5..019410c 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -236,7 +236,7 @@ BASE_PATTERN = DanbooruExtractor.update({ }, "aibooru": { "root": None, - "pattern": r"(?:safe\.)?aibooru\.online", + "pattern": r"(?:safe\.|general\.)?aibooru\.(?:online|download)", }, "booruvar": { "root": "https://booru.borvar.art", diff --git a/gallery_dl/extractor/dankefuerslesen.py b/gallery_dl/extractor/dankefuerslesen.py index a2b0f42..1c4b7d8 100644 --- a/gallery_dl/extractor/dankefuerslesen.py +++ b/gallery_dl/extractor/dankefuerslesen.py @@ -59,6 +59,9 @@ class DankefuerslesenChapterExtractor(DankefuerslesenBase, ChapterExtractor): return { "manga" : manga["title"], "manga_slug": manga["slug"], + "author" : manga["author"], + "artist" : manga["artist"], + "description": manga["description"], "title" : data["title"], "volume" : text.parse_int(data["volume"]), "chapter" : text.parse_int(chapter), @@ -114,7 +117,6 @@ class DankefuerslesenMangaExtractor(DankefuerslesenBase, MangaExtractor): data["chapter"] = text.parse_int(ch) data["chapter_minor"] = "" - manga.update(data) - results.append((f"{base}{ch}/1/", manga)) + results.append((f"{base}{ch}/1/", {**manga, **data})) return results diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 66e2a1e..d900f4c 
100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -430,13 +430,15 @@ class DeviantartExtractor(Extractor): if children := content.get("content"): html.append('<p style="') - attrs = content["attrs"] - if attrs.get("textAlign"): - html.append("text-align:") - html.append(attrs["textAlign"]) - html.append(";") - self._tiptap_process_indentation(html, attrs) - html.append('">') + if attrs := content.get("attrs"): + if align := attrs.get("textAlign"): + html.append("text-align:") + html.append(align) + html.append(";") + self._tiptap_process_indentation(html, attrs) + html.append('">') + else: + html.append('margin-inline-start:0px">') for block in children: self._tiptap_process_content(html, block) diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py index 787786e..91672bb 100644 --- a/gallery_dl/extractor/everia.py +++ b/gallery_dl/extractor/everia.py @@ -52,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor): url = self.root + self.groups[0] + "/" page = self.request(url).text content = text.extr(page, 'itemprop="text">', "<h3") - urls = util.re(r'img.*?src="([^"]+)').findall(content) + urls = util.re(r'img.*?lazy-src="([^"]+)').findall(content) data = { "title": text.unescape( diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 069ed99..f9ed1ab 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -7,7 +7,7 @@ """Extractors for https://www.facebook.com/""" from .common import Extractor, Message, Dispatch -from .. import text, exception +from .. import text, util, exception from ..cache import memcache BASE_PATTERN = r"(?:https?://)?(?:[\w-]+\.)?facebook\.com" @@ -61,6 +61,7 @@ class FacebookExtractor(Extractor): "user_id": text.extr( set_page, '"owner":{"__typename":"User","id":"', '"' ), + "user_pfbid": "", "title": self.decode_all(text.extr( set_page, '"title":{"text":"', '"' )), @@ -74,6 +75,15 @@ class FacebookExtractor(Extractor): ) } + if directory["user_id"].startswith("pfbid"): + directory["user_pfbid"] = directory["user_id"] + directory["user_id"] = ( + text.extr( + set_page, '"actors":[{"__typename":"User","id":"', '"') or + text.extr( + set_page, '"userID":"', '"') or + directory["set_id"].split(".")[1]) + return directory def parse_photo_page(self, photo_page): @@ -92,6 +102,7 @@ class FacebookExtractor(Extractor): "user_id": text.extr( photo_page, '"owner":{"__typename":"User","id":"', '"' ), + "user_pfbid": "", "caption": self.decode_all(text.extr( photo_page, '"message":{"delight_ranges"', @@ -115,6 +126,11 @@ class FacebookExtractor(Extractor): ) } + if photo["user_id"].startswith("pfbid"): + photo["user_pfbid"] = photo["user_id"] + photo["user_id"] = text.extr( + photo_page, r'\"content_owner_id_new\":\"', r'\"') + text.nameext_from_url(photo["url"], photo) photo["followups_ids"] = [] @@ -296,21 +312,33 @@ class FacebookExtractor(Extractor): i += 1 @memcache(keyarg=1) - def _extract_profile_photos_page(self, profile): - profile_photos_url = f"{self.root}/{profile}/photos_by" + def _extract_profile(self, profile, set_id=False): + if set_id: + url = f"{self.root}/{profile}/photos_by" + else: + url = f"{self.root}/{profile}" + return self._extract_profile_page(url) + def _extract_profile_page(self, url): for _ in range(self.fallback_retries + 1): - profile_photos_page = self.request(profile_photos_url).text - if set_id := self._extract_profile_set_id(profile_photos_page): - break - self.log.debug("Got empty profile photos page, retrying...") 
- else: - raise exception.AbortExtraction("Failed to extract profile data") + page = self.request(url).text - avatar_page_url = text.extr( - profile_photos_page, ',"profilePhoto":{"url":"', '"') + if page.find('>Page Not Found</title>', 0, 3000) > 0: + break + if ('"props":{"title":"This content isn\'t available right now"' in + page): + raise exception.AuthRequired( + "authenticated cookies", "profile", + "This content isn't available right now") + + set_id = self._extract_profile_set_id(page) + user = self._extract_profile_user(page) + if set_id or user: + user["set_id"] = set_id + return user - return set_id, avatar_page_url.replace("\\/", "/") + self.log.debug("Got empty profile photos page, retrying...") + return {} def _extract_profile_set_id(self, profile_photos_page): set_ids_raw = text.extr( @@ -325,6 +353,28 @@ class FacebookExtractor(Extractor): return set_id + def _extract_profile_user(self, page): + data = text.extr(page, '","user":{"', '},"viewer":{') + + user = None + try: + user = util.json_loads(f'{{"{data}}}') + if user["id"].startswith("pfbid"): + user["user_pfbid"] = user["id"] + user["id"] = text.extr(page, '"userID":"', '"') + user["username"] = (text.extr(page, '"userVanity":"', '"') or + text.extr(page, '"vanity":"', '"')) + user["profile_tabs"] = [ + edge["node"] + for edge in (user["profile_tabs"]["profile_user"] + ["timeline_nav_app_sections"]["edges"]) + ] + except Exception: + if user is None: + self.log.debug("Failed to extract user data: %s", data) + user = {} + return user + class FacebookSetExtractor(FacebookExtractor): """Base class for Facebook Set extractors""" @@ -418,6 +468,51 @@ class FacebookVideoExtractor(FacebookExtractor): yield Message.Url, audio["url"], audio +class FacebookInfoExtractor(FacebookExtractor): + """Extractor for Facebook Profile data""" + subcategory = "info" + directory_fmt = ("{category}", "{username}") + pattern = USER_PATTERN + r"/info" + example = "https://www.facebook.com/USERNAME/info" + + def items(self): + user = self._extract_profile(self.groups[0]) + return iter(((Message.Directory, user),)) + + +class FacebookAlbumsExtractor(FacebookExtractor): + """Extractor for Facebook Profile albums""" + subcategory = "albums" + pattern = USER_PATTERN + r"/photos_albums(?:/([^/?#]+))?" 
+ example = "https://www.facebook.com/USERNAME/photos_albums" + + def items(self): + profile, name = self.groups + url = f"{self.root}/{profile}/photos_albums" + page = self.request(url).text + + pos = page.find( + '"TimelineAppCollectionAlbumsRenderer","collection":{"id":"') + if pos < 0: + return + if name is not None: + name = name.lower() + + items = text.extract(page, '},"pageItems":', '}}},', pos)[0] + edges = util.json_loads(items + "}}")["edges"] + + # TODO: use /graphql API endpoint + for edge in edges: + node = edge["node"] + album = node["node"] + album["title"] = title = node["title"]["text"] + if name is not None and name != title.lower(): + continue + album["_extractor"] = FacebookSetExtractor + album["thumbnail"] = (img := node["image"]) and img["uri"] + yield Message.Queue, album["url"], album + + class FacebookPhotosExtractor(FacebookExtractor): """Extractor for Facebook Profile Photos""" subcategory = "photos" @@ -425,7 +520,10 @@ class FacebookPhotosExtractor(FacebookExtractor): example = "https://www.facebook.com/USERNAME/photos" def items(self): - set_id = self._extract_profile_photos_page(self.groups[0])[0] + set_id = self._extract_profile(self.groups[0], True)["set_id"] + if not set_id: + return iter(()) + set_url = f"{self.root}/media/set/?set={set_id}" set_page = self.request(set_url).text set_data = self.parse_set_page(set_page) @@ -439,7 +537,8 @@ class FacebookAvatarExtractor(FacebookExtractor): example = "https://www.facebook.com/USERNAME/avatar" def items(self): - avatar_page_url = self._extract_profile_photos_page(self.groups[0])[1] + user = self._extract_profile(self.groups[0]) + avatar_page_url = user["profilePhoto"]["url"] avatar_page = self.photo_page_request_wrapper(avatar_page_url).text avatar = self.parse_photo_page(avatar_page) @@ -462,6 +561,8 @@ class FacebookUserExtractor(Dispatch, FacebookExtractor): def items(self): base = f"{self.root}/{self.groups[0]}/" return self._dispatch_extractors(( + (FacebookInfoExtractor , base + "info"), (FacebookAvatarExtractor, base + "avatar"), (FacebookPhotosExtractor, base + "photos"), + (FacebookAlbumsExtractor, base + "photos_albums"), ), ("photos",)) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index e529940..91bcd38 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor): self.start_post = 0 self.start_page = 1 + def _init(self): + if self.config("descriptions") == "html": + self._process_description = self._process_description_html + def items(self): self._init_site_filters() data = self.metadata() @@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor): "artist" : text.unescape(extr('/profile">', '<')), "_body" : extr( '<div class="boxbody"', '<div class="boxfooter"'), - "description": text.unescape(text.remove_html(extr( - '>Description</div>', '</section>') - .replace("\r\n", "\n"), "", "")), + "description": self._process_description(extr( + "<div class='picDescript'>", '</section>') + .replace("\r\n", "\n")), "ratings" : [text.unescape(r) for r in text.extract_iter(extr( "class='ratings_box'", "</div>"), "title='", "'")], "date" : text.parse_datetime(extr("datetime='", "'")), @@ -106,6 +110,14 @@ class HentaifoundryExtractor(Extractor): return text.nameext_from_url(data["src"], data) + def _process_description(self, description): + return text.unescape(text.remove_html(description, "", "")) + + def _process_description_html(self, description): + pos1 = 
description.rfind('</div') # picDescript + pos2 = description.rfind('</div', None, pos1) # boxBody + return str.strip(description[0:pos2]) + def _parse_story(self, html): """Collect url and metadata for a story""" extr = text.extract_from(html) diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index 075e1f6..26fd595 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -6,266 +6,39 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://idol.sankakucomplex.com/""" +"""Extractors for https://www.idolcomplex.com/""" -from .sankaku import SankakuExtractor -from .common import Message -from ..cache import cache -from .. import text, util, exception -import collections -import re +from . import sankaku -BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?" +BASE_PATTERN = (r"(?:https?://)?(?:www\.)?" + r"idol(?:\.sankaku)?complex\.com(?:/[a-z]{2})?") -class IdolcomplexExtractor(SankakuExtractor): +class IdolcomplexBase(): """Base class for idolcomplex extractors""" category = "idolcomplex" - root = "https://idol.sankakucomplex.com" - cookies_domain = "idol.sankakucomplex.com" - cookies_names = ("_idolcomplex_session",) - referer = False - request_interval = (3.0, 6.0) - - def __init__(self, match): - SankakuExtractor.__init__(self, match) - self.logged_in = True - self.start_page = 1 - self.start_post = 0 + root = "https://www.idolcomplex.com" + cookies_domain = ".idolcomplex.com" def _init(self): - self.find_pids = re.compile( - r" href=[\"#]/\w\w/posts/(\w+)" - ).findall - self.find_tags = re.compile( - r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)' - ).findall - - def items(self): - self.login() - data = self.metadata() - - for post_id in util.advance(self.post_ids(), self.start_post): - post = self._extract_post(post_id) - url = post["file_url"] - post.update(data) - text.nameext_from_url(url, post) - yield Message.Directory, post - yield Message.Url, url, post - - def skip(self, num): - self.start_post += num - return num - - def post_ids(self): - """Return an iterable containing all relevant post ids""" - - def login(self): - if self.cookies_check(self.cookies_names): - return - - username, password = self._get_auth_info() - if username: - return self.cookies_update(self._login_impl(username, password)) - - self.logged_in = False - - @cache(maxage=90*86400, keyarg=1) - def _login_impl(self, username, password): - self.log.info("Logging in as %s", username) - - url = self.root + "/users/login" - page = self.request(url).text - - headers = { - "Referer": url, - } - url = self.root + (text.extr(page, '<form action="', '"') or - "/en/user/authenticate") - data = { - "authenticity_token": text.unescape(text.extr( - page, 'name="authenticity_token" value="', '"')), - "url" : "", - "user[name]" : username, - "user[password]": password, - "commit" : "Login", - } - self.sleep(10, "login") - response = self.request(url, method="POST", headers=headers, data=data) - - if not response.history or response.url.endswith( - ("/users/login", "/user/home")): - raise exception.AuthenticationError() - return {c.name: c.value for c in response.history[0].cookies} - - def _extract_post(self, post_id): - url = self.root + "/posts/" + post_id - page = self.request(url, retries=10).text - extr = text.extract_from(page) - - vavg = extr('id="rating"', "</ul>") - vcnt = extr('>Votes</strong>:', "<") - pid = extr(">Post ID:", 
"<") - created = extr(' title="', '"') - - if file_url := extr('>Original:', 'id='): - file_url = extr(' href="', '"') - width = extr(">", "x") - height = extr("", " ") - else: - width = extr('<object width=', ' ') - height = extr('height=', '>') - file_url = extr('<embed src="', '"') - - rating = extr(">Rating:", "<br") - - data = { - "id" : pid.strip(), - "md5" : file_url.rpartition("/")[2].partition(".")[0], - "vote_average": (1.0 * vavg.count('class="star-full"') + - 0.5 * vavg.count('class="star-half"')), - "vote_count" : text.parse_int(vcnt), - "created_at" : created, - "date" : text.parse_datetime( - created, "%Y-%m-%d %H:%M:%S.%f"), - "rating" : text.remove_html(rating).lower(), - "file_url" : "https:" + text.unescape(file_url), - "width" : text.parse_int(width), - "height" : text.parse_int(height), - } - - tags = collections.defaultdict(list) - tags_list = [] - tags_html = text.extr(page, '<ul id="tag-sidebar"', '</ul>') - for tag_type, tag_name in self.find_tags(tags_html or ""): - tags[tag_type].append(text.unquote(tag_name)) - for key, value in tags.items(): - data["tags_" + key] = " ".join(value) - tags_list += value - data["tags"] = " ".join(tags_list) - - return data + self.api = sankaku.SankakuAPI(self) + self.api.ROOT = "https://i.sankakuapi.com" + self.api.headers["Origin"] = self.root -class IdolcomplexTagExtractor(IdolcomplexExtractor): - """Extractor for images from idol.sankakucomplex.com by search-tags""" - subcategory = "tag" - directory_fmt = ("{category}", "{search_tags}") - archive_fmt = "t_{search_tags}_{id}" - pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)" - example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS" - per_page = 20 +class IdolcomplexTagExtractor(IdolcomplexBase, sankaku.SankakuTagExtractor): + """Extractor for idolcomplex tag searches""" + pattern = BASE_PATTERN + r"(?:/posts)?/?\?([^#]*)" + example = "https://www.idolcomplex.com/en/posts?tags=TAGS" - def __init__(self, match): - IdolcomplexExtractor.__init__(self, match) - query = text.parse_query(match[1]) - self.tags = text.unquote(query.get("tags", "").replace("+", " ")) - self.start_page = text.parse_int(query.get("page"), 1) - self.next = text.parse_int(query.get("next"), 0) - def skip(self, num): - if self.next: - self.start_post += num - else: - pages, posts = divmod(num, self.per_page) - self.start_page += pages - self.start_post += posts - return num - - def metadata(self): - if not self.next: - max_page = 50 if self.logged_in else 25 - if self.start_page > max_page: - self.log.info("Traversing from page %d to page %d", - max_page, self.start_page) - self.start_post += self.per_page * (self.start_page - max_page) - self.start_page = max_page - - tags = self.tags.split() - if not self.logged_in and len(tags) > 4: - raise exception.AbortExtraction( - "Non-members can only search up to 4 tags at once") - return {"search_tags": " ".join(tags)} - - def post_ids(self): - url = self.root + "/en/posts" - - params = {"auto_page": "t"} - if self.next: - params["next"] = self.next - else: - params["page"] = self.start_page - params["tags"] = self.tags - - while True: - response = self.request(url, params=params, retries=10) - if response.history and "/posts/premium" in response.url: - self.log.warning("HTTP redirect to %s", response.url) - page = response.text - - yield from text.extract_iter(page, '"id":"', '"') - - next_page_url = text.extr(page, 'next-page-url="', '"') - if not next_page_url: - return - - url, _, next_params = text.unquote( - 
text.unescape(text.unescape(next_page_url))).partition("?") - next_params = text.parse_query(next_params) - - if "next" in next_params: - # stop if the same "next" value occurs twice in a row (#265) - if "next" in params and params["next"] == next_params["next"]: - return - next_params["page"] = "2" - - if url[0] == "/": - url = self.root + url - params = next_params - - -class IdolcomplexPoolExtractor(IdolcomplexExtractor): - """Extractor for image-pools from idol.sankakucomplex.com""" - subcategory = "pool" - directory_fmt = ("{category}", "pool", "{pool}") - archive_fmt = "p_{pool}_{id}" +class IdolcomplexPoolExtractor(IdolcomplexBase, sankaku.SankakuPoolExtractor): + """Extractor for idolcomplex pools""" pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)" - example = "https://idol.sankakucomplex.com/pools/0123456789abcdef" - per_page = 24 - - def skip(self, num): - pages, posts = divmod(num, self.per_page) - self.start_page += pages - self.start_post += posts - return num - - def metadata(self): - return {"pool": self.groups[0]} - - def post_ids(self): - if not self.logged_in: - self.log.warning("Login required") - - url = self.root + "/pools/show/" + self.groups[0] - params = {"page": self.start_page} - - while True: - page = self.request(url, params=params, retries=10).text - pos = page.find('id="pool-show"') + 1 - post_ids = self.find_pids(page, pos) - - yield from post_ids - if len(post_ids) < self.per_page: - return - params["page"] += 1 - + example = "https://www.idolcomplex.com/en/pools/0123456789abcdef" -class IdolcomplexPostExtractor(IdolcomplexExtractor): - """Extractor for single images from idol.sankakucomplex.com""" - subcategory = "post" - archive_fmt = "{id}" - pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)" - example = "https://idol.sankakucomplex.com/posts/0123456789abcdef" - def post_ids(self): - return (self.groups[0],) +class IdolcomplexPostExtractor(IdolcomplexBase, sankaku.SankakuPostExtractor): + """Extractor for individual idolcomplex posts""" + pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)" + example = "https://www.idolcomplex.com/en/posts/0123456789abcdef" diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 0e5ce7e..fccc466 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -372,14 +372,78 @@ class ImgclickImageExtractor(ImagehostImageExtractor): class FappicImageExtractor(ImagehostImageExtractor): """Extractor for single images from fappic.com""" category = "fappic" - pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)" - example = "https://fappic.com/abc123/NAME.EXT" + pattern = (r"(?:https?://)?(?:www\.|img\d+\.)?fappic\.com" + r"/(?:i/\d+/())?(\w{10,})(?:/|\.)\w+") + example = "https://fappic.com/abcde12345/NAME.EXT" + + def __init__(self, match): + Extractor.__init__(self, match) + + thumb, token = self.groups + if thumb is not None and token.endswith("_t"): + self.token = token = token[:-2] + else: + self.token = token + self.page_url = f"https://fappic.com/{token}/pic.jpg" def get_info(self, page): url , pos = text.extract(page, '<a href="#"><img src="', '"') filename, pos = text.extract(page, 'alt="', '"', pos) + return url, text.re(r"^Porn[ -]Pic(?:s|ture)[ -]").sub("", filename) + - if filename.startswith("Porn-Picture-"): - filename = filename[13:] +class PicstateImageExtractor(ImagehostImageExtractor): + """Extractor for single images from picstate.com""" + category = "picstate" + pattern = 
r"(?:https?://)?((?:www\.)?picstate\.com/view/full/([^/?#]+))" + example = "https://picstate.com/view/full/123" + def get_info(self, page): + pos = page.index(' id="image_container"') + url , pos = text.extract(page, '<img src="', '"', pos) + filename, pos = text.extract(page, 'alt="', '"', pos) return url, filename + + +class ImgdriveImageExtractor(ImagehostImageExtractor): + """Extractor for single images from imgdrive.net""" + category = "imgdrive" + pattern = (r"(?:https?://)?(?:www\.)?(img(drive|taxi|wallet)\.(?:com|net)" + r"/img-(\w+)\.html)") + example = "https://imgdrive.net/img-0123456789abc.html" + + def __init__(self, match): + path, category, self.token = match.groups() + self.page_url = f"https://{path}" + self.category = f"img{category}" + Extractor.__init__(self, match) + + def get_info(self, page): + title, pos = text.extract( + page, 'property="og:title" content="', '"') + image, pos = text.extract( + page, 'property="og:image" content="', '"', pos) + return image.replace("/small/", "/big/"), title.rsplit(" | ", 2)[0] + + +class SilverpicImageExtractor(ImagehostImageExtractor): + """Extractor for single images from silverpic.com""" + category = "silverpic" + pattern = (r"(?:https?://)?((?:www\.)?silverpic\.com" + r"/([a-z0-9]{10,})/[\S]+\.html)") + example = "https://silverpic.com/a1b2c3d4f5g6/NAME.EXT.html" + + def get_info(self, page): + url, pos = text.extract(page, '<img src="/img/', '"') + alt, pos = text.extract(page, 'alt="', '"', pos) + return f"https://silverpic.com/img/{url}", alt + + def metadata(self, page): + pos = page.find('<img src="/img/') + width = text.extract(page, 'width="', '"', pos)[0] + height = text.extract(page, 'height="', '"', pos)[0] + + return { + "width" : text.parse_int(width), + "height": text.parse_int(height), + } diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index 6213e9a..b5450d5 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -223,7 +223,8 @@ class InstagramExtractor(Extractor): for num, item in enumerate(items, 1): try: - image = item["image_versions2"]["candidates"][0] + candidates = item["image_versions2"]["candidates"] + image = candidates[0] except Exception: self.log.warning("Missing media in post %s", data["post_shortcode"]) @@ -239,6 +240,22 @@ class InstagramExtractor(Extractor): video = None media = image + if len(candidates) <= 3 and not post.get("__gdl_gen"): + self.log.warning( + "%s: Image candidate list possibly incomplete " + "(%s items). Consider refreshing your cookies.", + data["post_shortcode"], len(candidates)) + elif image["width"] < item.get("original_width", 0) or \ + image["height"] < item.get("original_height", 0): + self.log.warning( + "%s: Available image resolutions lower than the " + "original (%sx%s < %sx%s). 
" + "Consider refreshing your cookies.", + data["post_shortcode"], + image["width"], image["height"], + item.get("original_width", 0), + item.get("original_height", 0)) + media = { "num" : num, "date" : text.parse_timestamp(item.get("taken_at") or @@ -694,6 +711,7 @@ class InstagramAvatarExtractor(InstagramExtractor): "caption" : None, "like_count": 0, "image_versions2": {"candidates": (avatar,)}, + "__gdl_gen" : True, },) diff --git a/gallery_dl/extractor/iwara.py b/gallery_dl/extractor/iwara.py index 934b301..179909b 100644 --- a/gallery_dl/extractor/iwara.py +++ b/gallery_dl/extractor/iwara.py @@ -341,7 +341,8 @@ class IwaraAPI(): def favorites(self, type): if not self.username: - raise exception.AuthRequired("'username' & 'password'") + raise exception.AuthRequired( + "username & password", "your favorites") endpoint = f"/favorites/{type}s" return self._pagination(endpoint) diff --git a/gallery_dl/extractor/kemono.py b/gallery_dl/extractor/kemono.py index 1e88891..46139bc 100644 --- a/gallery_dl/extractor/kemono.py +++ b/gallery_dl/extractor/kemono.py @@ -102,19 +102,15 @@ class KemonoExtractor(Extractor): post["username"] = creator["name"] if comments: - try: - post["comments"] = self.api.creator_post_comments( - service, creator_id, post["id"]) - except exception.HttpError: + post["comments"] = cmts = self.api.creator_post_comments( + service, creator_id, post["id"]) + if not isinstance(cmts, list): + self.log.debug("%s/%s: %s", creator_id, post["id"], cmts) post["comments"] = () if dms is not None: if dms is True: dms = self.api.creator_dms( post["service"], post["user"]) - try: - dms = dms["props"]["dms"] - except Exception: - dms = () post["dms"] = dms if announcements is not None: if announcements is True: @@ -245,16 +241,15 @@ class KemonoExtractor(Extractor): def _revisions_post(self, post): post["revision_id"] = 0 - try: - revs = self.api.creator_post_revisions( - post["service"], post["user"], post["id"]) - except exception.HttpError: + revs = self.api.creator_post_revisions( + post["service"], post["user"], post["id"]) + if not revs: post["revision_hash"] = self._revision_hash(post) post["revision_index"] = 1 post["revision_count"] = 1 return (post,) - revs.insert(0, post) + revs.insert(0, post) for rev in revs: rev["revision_hash"] = self._revision_hash(rev) @@ -325,25 +320,14 @@ class KemonoUserExtractor(KemonoExtractor): def posts(self): _, _, service, creator_id, query = self.groups params = text.parse_query(query) - tag = params.get("tag") - endpoint = self.config("endpoint") - if endpoint == "legacy+": - endpoint = self._posts_legacy_plus - elif endpoint == "legacy" or tag: - endpoint = self.api.creator_posts_legacy + if self.config("endpoint") in ("posts+", "legacy+"): + endpoint = self.api.creator_posts_expand else: endpoint = self.api.creator_posts return endpoint(service, creator_id, - params.get("o"), params.get("q"), tag) - - def _posts_legacy_plus(self, service, creator_id, - offset=0, query=None, tags=None): - for post in self.api.creator_posts_legacy( - service, creator_id, offset, query, tags): - yield self.api.creator_post( - service, creator_id, post["id"])["post"] + params.get("o"), params.get("q"), params.get("tag")) class KemonoPostsExtractor(KemonoExtractor): @@ -589,20 +573,22 @@ class KemonoAPI(): return self._call(endpoint) def creators(self): - endpoint = "/creators.txt" - return self._call(endpoint) + endpoint = "/creators" + headers = {"Accept": "text/css"} + return self._call(endpoint, headers=headers) def creator_posts(self, service, creator_id, 
offset=0, query=None, tags=None): - endpoint = f"/{service}/user/{creator_id}" - params = {"q": query, "tag": tags, "o": offset} + endpoint = f"/{service}/user/{creator_id}/posts" + params = {"o": offset, "tag": tags, "q": query} return self._pagination(endpoint, params, 50) - def creator_posts_legacy(self, service, creator_id, + def creator_posts_expand(self, service, creator_id, offset=0, query=None, tags=None): - endpoint = f"/{service}/user/{creator_id}/posts-legacy" - params = {"o": offset, "tag": tags, "q": query} - return self._pagination(endpoint, params, 50, "results") + for post in self.creator_posts( + service, creator_id, offset, query, tags): + yield self.creator_post( + service, creator_id, post["id"])["post"] def creator_announcements(self, service, creator_id): endpoint = f"/{service}/user/{creator_id}/announcements" @@ -622,11 +608,11 @@ class KemonoAPI(): def creator_post_comments(self, service, creator_id, post_id): endpoint = f"/{service}/user/{creator_id}/post/{post_id}/comments" - return self._call(endpoint) + return self._call(endpoint, fatal=False) def creator_post_revisions(self, service, creator_id, post_id): endpoint = f"/{service}/user/{creator_id}/post/{post_id}/revisions" - return self._call(endpoint) + return self._call(endpoint, fatal=False) def creator_profile(self, service, creator_id): endpoint = f"/{service}/user/{creator_id}/profile" @@ -657,19 +643,19 @@ class KemonoAPI(): params = {"type": type} return self._call(endpoint, params) - def _call(self, endpoint, params=None): - url = self.root + endpoint - response = self.extractor.request(url, params=params) - return response.json() + def _call(self, endpoint, params=None, headers=None, fatal=True): + return self.extractor.request_json( + f"{self.root}{endpoint}", params=params, headers=headers, + encoding="utf-8", fatal=fatal) - def _pagination(self, endpoint, params, batch=50, key=False): + def _pagination(self, endpoint, params, batch=50, key=None): offset = text.parse_int(params.get("o")) params["o"] = offset - offset % batch while True: data = self._call(endpoint, params) - if key: + if key is not None: data = data.get(key) if not data: return diff --git a/gallery_dl/extractor/madokami.py b/gallery_dl/extractor/madokami.py index e87dbba..1db5126 100644 --- a/gallery_dl/extractor/madokami.py +++ b/gallery_dl/extractor/madokami.py @@ -31,7 +31,7 @@ class MadokamiMangaExtractor(MadokamiExtractor): def items(self): username, password = self._get_auth_info() if not username: - raise exception.AuthRequired("'username' & 'password'") + raise exception.AuthRequired("username & password") self.session.auth = util.HTTPBasicAuth(username, password) url = f"{self.root}/Manga/{self.groups[0]}" diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py index f579a2d..5ff601a 100644 --- a/gallery_dl/extractor/misskey.py +++ b/gallery_dl/extractor/misskey.py @@ -90,6 +90,10 @@ BASE_PATTERN = MisskeyExtractor.update({ "root": "https://misskey.design", "pattern": r"misskey\.design", }, + "misskey.art": { + "root": "https://misskey.art", + "pattern": r"misskey\.art", + }, "lesbian.energy": { "root": "https://lesbian.energy", "pattern": r"lesbian\.energy", diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py index c81a4d1..48137ce 100644 --- a/gallery_dl/extractor/motherless.py +++ b/gallery_dl/extractor/motherless.py @@ -9,7 +9,7 @@ """Extractors for https://motherless.com/""" from .common import Extractor, Message -from .. import text, util +from .. 
import text, util, exception from ..cache import memcache from datetime import timedelta @@ -23,6 +23,17 @@ class MotherlessExtractor(Extractor): filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" + def request(self, url, **kwargs): + response = Extractor.request(self, url, **kwargs) + + content = response.content + if (b'<div class="error-page' in content or + b">The page you're looking for cannot be found.<" in content): + raise exception.NotFoundError("page") + + self.request = Extractor.request.__get__(self) + return response + def _extract_media(self, path): url = f"{self.root}/{path}" page = self.request(url).text diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index cb0e93e..d34130d 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -308,7 +308,12 @@ class PixivExtractor(Extractor): square1200 = body["userIllusts"][body["id"]]["url"] except Exception: return + parts = square1200.rpartition("_p0")[0].split("/") + if len(parts) < 6: + return self.log.warning( + "%s: %s", body["id"], square1200.rpartition("/")[2]) + del parts[3:5] parts[3] = "img-original" base = "/".join(parts) @@ -424,14 +429,11 @@ class PixivArtworksExtractor(PixivExtractor): self.user_id = u1 or u2 self.tag = t1 or t2 - if self.sanity_workaround: - self.cookies_domain = domain = ".pixiv.net" - self._init_cookies() - if self._warn_phpsessid: - PixivArtworksExtractor._warn_phpsessid = False - if not self.cookies.get("PHPSESSID", domain=domain): - self.log.warning("No 'PHPSESSID' cookie set. Can detect on" - "ly non R-18 'limit_sanity_level' works.") + if self.sanity_workaround and self._warn_phpsessid: + PixivArtworksExtractor._warn_phpsessid = False + if not self.cookies.get("PHPSESSID", domain=self.cookies_domain): + self.log.warning("No 'PHPSESSID' cookie set. 
Can detect only " + "non R-18 'limit_sanity_level' works.") def metadata(self): if self.config("metadata"): @@ -441,19 +443,16 @@ class PixivArtworksExtractor(PixivExtractor): def works(self): works = self.api.user_illusts(self.user_id) - if self.sanity_workaround: - body = self._request_ajax( - f"/user/{self.user_id}/profile/all") - if not body: - return () + if self.sanity_workaround and (body := self._request_ajax( + f"/user/{self.user_id}/profile/all")): try: ajax_ids = list(map(int, body["illusts"])) ajax_ids.extend(map(int, body["manga"])) ajax_ids.sort() except Exception as exc: + self.log.debug("", exc_info=exc) self.log.warning("u%s: Failed to collect artwork IDs " - "using AJAX API (%s: %s)", - self.user_id, exc.__class__.__name__, exc) + "using AJAX API", self.user_id) else: works = self._extend_sanity(works, ajax_ids) @@ -1262,7 +1261,7 @@ class PixivAppAPI(): def user_illusts(self, user_id): params = {"user_id": user_id} - return self._pagination("/v1/user/illusts", params) + return self._pagination("/v1/user/illusts", params, user_data="user") def user_novels(self, user_id): params = {"user_id": user_id} @@ -1297,22 +1296,29 @@ class PixivAppAPI(): self.extractor.wait(seconds=300) continue - raise exception.AbortExtraction(f"API request failed: {error}") + msg = (f"'{msg}'" if (msg := error.get("user_message")) else + f"'{msg}'" if (msg := error.get("message")) else + error) + raise exception.AbortExtraction(f"API request failed: {msg}") def _pagination(self, endpoint, params, - key_items="illusts", key_data=None): - while True: - data = self._call(endpoint, params) + key_items="illusts", key_data=None, user_data=None): + data = self._call(endpoint, params) - if key_data: - self.data = data.get(key_data) - key_data = None + if key_data is not None: + self.data = data.get(key_data) + if user_data is not None: + if not data[user_data].get("id"): + raise exception.NotFoundError("user") + + while True: yield from data[key_items] if not data["next_url"]: return query = data["next_url"].rpartition("?")[2] params = text.parse_query(query) + data = self._call(endpoint, params) @cache(maxage=36500*86400, keyarg=0) diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 1c93cbf..5caad4b 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -152,12 +152,8 @@ class SankakuPoolExtractor(SankakuExtractor): pattern = BASE_PATTERN + r"/(?:books|pools?/show)/(\w+)" example = "https://sankaku.app/books/12345" - def __init__(self, match): - SankakuExtractor.__init__(self, match) - self.pool_id = match[1] - def metadata(self): - pool = self.api.pools(self.pool_id) + pool = self.api.pools(self.groups[0]) pool["tags"] = [tag["name"] for tag in pool["tags"]] pool["artist_tags"] = [tag["name"] for tag in pool["artist_tags"]] @@ -178,12 +174,8 @@ class SankakuPostExtractor(SankakuExtractor): pattern = BASE_PATTERN + r"/posts?(?:/show)?/(\w+)" example = "https://sankaku.app/post/show/12345" - def __init__(self, match): - SankakuExtractor.__init__(self, match) - self.post_id = match[1] - def posts(self): - return self.api.posts(self.post_id) + return self.api.posts(self.groups[0]) class SankakuBooksExtractor(SankakuExtractor): @@ -207,12 +199,14 @@ class SankakuBooksExtractor(SankakuExtractor): class SankakuAPI(): """Interface for the sankaku.app API""" + ROOT = "https://sankakuapi.com" + VERSION = None def __init__(self, extractor): self.extractor = extractor self.headers = { "Accept" : "application/vnd.sankaku.api+json;v=2", - 
"Api-Version": None, + "Api-Version": self.VERSION, "Origin" : extractor.root, } @@ -281,7 +275,7 @@ class SankakuAPI(): _authenticate_impl(self.extractor, self.username, self.password) def _call(self, endpoint, params=None): - url = "https://sankakuapi.com" + endpoint + url = self.ROOT + endpoint for _ in range(5): self.authenticate() response = self.extractor.request( @@ -307,6 +301,10 @@ class SankakuAPI(): ("unauthorized", "invalid-token", "invalid_token")): _authenticate_impl.invalidate(self.username) continue + try: + code = f"'{code.rpartition('__')[2].replace('-', ' ')}'" + except Exception: + pass raise exception.AbortExtraction(code) return data @@ -357,12 +355,12 @@ class SankakuAPI(): def _authenticate_impl(extr, username, password): extr.log.info("Logging in as %s", username) - url = "https://sankakuapi.com/auth/token" - headers = {"Accept": "application/vnd.sankaku.api+json;v=2"} + api = extr.api + url = api.ROOT + "/auth/token" data = {"login": username, "password": password} response = extr.request( - url, method="POST", headers=headers, json=data, fatal=False) + url, method="POST", headers=api.headers, json=data, fatal=False) data = response.json() if response.status_code >= 400 or not data.get("success"): diff --git a/gallery_dl/extractor/scrolller.py b/gallery_dl/extractor/scrolller.py index 40f047a..ff191db 100644 --- a/gallery_dl/extractor/scrolller.py +++ b/gallery_dl/extractor/scrolller.py @@ -20,7 +20,7 @@ class ScrolllerExtractor(Extractor): category = "scrolller" root = "https://scrolller.com" directory_fmt = ("{category}", "{subredditTitle}") - filename_fmt = "{id}{num:?_//>03}{title:? //}.{extension}" + filename_fmt = "{id}{num:?_//>03}{title:? //[:230]}.{extension}" archive_fmt = "{id}_{num}" request_interval = (0.5, 1.5) @@ -115,7 +115,7 @@ class ScrolllerExtractor(Extractor): )["data"] def _pagination(self, opname, variables, data=None): - if data is None: + if data is None or not data.get("items"): data = self._request_graphql(opname, variables) while True: diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index 1caafd1..3c7205a 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -6,9 +6,11 @@ """Extractors for https://skeb.jp/""" -from .common import Extractor, Message +from .common import Extractor, Message, Dispatch from .. 
import text -import itertools + +BASE_PATTERN = r"(?:https?://)?skeb\.jp" +USER_PATTERN = BASE_PATTERN + r"/@([^/?#]+)" class SkebExtractor(Extractor): @@ -19,10 +21,6 @@ class SkebExtractor(Extractor): archive_fmt = "{post_num}_{_file_id}_{content_category}" root = "https://skeb.jp" - def __init__(self, match): - Extractor.__init__(self, match) - self.user_name = match[1] - def _init(self): self.thumbnails = self.config("thumbnails", False) self.article = self.config("article", False) @@ -65,7 +63,7 @@ class SkebExtractor(Extractor): url = file["file_url"] yield Message.Url, url, text.nameext_from_url(url, post) - def _items_users(self): + def items_users(self): base = self.root + "/@" for user in self.users(): user["_extractor"] = SkebUserExtractor @@ -196,44 +194,63 @@ class SkebExtractor(Extractor): class SkebPostExtractor(SkebExtractor): """Extractor for a single skeb post""" subcategory = "post" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/works/(\d+)" + pattern = USER_PATTERN + r"/works/(\d+)" example = "https://skeb.jp/@USER/works/123" - def __init__(self, match): - SkebExtractor.__init__(self, match) - self.post_num = match[2] + def posts(self): + return (self.groups,) + + +class SkebWorksExtractor(SkebExtractor): + """Extractor for a skeb user's works""" + subcategory = "works" + pattern = USER_PATTERN + r"/works" + example = "https://skeb.jp/@USER/works" def posts(self): - return ((self.user_name, self.post_num),) + url = f"{self.root}/api/users/{self.groups[0]}/works" + params = {"role": "creator", "sort": "date"} + return self._pagination(url, params) -class SkebUserExtractor(SkebExtractor): - """Extractor for all posts from a skeb user""" - subcategory = "user" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/?$" - example = "https://skeb.jp/@USER" +class SkebSentrequestsExtractor(SkebExtractor): + """Extractor for a skeb user's sent requests""" + subcategory = "sentrequests" + pattern = USER_PATTERN + r"/sent[ _-]?requests" + example = "https://skeb.jp/@USER/sentrequests" def posts(self): - url = f"{self.root}/api/users/{self.user_name}/works" + url = f"{self.root}/api/users/{self.groups[0]}/works" + params = {"role": "client", "sort": "date"} + return self._pagination(url, params) - params = {"role": "creator", "sort": "date"} - posts = self._pagination(url, params) +class SkebUserExtractor(Dispatch, SkebExtractor): + """Extractor for a skeb user profile""" + pattern = USER_PATTERN + r"/?$" + example = "https://skeb.jp/@USER" + + def items(self): if self.config("sent-requests", False): - params = {"role": "client", "sort": "date"} - posts = itertools.chain(posts, self._pagination(url, params)) + default = ("works", "sentrequests") + else: + default = ("works",) - return posts + base = f"{self.root}/@{self.groups[0]}/" + return self._dispatch_extractors(( + (SkebWorksExtractor , base + "works"), + (SkebSentrequestsExtractor, base + "sentrequests"), + ), default) class SkebSearchExtractor(SkebExtractor): """Extractor for skeb search results""" subcategory = "search" - pattern = r"(?:https?://)?skeb\.jp/search\?q=([^&#]+)" + pattern = BASE_PATTERN + r"/search\?q=([^&#]+)" example = "https://skeb.jp/search?q=QUERY" def metadata(self): - return {"search_tags": text.unquote(self.user_name)} + return {"search_tags": text.unquote(self.groups[0])} def posts(self): url = "https://hb1jt3kre9-2.algolianet.com/1/indexes/*/queries" @@ -258,7 +275,7 @@ class SkebSearchExtractor(SkebExtractor): request = { "indexName": "Request", - "query": text.unquote(self.user_name), + "query": 
text.unquote(self.groups[0]), "params": pams + str(page), } data = {"requests": (request,)} @@ -281,13 +298,13 @@ class SkebSearchExtractor(SkebExtractor): class SkebFollowingExtractor(SkebExtractor): """Extractor for all creators followed by a skeb user""" subcategory = "following" - pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators" + pattern = USER_PATTERN + r"/following_creators" example = "https://skeb.jp/@USER/following_creators" - items = SkebExtractor._items_users + items = SkebExtractor.items_users def users(self): - endpoint = f"/users/{self.user_name}/following_creators" + endpoint = f"/users/{self.groups[0]}/following_creators" params = {"sort": "date"} return self._pagination_users(endpoint, params) @@ -295,12 +312,11 @@ class SkebFollowingExtractor(SkebExtractor): class SkebFollowingUsersExtractor(SkebExtractor): """Extractor for your followed users""" subcategory = "following-users" - pattern = r"(?:https?://)?skeb\.jp/following_users()" + pattern = BASE_PATTERN + r"/following_users" example = "https://skeb.jp/following_users" - items = SkebExtractor._items_users + items = SkebExtractor.items_users def users(self): endpoint = "/following_users" - params = {} - return self._pagination_users(endpoint, params) + return self._pagination_users(endpoint, {}) diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index d9f1ea2..46507c4 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -171,6 +171,11 @@ class TumblrExtractor(Extractor): post["count"] = len(posts) yield msg, url, post + def items_blogs(self): + for blog in self.blogs(): + blog["_extractor"] = TumblrUserExtractor + yield Message.Queue, blog["url"], blog + def posts(self): """Return an iterable containing all relevant posts""" @@ -345,6 +350,30 @@ class TumblrLikesExtractor(TumblrExtractor): return self.api.likes(self.blog) +class TumblrFollowingExtractor(TumblrExtractor): + """Extractor for a Tumblr user's followed blogs""" + subcategory = "following" + pattern = BASE_PATTERN + r"/following" + example = "https://www.tumblr.com/BLOG/following" + + items = TumblrExtractor.items_blogs + + def blogs(self): + return self.api.following(self.blog) + + +class TumblrFollowersExtractor(TumblrExtractor): + """Extractor for a Tumblr user's followers""" + subcategory = "followers" + pattern = BASE_PATTERN + r"/followers" + example = "https://www.tumblr.com/BLOG/followers" + + items = TumblrExtractor.items_blogs + + def blogs(self): + return self.api.followers(self.blog) + + class TumblrSearchExtractor(TumblrExtractor): """Extractor for a Tumblr search""" subcategory = "search" @@ -420,6 +449,14 @@ class TumblrAPI(oauth.OAuth1API): yield from posts params["before"] = posts[-1]["liked_timestamp"] + def following(self, blog): + endpoint = f"/v2/blog/{blog}/following" + return self._pagination_blogs(endpoint) + + def followers(self, blog): + endpoint = f"/v2/blog/{blog}/followers" + return self._pagination_blogs(endpoint) + def search(self, query, params, mode="top", post_type=None): """Retrieve search results""" endpoint = "/v2/timeline/search" @@ -556,3 +593,21 @@ class TumblrAPI(oauth.OAuth1API): params["before"] = None if params["offset"] >= data["total_posts"]: return + + def _pagination_blogs(self, endpoint, params=None): + if params is None: + params = {} + if self.api_key: + params["api_key"] = self.api_key + params["limit"] = 20 + params["offset"] = text.parse_int(params.get("offset"), 0) + + while True: + data = self._call(endpoint, params) + + blogs = 
data["blogs"] + yield from blogs + + params["offset"] = params["offset"] + params["limit"] + if params["offset"] >= data["total_blogs"]: + return diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 7252d05..4303524 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -217,6 +217,8 @@ class TwitterExtractor(Extractor): "duration": video_info.get( "duration_millis", 0) / 1000, } + else: + continue elif "media_url_https" in media: url = media["media_url_https"] if url[-4] == ".": diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index 0f323e1..75a0137 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -19,7 +19,7 @@ class VkExtractor(Extractor): category = "vk" directory_fmt = ("{category}", "{user[name]|user[id]}") filename_fmt = "{id}.{extension}" - archive_fmt = "{id}" + archive_fmt = "{user[id]}_{id}" root = "https://vk.com" request_interval = (0.5, 1.5) diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index 42839a8..df09fce 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -22,6 +22,7 @@ class VscoExtractor(Extractor): directory_fmt = ("{category}", "{user}") filename_fmt = "{id}.{extension}" archive_fmt = "{id}" + tls12 = False def __init__(self, match): Extractor.__init__(self, match) diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index e927bc1..00266bd 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -32,6 +32,7 @@ class WikimediaExtractor(BaseExtractor): f"{self.root.partition('.')[0].rpartition('/')[2]}") self.per_page = self.config("limit", 50) + self.subcategories = False if useragent := self.config_instance("useragent"): self.useragent = useragent @@ -217,8 +218,8 @@ class WikimediaArticleExtractor(WikimediaExtractor): self.subcategory = prefix if prefix == "category": - self.subcategories = \ - True if self.config("subcategories", True) else False + if self.config("subcategories", True): + self.subcategories = True self.params = { "generator": "categorymembers", "gcmtitle" : path, @@ -226,12 +227,10 @@ class WikimediaArticleExtractor(WikimediaExtractor): "gcmlimit" : self.per_page, } elif prefix == "file": - self.subcategories = False self.params = { "titles" : path, } else: - self.subcategories = False self.params = { "generator": "images", "gimlimit" : self.per_page, diff --git a/gallery_dl/extractor/xasiat.py b/gallery_dl/extractor/xasiat.py new file mode 100644 index 0000000..6aa3168 --- /dev/null +++ b/gallery_dl/extractor/xasiat.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.xasiat.com""" + +from .common import Extractor, Message +from .. 
import text, util +import time + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?xasiat\.com((?:/fr|/ja)?/albums" + + +class XasiatExtractor(Extractor): + category = "xasiat" + directory_fmt = ("{category}", "{title}") + archive_fmt = "{album_url}_{num}" + root = "https://www.xasiat.com" + + def items(self): + data = {"_extractor": XasiatAlbumExtractor} + for url in self.posts(): + yield Message.Queue, url, data + + def posts(self): + return self._pagination(self.groups[0]) + + def _pagination(self, path, pnum=1): + url = f"{self.root}{path}/" + find_posts = util.re(r'class="item ">\s*<a href="([^"]+)').findall + + while True: + params = { + "mode": "async", + "function": "get_block", + "block_id": "list_albums_common_albums_list", + "sort_by": "post_date", + "from": pnum, + "_": int(time.time() * 1000) + } + + page = self.request(url, params=params).text + yield from find_posts(page) + + if "<span>Next</span>" in page: + return + + pnum += 1 + + +class XasiatAlbumExtractor(XasiatExtractor): + subcategory = "album" + pattern = BASE_PATTERN + r"/(\d+)/[^/?#]+)" + example = "https://www.xasiat.com/albums/12345/TITLE/" + + def items(self): + path, album_id = self.groups + url = f"{self.root}{path}/" + response = self.request(url) + extr = text.extract_from(response.text) + + title = extr("<h1>", "<") + info = extr('class="info-content"', "</div>") + images = extr('class="images"', "</div>") + + urls = list(text.extract_iter(images, 'href="', '"')) + + data = { + "title": text.unescape(title), + "model": util.re( + r'top_models1"></i>\s*(.+)\s*</span').findall(info), + "tags": util.re( + r'tags/[^"]+\">\s*(.+)\s*</a').findall(info), + "album_category": util.re( + r'categories/[^"]+\">\s*(.+)\s*</a').findall(info)[0], + "album_url": response.url, + "album_id": text.parse_int(album_id), + "count": len(urls), + } + + yield Message.Directory, data + for data["num"], url in enumerate(urls, 1): + yield Message.Url, url, text.nameext_from_url(url[:-1], data) + + +class XasiatTagExtractor(XasiatExtractor): + subcategory = "tag" + pattern = BASE_PATTERN + r"/tags/[^/?#]+)" + example = "https://www.xasiat.com/albums/tags/TAG/" + + +class XasiatCategoryExtractor(XasiatExtractor): + subcategory = "category" + pattern = BASE_PATTERN + r"/categories/[^/?#]+)" + example = "https://www.xasiat.com/albums/categories/CATEGORY/" + + +class XasiatModelExtractor(XasiatExtractor): + subcategory = "model" + pattern = BASE_PATTERN + r"/models/[^/?#]+)" + example = "https://www.xasiat.com/albums/models/MODEL/" diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 3176eb4..9d98e68 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -151,7 +151,10 @@ class Job(): try: for msg in extractor: self.dispatch(msg) - except exception.StopExtraction: + except exception.StopExtraction as exc: + if exc.depth > 1 and exc.target != extractor.__class__.subcategory: + exc.depth -= 1 + raise pass except exception.AbortExtraction as exc: log.error(exc.message) @@ -509,12 +512,11 @@ class DownloadJob(Job): if not self._skipftr or self._skipftr(pathfmt.kwdict): self._skipcnt += 1 if self._skipcnt >= self._skipmax: - raise self._skipexc() + raise self._skipexc def download(self, url): """Download 'url'""" - scheme = url.partition(":")[0] - if downloader := self.get_downloader(scheme): + if downloader := self.get_downloader(url[:url.find(":")]): try: return downloader.download(url, self.pathfmt) except OSError as exc: @@ -604,7 +606,8 @@ class DownloadJob(Job): elif isinstance(skip, str): skip, _, smax = skip.partition(":") if skip == 
"abort": - self._skipexc = exception.StopExtraction + smax, _, sarg = smax.partition(":") + self._skipexc = exception.StopExtraction(sarg or None) elif skip == "terminate": self._skipexc = exception.TerminateExtraction elif skip == "exit": @@ -731,8 +734,8 @@ class SimulationJob(DownloadJob): """Simulate the extraction process without downloading anything""" def handle_url(self, url, kwdict): - if not kwdict["extension"]: - kwdict["extension"] = "jpg" + ext = kwdict["extension"] or "jpg" + kwdict["extension"] = self.pathfmt.extension_map(ext, ext) if self.sleep: self.extractor.sleep(self.sleep(), "download") if self.archive and self._archive_write_skip: @@ -850,7 +853,7 @@ class UrlJob(Job): stdout_write(url + "\n") if "_fallback" in kwdict: for url in kwdict["_fallback"]: - stdout_write("| " + url + "\n") + stdout_write(f"| {url}\n") def handle_queue(self, url, kwdict): if cls := kwdict.get("_extractor"): @@ -909,6 +912,10 @@ class DataJob(Job): Job.__init__(self, url, parent) self.file = file self.data = [] + self.data_urls = [] + self.data_post = [] + self.data_meta = [] + self.exception = None self.ascii = config.get(("output",), "ascii", ensure_ascii) self.resolve = 128 if resolve is True else (resolve or self.resolve) @@ -934,6 +941,7 @@ class DataJob(Job): except exception.StopExtraction: pass except Exception as exc: + self.exception = exc self.data.append((-1, { "error" : exc.__class__.__name__, "message": str(exc), @@ -957,13 +965,21 @@ class DataJob(Job): return 0 def handle_url(self, url, kwdict): - self.data.append((Message.Url, url, self.filter(kwdict))) + kwdict = self.filter(kwdict) + self.data_urls.append(url) + self.data_meta.append(kwdict) + self.data.append((Message.Url, url, kwdict)) def handle_directory(self, kwdict): - self.data.append((Message.Directory, self.filter(kwdict))) + kwdict = self.filter(kwdict) + self.data_post.append(kwdict) + self.data.append((Message.Directory, kwdict)) def handle_queue(self, url, kwdict): - self.data.append((Message.Queue, url, self.filter(kwdict))) + kwdict = self.filter(kwdict) + self.data_urls.append(url) + self.data_meta.append(kwdict) + self.data.append((Message.Queue, url, kwdict)) def handle_queue_resolve(self, url, kwdict): if cls := kwdict.get("_extractor"): @@ -972,8 +988,14 @@ class DataJob(Job): extr = extractor.find(url) if not extr: - return self.data.append((Message.Queue, url, self.filter(kwdict))) + kwdict = self.filter(kwdict) + self.data_urls.append(url) + self.data_meta.append(kwdict) + return self.data.append((Message.Queue, url, kwdict)) job = self.__class__(extr, self, None, self.ascii, self.resolve-1) job.data = self.data + job.data_urls = self.data_urls + job.data_post = self.data_post + job.data_meta = self.data_meta job.run() diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 963f957..fd664e6 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -664,14 +664,18 @@ def build_parser(): selection = parser.add_argument_group("Selection Options") selection.add_argument( "-A", "--abort", - dest="abort", metavar="N", type=int, - help=("Stop current extractor run " - "after N consecutive file downloads were skipped"), + dest="abort", metavar="N[:TARGET]", + help=("Stop current extractor(s) " + "after N consecutive file downloads were skipped. " + "Specify a TARGET to set how many levels to ascend or " + "to which subcategory to jump to. 
" + "Examples: '-A 3', '-A 3:2', '-A 3:manga'"), ) selection.add_argument( "-T", "--terminate", - dest="terminate", metavar="N", type=int, - help=("Stop current and parent extractor runs " + dest="terminate", metavar="N", + help=("Stop current & parent extractors " + "and proceed with the next input URL " "after N consecutive file downloads were skipped"), ) selection.add_argument( diff --git a/gallery_dl/output.py b/gallery_dl/output.py index e4937f4..519a8f4 100644 --- a/gallery_dl/output.py +++ b/gallery_dl/output.py @@ -165,9 +165,9 @@ class Formatter(logging.Formatter): if record.exc_info and not record.exc_text: record.exc_text = self.formatException(record.exc_info) if record.exc_text: - msg = msg + "\n" + record.exc_text + msg = f"{msg}\n{record.exc_text}" if record.stack_info: - msg = msg + "\n" + record.stack_info + msg = f"{msg}\n{record.stack_info}" return msg @@ -317,18 +317,7 @@ def configure_standard_streams(): elif not options.get("errors"): options["errors"] = "replace" - try: - stream.reconfigure(**options) - except AttributeError: - # no 'reconfigure' support - oget = options.get - setattr(sys, name, stream.__class__( - stream.buffer, - encoding=oget("encoding", stream.encoding), - errors=oget("errors", "replace"), - newline=oget("newline", stream.newlines), - line_buffering=oget("line_buffering", stream.line_buffering), - )) + stream.reconfigure(**options) # -------------------------------------------------------------------- @@ -383,10 +372,10 @@ class NullOutput(): class PipeOutput(NullOutput): def skip(self, path): - stdout_write(CHAR_SKIP + path + "\n") + stdout_write(f"{CHAR_SKIP}{path}\n") def success(self, path): - stdout_write(path + "\n") + stdout_write(f"{path}\n") class TerminalOutput(): @@ -401,13 +390,13 @@ class TerminalOutput(): self.shorten = util.identity def start(self, path): - stdout_write_flush(self.shorten(" " + path)) + stdout_write_flush(self.shorten(f" {path}")) def skip(self, path): - stdout_write(self.shorten(CHAR_SKIP + path) + "\n") + stdout_write(f"{self.shorten(CHAR_SKIP + path)}\n") def success(self, path): - stdout_write("\r" + self.shorten(CHAR_SUCCESS + path) + "\n") + stdout_write(f"\r{self.shorten(CHAR_SUCCESS + path)}\n") def progress(self, bytes_total, bytes_downloaded, bytes_per_second): bdl = util.format_value(bytes_downloaded) @@ -435,10 +424,10 @@ class ColorOutput(TerminalOutput): stdout_write_flush(self.shorten(path)) def skip(self, path): - stdout_write(self.color_skip + self.shorten(path) + "\033[0m\n") + stdout_write(f"{self.color_skip}{self.shorten(path)}\x1b[0m\n") def success(self, path): - stdout_write(self.color_success + self.shorten(path) + "\033[0m\n") + stdout_write(f"{self.color_success}{self.shorten(path)}\x1b[0m\n") class CustomOutput(): @@ -514,7 +503,7 @@ def shorten_string(txt, limit, sep="โฆ"): if len(txt) <= limit: return txt limit -= len(sep) - return txt[:limit // 2] + sep + txt[-((limit+1) // 2):] + return f"{txt[:limit // 2]}{sep}{txt[-((limit+1) // 2):]}" def shorten_string_eaw(txt, limit, sep="โฆ", cache=EAWCache()): @@ -529,7 +518,7 @@ def shorten_string_eaw(txt, limit, sep="โฆ", cache=EAWCache()): limit -= len(sep) if text_width == len(txt): # all characters have a width of 1 - return txt[:limit // 2] + sep + txt[-((limit+1) // 2):] + return f"{txt[:limit // 2]}{sep}{txt[-((limit+1) // 2):]}" # wide characters left = 0 @@ -548,4 +537,4 @@ def shorten_string_eaw(txt, limit, sep="โฆ", cache=EAWCache()): break right -= 1 - return txt[:left] + sep + txt[right+1:] + return 
f"{txt[:left]}{sep}{txt[right+1:]}" diff --git a/gallery_dl/path.py b/gallery_dl/path.py index 795564d..eecbd6c 100644 --- a/gallery_dl/path.py +++ b/gallery_dl/path.py @@ -90,6 +90,7 @@ class PathFormat(): restrict = config("path-restrict", "auto") replace = config("path-replace", "_") + conv = config("path-convert") if restrict == "auto": restrict = "\\\\|/<>:\"?*" if WINDOWS else "/" elif restrict == "unix": @@ -100,10 +101,10 @@ class PathFormat(): restrict = "^0-9A-Za-z_." elif restrict == "ascii+": restrict = "^0-9@-[\\]-{ #-)+-.;=!}~" - self.clean_segment = self._build_cleanfunc(restrict, replace) + self.clean_segment = _build_cleanfunc(restrict, replace, conv) remove = config("path-remove", "\x00-\x1f\x7f") - self.clean_path = self._build_cleanfunc(remove, "") + self.clean_path = _build_cleanfunc(remove, "") strip = config("path-strip", "auto") if strip == "auto": @@ -122,7 +123,7 @@ class PathFormat(): basedir = config("base-directory") sep = os.sep if basedir is None: - basedir = "." + sep + "gallery-dl" + sep + basedir = f".{sep}gallery-dl{sep}" elif basedir: basedir = util.expand_path(basedir) altsep = os.altsep @@ -133,37 +134,6 @@ class PathFormat(): basedir = self.clean_path(basedir) self.basedirectory = basedir - def _build_cleanfunc(self, chars, repl): - if not chars: - return util.identity - elif isinstance(chars, dict): - if 0 not in chars: - chars = self._process_repl_dict(chars) - chars[0] = None - - def func(x, table=str.maketrans(chars)): - return x.translate(table) - elif len(chars) == 1: - def func(x, c=chars, r=repl): - return x.replace(c, r) - else: - return functools.partial(util.re(f"[{chars}]").sub, repl) - return func - - def _process_repl_dict(self, chars): - # can't modify 'chars' while *directly* iterating over its keys - for char in [c for c in chars if len(c) > 1]: - if len(char) == 3 and char[1] == "-": - citer = range(ord(char[0]), ord(char[2])+1) - else: - citer = char - - repl = chars.pop(char) - for c in citer: - chars[c] = repl - - return chars - def open(self, mode="wb"): """Open file and return a corresponding file object""" try: @@ -382,3 +352,51 @@ class PathFormat(): break self.set_mtime() + + +def _build_convertfunc(func, conv): + if len(conv) <= 1: + conv = formatter._CONVERSIONS[conv] + return lambda x: conv(func(x)) + + def convert_many(x): + x = func(x) + for conv in convs: + x = conv(x) + return x + convs = [formatter._CONVERSIONS[c] for c in conv] + return convert_many + + +def _build_cleanfunc(chars, repl, conv=None): + if not chars: + func = util.identity + elif isinstance(chars, dict): + if 0 not in chars: + chars = _process_repl_dict(chars) + chars[0] = None + + def func(x): + return x.translate(table) + table = str.maketrans(chars) + elif len(chars) == 1: + def func(x): + return x.replace(chars, repl) + else: + func = functools.partial(util.re(f"[{chars}]").sub, repl) + return _build_convertfunc(func, conv) if conv else func + + +def _process_repl_dict(chars): + # can't modify 'chars' while *directly* iterating over its keys + for char in [c for c in chars if len(c) > 1]: + if len(char) == 3 and char[1] == "-": + citer = range(ord(char[0]), ord(char[2])+1) + else: + citer = char + + repl = chars.pop(char) + for c in citer: + chars[c] = repl + + return chars diff --git a/gallery_dl/transaction_id.py b/gallery_dl/transaction_id.py index 915b7b3..f8769d9 100644 --- a/gallery_dl/transaction_id.py +++ b/gallery_dl/transaction_id.py @@ -65,8 +65,8 @@ class ClientTransaction(): @cache(maxage=36500*86400, keyarg=1) def 
_extract_indices(self, ondemand_s, extractor): - url = ("https://abs.twimg.com/responsive-web/client-web" - "/ondemand.s." + ondemand_s + "a.js") + url = (f"https://abs.twimg.com/responsive-web/client-web" + f"/ondemand.s.{ondemand_s}a.js") page = extractor.request(url).text pattern = util.re_compile(r"\(\w\[(\d\d?)\],\s*16\)") return [int(i) for i in pattern.findall(page)] diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 4027ac6..45ffc9c 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -512,15 +512,15 @@ def cookiestxt_store(fp, cookies): value = cookie.value domain = cookie.domain - fp.write("\t".join(( - domain, - "TRUE" if domain and domain[0] == "." else "FALSE", - cookie.path, - "TRUE" if cookie.secure else "FALSE", - "0" if cookie.expires is None else str(cookie.expires), - name, - value + "\n", - ))) + fp.write( + f"{domain}\t" + f"{'TRUE' if domain and domain[0] == '.' else 'FALSE'}\t" + f"{cookie.path}\t" + f"{'TRUE' if cookie.secure else 'FALSE'}\t" + f"{'0' if cookie.expires is None else str(cookie.expires)}\t" + f"{name}\t" + f"{value}\n" + ) def code_to_language(code, default=None): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index af7e3c6..a6474de 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.30.2" +__version__ = "1.30.3" __variant__ = None diff --git a/scripts/run_tests.py b/scripts/run_tests.py index d1fd1f1..ebf06fa 100755 --- a/scripts/run_tests.py +++ b/scripts/run_tests.py @@ -34,13 +34,13 @@ suite = unittest.TestSuite() for test in TESTS: try: module = __import__(test) - except ImportError: - print("unable to import", test) + except Exception as exc: + sys.stderr.write(f"Failed to import {test}: {exc}\n") else: tests = unittest.defaultTestLoader.loadTestsFromModule(module) suite.addTests(tests) if __name__ == "__main__": result = unittest.TextTestRunner(verbosity=2).run(suite) - if result.errors or result.failures: + if not result.wasSuccessful(): sys.exit(1) @@ -111,6 +111,7 @@ def build_setuptools(): "extra": [ "requests[socks]", "yt-dlp[default]", + "jinja2", "pyyaml", "toml; python_version < '3.11'", "truststore; python_version >= '3.10'", diff --git a/test/test_config.py b/test/test_config.py index 5c94b1b..064d8e7 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -229,7 +229,7 @@ class TestConfigFiles(unittest.TestCase): with open(path) as fp: return util.json_loads(fp.read()) except FileNotFoundError: - raise unittest.SkipTest(path + " not available") + raise unittest.SkipTest(f"{path} not available") if __name__ == "__main__": diff --git a/test/test_cookies.py b/test/test_cookies.py index 5900473..9721d10 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -91,7 +91,7 @@ class TestCookiedict(unittest.TestCase): self.assertEqual(sorted(cookies.values()), sorted(self.cdict.values())) def test_domain(self): - for category in ["exhentai", "idolcomplex", "nijie", "horne"]: + for category in ["exhentai", "nijie", "horne"]: extr = _get_extractor(category) cookies = extr.cookies for key in self.cdict: @@ -108,7 +108,6 @@ class TestCookieLogin(unittest.TestCase): def test_cookie_login(self): extr_cookies = { "exhentai" : ("ipb_member_id", "ipb_pass_hash"), - "idolcomplex": ("login", "pass_hash"), "nijie" : ("nijie_tok",), "horne" : ("horne_tok",), } @@ -159,7 +158,7 @@ class TestCookieUtils(unittest.TestCase): extr.cookies.set("cd_a", "1", 
domain=extr.cookies_domain) self.assertTrue(extr.cookies_check(("cd_a",))) - extr.cookies.set("wd_a", "1", domain="www" + extr.cookies_domain) + extr.cookies.set("wd_a", "1", domain=f"www{extr.cookies_domain}") self.assertFalse(extr.cookies_check(("wd_a",))) self.assertEqual(len(extr.cookies), 3) @@ -184,7 +183,7 @@ class TestCookieUtils(unittest.TestCase): extr.cookies.set("cd_a", "1", domain=extr.cookies_domain) self.assertTrue(extr.cookies_check(("cd_a",), subdomains=True)) - extr.cookies.set("wd_a", "1", domain="www" + extr.cookies_domain) + extr.cookies.set("wd_a", "1", domain=f"www{extr.cookies_domain}") self.assertTrue(extr.cookies_check(("wd_a",), subdomains=True)) extr.cookies.set("cd_b", "2", domain=extr.cookies_domain) @@ -244,7 +243,6 @@ def _get_extractor(category): URLS = { "exhentai" : "https://exhentai.org/g/1200119/d55c44d3d0/", - "idolcomplex": "https://idol.sankakucomplex.com/post/show/1", "nijie" : "https://nijie.info/view.php?id=1", "horne" : "https://horne.red/view.php?id=1", "test" : "generic:https://example.org/", diff --git a/test/test_downloader.py b/test/test_downloader.py index 3e5bf84..ecd8b85 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -183,7 +183,7 @@ class TestDownloaderBase(unittest.TestCase): @classmethod def _prepare_destination(cls, content=None, part=True, extension=None): - name = "file-{}".format(cls.fnum) + name = f"file-{cls.fnum}" cls.fnum += 1 kwdict = { @@ -199,7 +199,7 @@ class TestDownloaderBase(unittest.TestCase): pathfmt.build_path() if content: - mode = "w" + ("b" if isinstance(content, bytes) else "") + mode = "wb" if isinstance(content, bytes) else "w" with pathfmt.open(mode) as fp: fp.write(content) @@ -211,10 +211,10 @@ class TestDownloaderBase(unittest.TestCase): success = self.downloader.download(url, pathfmt) # test successful download - self.assertTrue(success, "downloading '{}' failed".format(url)) + self.assertTrue(success, f"downloading '{url}' failed") # test content - mode = "r" + ("b" if isinstance(output, bytes) else "") + mode = "rb" if isinstance(output, bytes) else "r" with pathfmt.open(mode) as fp: content = fp.read() self.assertEqual(content, output) @@ -245,16 +245,16 @@ class TestHTTPDownloader(TestDownloaderBase): server = http.server.HTTPServer((host, port), HttpRequestHandler) except OSError as exc: raise unittest.SkipTest( - "cannot spawn local HTTP server ({})".format(exc)) + f"cannot spawn local HTTP server ({exc})") host, port = server.server_address - cls.address = "http://{}:{}".format(host, port) + cls.address = f"http://{host}:{port}" threading.Thread(target=server.serve_forever, daemon=True).start() def _run_test(self, ext, input, output, extension, expected_extension=None): TestDownloaderBase._run_test( - self, self.address + "/" + ext, input, output, + self, f"{self.address}/{ext}", input, output, extension, expected_extension) def tearDown(self): @@ -281,7 +281,7 @@ class TestHTTPDownloader(TestDownloaderBase): self._run_test("gif", None, DATA["gif"], "jpg", "gif") def test_http_filesize_min(self): - url = self.address + "/gif" + url = f"{self.address}/gif" pathfmt = self._prepare_destination(None, extension=None) self.downloader.minsize = 100 with self.assertLogs(self.downloader.log, "WARNING"): @@ -290,7 +290,7 @@ class TestHTTPDownloader(TestDownloaderBase): self.assertEqual(pathfmt.temppath, "") def test_http_filesize_max(self): - url = self.address + "/jpg" + url = f"{self.address}/jpg" pathfmt = self._prepare_destination(None, extension=None) self.downloader.maxsize = 100 
with self.assertLogs(self.downloader.log, "WARNING"): @@ -334,8 +334,8 @@ class HttpRequestHandler(http.server.BaseHTTPRequestHandler): match = re.match(r"bytes=(\d+)-", self.headers["Range"]) start = int(match[1]) - headers["Content-Range"] = "bytes {}-{}/{}".format( - start, len(output)-1, len(output)) + headers["Content-Range"] = \ + f"bytes {start}-{len(output) - 1}/{len(output)}" output = output[start:] else: status = 200 @@ -408,7 +408,7 @@ for ext, content in SAMPLES: DATA[ext] = content for idx, (_, content) in enumerate(SAMPLES): - DATA["S{:>02}".format(idx)] = content + DATA[f"S{idx:>02}"] = content # reverse mime types mapping @@ -421,8 +421,8 @@ MIME_TYPES = { def generate_tests(): def generate_test(idx, ext, content): def test(self): - self._run_test("S{:>02}".format(idx), None, content, "bin", ext) - test.__name__ = "test_http_ext_{:>02}_{}".format(idx, ext) + self._run_test(f"S{idx:>02}", None, content, "bin", ext) + test.__name__ = f"test_http_ext_{idx:>02}_{ext}" return test for idx, (ext, content) in enumerate(SAMPLES): diff --git a/test/test_extractor.py b/test/test_extractor.py index bf4aa07..f8b8f09 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -110,7 +110,7 @@ class TestExtractorModule(unittest.TestCase): except AssertionError: pass else: - self.fail(result["#url"] + ": Test did not fail") + self.fail(f"{result['#url']}: Test did not fail") else: self.assertCategories(result) @@ -167,8 +167,7 @@ class TestExtractorModule(unittest.TestCase): extr.finalize() except ImportError as exc: if exc.name in ("youtube_dl", "yt_dlp"): - raise unittest.SkipTest("cannot import module '{}'".format( - exc.name)) + raise unittest.SkipTest(f"cannot import module '{exc.name}'") raise def test_docstrings(self): @@ -179,7 +178,7 @@ class TestExtractorModule(unittest.TestCase): self.assertNotEqual( extr1.__doc__, extr2.__doc__, - "{} <-> {}".format(extr1, extr2), + f"{extr1} <-> {extr2}", ) def test_names(self): @@ -191,12 +190,10 @@ class TestExtractorModule(unittest.TestCase): for extr in extractor.extractors(): if extr.category not in ("", "oauth", "ytdl"): - expected = "{}{}Extractor".format( - capitalize(extr.category), - capitalize(extr.subcategory), - ) + expected = (f"{capitalize(extr.category)}" + f"{capitalize(extr.subcategory)}Extractor") if expected[0].isdigit(): - expected = "_" + expected + expected = f"_{expected}" self.assertEqual(expected, extr.__name__) @@ -225,7 +222,7 @@ class TestExtractorWait(unittest.TestCase): calls = sleep.mock_calls self.assertEqual(len(calls), 1) - self.assertAlmostEqual(calls[0][1][0], 6.0, places=1) + self.assertAlmostEqual(calls[0][1][0], 6.0, places=0) calls = log.info.mock_calls self.assertEqual(len(calls), 1) @@ -266,7 +263,7 @@ class TextExtractorOAuth(unittest.TestCase): def test_oauth1(self): for category in ("flickr", "smugmug", "tumblr"): - extr = extractor.find("oauth:" + category) + extr = extractor.find(f"oauth:{category}") with patch.object(extr, "_oauth1_authorization_flow") as m: for msg in extr: @@ -275,7 +272,7 @@ class TextExtractorOAuth(unittest.TestCase): def test_oauth2(self): for category in ("deviantart", "reddit"): - extr = extractor.find("oauth:" + category) + extr = extractor.find(f"oauth:{category}") with patch.object(extr, "_oauth2_authorization_code_grant") as m: for msg in extr: diff --git a/test/test_formatter.py b/test/test_formatter.py index 3305983..f3ed9dd 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -73,8 +73,8 @@ class TestFormatter(unittest.TestCase): 
self._run_test("{u!H}", "'< / >'") self._run_test("{n!H}", "") self._run_test("{a!s}", self.kwdict["a"]) - self._run_test("{a!r}", "'" + self.kwdict["a"] + "'") - self._run_test("{a!a}", "'" + self.kwdict["a"] + "'") + self._run_test("{a!r}", f"'{self.kwdict['a']}'") + self._run_test("{a!a}", f"'{self.kwdict['a']}'") self._run_test("{b!a}", "'\\xe4\\xf6\\xfc'") self._run_test("{a!S}", self.kwdict["a"]) self._run_test("{l!S}", "a, b, c") @@ -139,7 +139,7 @@ class TestFormatter(unittest.TestCase): self._run_test("{missing}" , replacement, default) self._run_test("{missing.attr}", replacement, default) self._run_test("{missing[key]}", replacement, default) - self._run_test("{missing:?a//}", "a" + default, default) + self._run_test("{missing:?a//}", f"a{default}", default) def test_fmt_func(self): self._run_test("{t}" , self.kwdict["t"] , None, int) @@ -444,11 +444,11 @@ class TestFormatter(unittest.TestCase): with open(path1, "w") as fp: fp.write("{a}") - fmt1 = formatter.parse("\fT " + path1) + fmt1 = formatter.parse(f"\fT {path1}") with open(path2, "w") as fp: fp.write("{a!u:Rh/C/}\nFooBar") - fmt2 = formatter.parse("\fT " + path2) + fmt2 = formatter.parse(f"\fT {path2}") self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"]) self.assertEqual(fmt2.format_map(self.kwdict), "HELLO WORLD\nFooBar") @@ -458,15 +458,18 @@ class TestFormatter(unittest.TestCase): def test_expression(self): self._run_test("\fE a", self.kwdict["a"]) - self._run_test("\fE name * 2 + ' ' + a", "{}{} {}".format( - self.kwdict["name"], self.kwdict["name"], self.kwdict["a"])) + self._run_test( + "\fE name * 2 + ' ' + a", + f"{self.kwdict['name']}{self.kwdict['name']} {self.kwdict['a']}") def test_fstring(self): self._run_test("\fF {a}", self.kwdict["a"]) - self._run_test("\fF {name}{name} {a}", "{}{} {}".format( - self.kwdict["name"], self.kwdict["name"], self.kwdict["a"])) - self._run_test("\fF foo-'\"{a.upper()}\"'-bar", - """foo-'"{}"'-bar""".format(self.kwdict["a"].upper())) + self._run_test( + "\fF {name}{name} {a}", + f"{self.kwdict['name']}{self.kwdict['name']} {self.kwdict['a']}") + self._run_test( + "\fF foo-'\"{a.upper()}\"'-bar", + f"""foo-'"{self.kwdict['a'].upper()}"'-bar""") def test_template_fstring(self): with tempfile.TemporaryDirectory() as tmpdirname: @@ -475,15 +478,15 @@ class TestFormatter(unittest.TestCase): with open(path1, "w") as fp: fp.write("{a}") - fmt1 = formatter.parse("\fTF " + path1) + fmt1 = formatter.parse(f"\fTF {path1}") with open(path2, "w") as fp: fp.write("foo-'\"{a.upper()}\"'-bar") - fmt2 = formatter.parse("\fTF " + path2) + fmt2 = formatter.parse(f"\fTF {path2}") self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"]) self.assertEqual(fmt2.format_map(self.kwdict), - """foo-'"{}"'-bar""".format(self.kwdict["a"].upper())) + f"""foo-'"{self.kwdict['a'].upper()}"'-bar""") with self.assertRaises(OSError): formatter.parse("\fTF /") @@ -493,10 +496,12 @@ class TestFormatter(unittest.TestCase): formatter.JinjaFormatter.env = None self._run_test("\fJ {{a}}", self.kwdict["a"]) - self._run_test("\fJ {{name}}{{name}} {{a}}", "{}{} {}".format( - self.kwdict["name"], self.kwdict["name"], self.kwdict["a"])) - self._run_test("\fJ foo-'\"{{a | upper}}\"'-bar", - """foo-'"{}"'-bar""".format(self.kwdict["a"].upper())) + self._run_test( + "\fJ {{name}}{{name}} {{a}}", + f"{self.kwdict['name']}{self.kwdict['name']} {self.kwdict['a']}") + self._run_test( + "\fJ foo-'\"{{a | upper}}\"'-bar", + f"""foo-'"{self.kwdict['a'].upper()}"'-bar""") @unittest.skipIf(jinja2 is None, "no 
jinja2") def test_template_jinja(self): @@ -508,15 +513,15 @@ class TestFormatter(unittest.TestCase): with open(path1, "w") as fp: fp.write("{{a}}") - fmt1 = formatter.parse("\fTJ " + path1) + fmt1 = formatter.parse(f"\fTJ {path1}") with open(path2, "w") as fp: fp.write("foo-'\"{{a | upper}}\"'-bar") - fmt2 = formatter.parse("\fTJ " + path2) + fmt2 = formatter.parse(f"\fTJ {path2}") self.assertEqual(fmt1.format_map(self.kwdict), self.kwdict["a"]) self.assertEqual(fmt2.format_map(self.kwdict), - """foo-'"{}"'-bar""".format(self.kwdict["a"].upper())) + f"""foo-'"{self.kwdict['a'].upper()}"'-bar""") with self.assertRaises(OSError): formatter.parse("\fTJ /") @@ -562,7 +567,7 @@ Present Time is ((( dt | dt_fmt("%H:%M:%S") ))) Hello ((( s | sanitize_whitespace ))). I hope there is enough "(((S|sanitize_whitespace)))" for you. """) - fmt = formatter.parse("\fTJ " + path_template) + fmt = formatter.parse(f"\fTJ {path_template}") self.assertEqual(fmt.format_map(self.kwdict), """\ Present Day is January 01, 2010 @@ -607,8 +612,8 @@ def noarg(): finally: sys.path.pop(0) - fmt3 = formatter.parse("\fM " + path + ":gentext") - fmt4 = formatter.parse("\fM " + path + ":lengths") + fmt3 = formatter.parse(f"\fM {path}:gentext") + fmt4 = formatter.parse(f"\fM {path}:lengths") self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name") self.assertEqual(fmt2.format_map(self.kwdict), "168") diff --git a/test/test_job.py b/test/test_job.py index 3aa28e8..0a533ea 100644 --- a/test/test_job.py +++ b/test/test_job.py @@ -299,7 +299,7 @@ class TestDataJob(TestJob): for i in range(1, 4): self.assertEqual( tjob.data[i][2]["_fallback"], - ("https://example.org/alt/{}.jpg".format(i),), + (f"https://example.org/alt/{i}.jpg",), ) def test_sleep(self): @@ -382,13 +382,13 @@ class TestExtractor(Extractor): } for i in range(1, 4): - url = "{}/{}.jpg".format(root, i) + url = f"{root}/{i}.jpg" yield Message.Url, url, text.nameext_from_url(url, { "num" : i, "tags": ["foo", "bar", "ใในใ"], "user": user, "author": user, - "_fallback": ("{}/alt/{}.jpg".format(root, i),), + "_fallback": (f"{root}/alt/{i}.jpg",), }) diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 2e39cc7..07bd348 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -52,7 +52,7 @@ class TestPostprocessorModule(unittest.TestCase): def test_find(self): for name in (postprocessor.modules): cls = postprocessor.find(name) - self.assertEqual(cls.__name__, name.capitalize() + "PP") + self.assertEqual(cls.__name__, f"{name.capitalize()}PP") self.assertIs(cls.__base__, PostProcessor) self.assertEqual(postprocessor.find("foo"), None) @@ -129,15 +129,15 @@ class ClassifyTest(BasePostprocessorTest): self._trigger(("prepare",)) self.pathfmt.build_path() path = os.path.join(self.dir.name, "test", "Pictures") - self.assertEqual(self.pathfmt.path, path + "/file.jpg") - self.assertEqual(self.pathfmt.realpath, path + "/file.jpg") + self.assertEqual(self.pathfmt.path, f"{path}/file.jpg") + self.assertEqual(self.pathfmt.realpath, f"{path}/file.jpg") self.pathfmt.set_extension("mp4") self._trigger(("prepare",)) self.pathfmt.build_path() path = os.path.join(self.dir.name, "test", "Video") - self.assertEqual(self.pathfmt.path, path + "/file.mp4") - self.assertEqual(self.pathfmt.realpath, path + "/file.mp4") + self.assertEqual(self.pathfmt.path, f"{path}/file.mp4") + self.assertEqual(self.pathfmt.realpath, f"{path}/file.mp4") def test_classify_noop(self): pp = self._create() @@ -169,8 +169,8 @@ class 
ClassifyTest(BasePostprocessorTest): self._trigger(("prepare",)) self.pathfmt.build_path() path = os.path.join(self.dir.name, "test", "foo", "bar") - self.assertEqual(self.pathfmt.path, path + "/file.foo") - self.assertEqual(self.pathfmt.realpath, path + "/file.foo") + self.assertEqual(self.pathfmt.path, f"{path}/file.foo") + self.assertEqual(self.pathfmt.realpath, f"{path}/file.foo") class DirectoryTest(BasePostprocessorTest): @@ -179,16 +179,16 @@ class DirectoryTest(BasePostprocessorTest): self._create() path = os.path.join(self.dir.name, "test") - self.assertEqual(self.pathfmt.realdirectory, path + "/") - self.assertEqual(self.pathfmt.realpath, path + "/file.ext") + self.assertEqual(self.pathfmt.realdirectory, f"{path}/") + self.assertEqual(self.pathfmt.realpath, f"{path}/file.ext") self.pathfmt.kwdict["category"] = "custom" self._trigger() path = os.path.join(self.dir.name, "custom") - self.assertEqual(self.pathfmt.realdirectory, path + "/") + self.assertEqual(self.pathfmt.realdirectory, f"{path}/") self.pathfmt.build_path() - self.assertEqual(self.pathfmt.realpath, path + "/file.ext") + self.assertEqual(self.pathfmt.realpath, f"{path}/file.ext") class ExecTest(BasePostprocessorTest): @@ -205,10 +205,12 @@ class ExecTest(BasePostprocessorTest): self._trigger(("after",)) p.assert_called_once_with( - "echo {0} {0} {1} {2} && rm {0};".format( - self.pathfmt.realpath, - self.pathfmt.realdirectory, - self.pathfmt.filename), + (f"echo " + f"{self.pathfmt.realpath} " + f"{self.pathfmt.realpath} " + f"{self.pathfmt.realdirectory} " + f"{self.pathfmt.filename} " + f"&& rm {self.pathfmt.realpath};"), shell=True, creationflags=0, start_new_session=False, @@ -254,10 +256,12 @@ class ExecTest(BasePostprocessorTest): self.assertEqual(p.call_args_list, [ call( - "echo {0} {0} {1} {2} && rm {0};".format( - self.pathfmt.realpath, - self.pathfmt.realdirectory, - self.pathfmt.filename), + (f"echo " + f"{self.pathfmt.realpath} " + f"{self.pathfmt.realpath} " + f"{self.pathfmt.realdirectory} " + f"{self.pathfmt.filename} " + f"&& rm {self.pathfmt.realpath};"), shell=True, creationflags=0, start_new_session=False, @@ -287,8 +291,9 @@ class ExecTest(BasePostprocessorTest): with self.assertLogs() as log: self._trigger(("after",)) - msg = ("WARNING:postprocessor.exec:'echo {}' returned with " - "non-zero exit status (123)".format(self.pathfmt.realpath)) + msg = (f"WARNING:postprocessor.exec:" + f"'echo {self.pathfmt.realpath}' " + f"returned with non-zero exit status (123)") self.assertEqual(log.output[0], msg) def test_async(self): @@ -426,7 +431,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realpath + ".JSON" + path = f"{self.pathfmt.realpath}.JSON" m.assert_called_once_with(path, "w", encoding="utf-8") self.assertEqual(self._output(m), """{ @@ -460,7 +465,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realpath + ".JSON" + path = f"{self.pathfmt.realpath}.JSON" m.assert_called_once_with(path, "a", encoding="UTF-8") self.assertEqual(self._output(m), """{\ "_private" : "foo \\u30d0\\u30fc",\ @@ -481,7 +486,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realpath + ".txt" + path = f"{self.pathfmt.realpath}.txt" m.assert_called_once_with(path, "w", encoding="utf-8") self.assertEqual(self._output(m), "foo\nbar\nbaz\n") @@ -561,7 +566,7 @@ class 
MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "file.json" + path = f"{self.pathfmt.realdirectory}file.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_extfmt_2(self): @@ -573,7 +578,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "file.2.EXT-data:tESt" + path = f"{self.pathfmt.realdirectory}file.2.EXT-data:tESt" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_directory(self): @@ -584,7 +589,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "metadata/file.ext.json" + path = f"{self.pathfmt.realdirectory}metadata/file.ext.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_directory_2(self): @@ -596,7 +601,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "metadata/file.json" + path = f"{self.pathfmt.realdirectory}metadata/file.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_directory_format(self): @@ -608,7 +613,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "../json/12500/file.ext.json" + path = f"{self.pathfmt.realdirectory}../json/12500/file.ext.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_directory_empty(self): @@ -619,7 +624,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "./file.ext.json" + path = f"{self.pathfmt.realdirectory}./file.ext.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_basedirectory(self): @@ -628,7 +633,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.basedirectory + "file.ext.json" + path = f"{self.pathfmt.basedirectory}file.ext.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_basedirectory_custom(self): @@ -652,7 +657,7 @@ class MetadataTest(BasePostprocessorTest): with patch("builtins.open", mock_open()) as m: self._trigger() - path = self.pathfmt.realdirectory + "test_file__meta_.data" + path = f"{self.pathfmt.realdirectory}test_file__meta_.data" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_meta_path(self): @@ -663,7 +668,7 @@ class MetadataTest(BasePostprocessorTest): self._trigger() self.assertEqual(self.pathfmt.kwdict["_meta_path"], - self.pathfmt.realpath + ".json") + f"{self.pathfmt.realpath}.json") def test_metadata_stdout(self): self._create({"filename": "-", "indent": None, "sort": True}) @@ -752,7 +757,7 @@ class MetadataTest(BasePostprocessorTest): self.assertTrue(m.called) self.assertGreater(len(self._output(m)), 0) - path = self.pathfmt.realdirectory + "file.ext.json" + path = f"{self.pathfmt.realdirectory}file.ext.json" m.assert_called_once_with(path, "w", encoding="utf-8") def test_metadata_option_skip_false(self): @@ -856,7 +861,7 @@ class PythonTest(BasePostprocessorTest): path = os.path.join(self.dir.name, "module.py") self._write_module(path) - self._create({"function": path + ":calc"}, {"_value": 12}) + self._create({"function": f"{path}:calc"}, 
{"_value": 12}) self.assertNotIn("_result", self.pathfmt.kwdict) self._trigger() @@ -913,7 +918,7 @@ class RenameTest(BasePostprocessorTest): def test_rename_skip(self): self._create({"from": "{id}.{extension}"}, {"id": 12345}) path = self._prepare("12345.ext") - with open(path + "file.ext", "w"): + with open(f"{path}file.ext", "w"): pass with self.assertLogs("postprocessor.rename", level="WARNING") as cm: @@ -932,7 +937,7 @@ class ZipTest(BasePostprocessorTest): self.assertEqual(pp.path, self.pathfmt.realdirectory[:-1]) self.assertEqual(pp.delete, True) self.assertEqual(pp.args, ( - pp.path + ".zip", "a", zipfile.ZIP_STORED, True, + f"{pp.path}.zip", "a", zipfile.ZIP_STORED, True, )) self.assertTrue(pp.args[0].endswith("/test.zip")) @@ -942,7 +947,7 @@ class ZipTest(BasePostprocessorTest): self.assertEqual(pp.path, self.pathfmt.realdirectory[:-1]) self.assertEqual(pp.delete, True) self.assertEqual(pp.args, ( - pp.path + ".zip", "a", zipfile.ZIP_STORED, True, + f"{pp.path}.zip", "a", zipfile.ZIP_STORED, True, )) self.assertTrue(pp.args[0].endswith("/test.zip")) @@ -954,7 +959,7 @@ class ZipTest(BasePostprocessorTest): }) self.assertEqual(pp.delete, False) self.assertEqual(pp.args, ( - pp.path + ".cbz", "a", zipfile.ZIP_DEFLATED, True, + f"{pp.path}.cbz", "a", zipfile.ZIP_DEFLATED, True, )) self.assertTrue(pp.args[0].endswith("/test.cbz")) @@ -968,7 +973,7 @@ class ZipTest(BasePostprocessorTest): # write dummy file with 3 different names for i in range(3): - name = "file{}.ext".format(i) + name = f"file{i}.ext" self.pathfmt.temppath = file.name self.pathfmt.filename = name @@ -1015,8 +1020,8 @@ class ZipTest(BasePostprocessorTest): # write 3 files for i in range(3): - self.pathfmt.temppath = self.pathfmt.realdirectory + "file.ext" - self.pathfmt.filename = "file{}.ext".format(i) + self.pathfmt.temppath = f"{self.pathfmt.realdirectory}file.ext" + self.pathfmt.filename = f"file{i}.ext" self._trigger() # write the last file a second time (should be skipped) diff --git a/test/test_results.py b/test/test_results.py index 4b1c4c1..05b98bf 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -145,7 +145,8 @@ class TestExtractorResults(unittest.TestCase): config.set((), key, None) if auth and not any(extr.config(key) for key in AUTH_KEYS): - return self._skipped.append((result["#url"], "no auth")) + self._skipped.append((result["#url"], "no auth")) + self.skipTest("no auth") if "#options" in result: for key, value in result["#options"].items(): @@ -155,11 +156,16 @@ class TestExtractorResults(unittest.TestCase): config.set((), "image-range" , result["#range"]) config.set((), "chapter-range", result["#range"]) - tjob = ResultJob(extr, content=("#sha1_content" in result)) + tjob = ResultJob(extr, + content=("#sha1_content" in result), + format=(result.get("#metadata") != "post")) if "#exception" in result: - with self.assertRaises(result["#exception"], msg="#exception"): + with self.assertRaises(result["#exception"], msg="#exception"), \ + self.assertLogs() as log_info: tjob.run() + if "#log" in result: + self.assertLogEqual(result["#log"], log_info.output) return try: @@ -228,7 +234,7 @@ class TestExtractorResults(unittest.TestCase): if isinstance(count, str): self.assertRegex( count, r"^ *(==|!=|<|<=|>|>=) *\d+ *$", msg="#count") - expr = "{} {}".format(len_urls, count) + expr = f"{len_urls} {count}" self.assertTrue(eval(expr), msg=expr) elif isinstance(count, range): self.assertRange(len_urls, count, msg="#count") @@ -257,7 +263,11 @@ class TestExtractorResults(unittest.TestCase): 
metadata = {k: v for k, v in result.items() if k[0] != "#"} if metadata: - for kwdict in tjob.kwdict_list: + if result.get("#metadata") == "post": + kwdicts = tjob.kwdict_post + else: + kwdicts = tjob.kwdict_list + for kwdict in kwdicts: self._test_kwdict(kwdict, metadata) def _test_kwdict(self, kwdict, tests, parent=None): @@ -274,7 +284,7 @@ class TestExtractorResults(unittest.TestCase): else: subtest = False - path = "{}.{}".format(parent, key) if parent else key + path = f"{parent}.{key}" if parent else key if key.startswith("!"): self.assertNotIn(key[1:], kwdict, msg=path) @@ -286,7 +296,7 @@ class TestExtractorResults(unittest.TestCase): if subtest: self.assertNotIsInstance(value, str, msg=path) for idx, item in enumerate(value): - subpath = "{}[{}]".format(path, idx) + subpath = f"{path}[{idx}]" self._test_kwdict_value(item, test, subpath) else: self._test_kwdict_value(value, test, path) @@ -308,12 +318,18 @@ class TestExtractorResults(unittest.TestCase): for idx, item in enumerate(test): if isinstance(item, dict): subtest = True - subpath = "{}[{}]".format(path, idx) - self._test_kwdict(value[idx], item, subpath) + subpath = f"{path}[{idx}]" + try: + obj = value[idx] + except Exception as exc: + self.fail(f"'{exc.__class__.__name__}: {exc}' " + f"when accessing {subpath}") + self._test_kwdict(obj, item, subpath) if not subtest: self.assertEqual(test, value, msg=path) elif isinstance(test, str): if test.startswith("re:"): + self.assertIsInstance(value, str, msg=path) self.assertRegex(value, test[3:], msg=path) elif test.startswith("dt:"): self.assertIsInstance(value, datetime.datetime, msg=path) @@ -324,8 +340,29 @@ class TestExtractorResults(unittest.TestCase): cls, _, length = test[4:].rpartition(":") if cls: self.assertEqual( - cls, type(value).__name__, msg=path + "/type") - self.assertEqual(int(length), len(value), msg=path) + cls, type(value).__name__, msg=f"{path}/type") + try: + len_value = len(value) + except Exception: + len_value = 0 + for _ in value: + len_value += 1 + self.assertEqual(int(length), len_value, msg=path) + elif test.startswith("iso:"): + iso = test[4:] + if iso in ("dt", "datetime", "8601"): + msg = f"{path} / ISO 8601" + try: + dt = datetime.datetime.fromisoformat(value) + except Exception as exc: + self.fail(f"Invalid datetime '{value}': {exc} {msg}") + self.assertIsInstance(dt, datetime.datetime, msg=msg) + elif iso in ("lang", "639", "639-1"): + msg = f"{path} / ISO 639-1" + self.assertIsInstance(value, str, msg=msg) + self.assertRegex(value, r"^[a-z]{2}(-\w+)?$", msg=msg) + else: + self.fail(f"Unsupported ISO test '{test}'") else: self.assertEqual(test, value, msg=path) else: @@ -335,7 +372,7 @@ class TestExtractorResults(unittest.TestCase): class ResultJob(job.DownloadJob): """Generate test-results for extractor runs""" - def __init__(self, url, parent=None, content=False): + def __init__(self, url, parent=None, content=False, format=True): job.DownloadJob.__init__(self, url, parent) self.queue = False self.content = content @@ -343,6 +380,7 @@ class ResultJob(job.DownloadJob): self.url_list = [] self.url_hash = hashlib.sha1() self.kwdict_list = [] + self.kwdict_post = [] self.kwdict_hash = hashlib.sha1() self.archive_list = [] self.archive_hash = hashlib.sha1() @@ -353,12 +391,17 @@ class ResultJob(job.DownloadJob): else: self._update_content = lambda url, kwdict: None - self.format_directory = TestFormatter( - "".join(self.extractor.directory_fmt)).format_map - self.format_filename = TestFormatter( - self.extractor.filename_fmt).format_map - 
self.format_archive = TestFormatter( - self.extractor.archive_fmt).format_map + if format: + self.format_directory = TestFormatter( + "".join(self.extractor.directory_fmt)).format_map + self.format_filename = TestFormatter( + self.extractor.filename_fmt).format_map + self.format_archive = TestFormatter( + self.extractor.archive_fmt).format_map + else: + self.format_directory = \ + self.format_filename = \ + self.format_archive = lambda kwdict: "" def run(self): self._init() @@ -391,6 +434,8 @@ class ResultJob(job.DownloadJob): def _update_kwdict(self, kwdict, to_list=True): if to_list: self.kwdict_list.append(kwdict.copy()) + else: + self.kwdict_post.append(kwdict.copy()) kwdict = util.filter_dict(kwdict) self.kwdict_hash.update( json.dumps(kwdict, sort_keys=True, default=str).encode()) @@ -489,8 +534,7 @@ def load_test_config(): except FileNotFoundError: pass except Exception as exc: - sys.exit("Error when loading {}: {}: {}".format( - path, exc.__class__.__name__, exc)) + sys.exit(f"Error when loading {path}: {exc.__class__.__name__}: {exc}") def result_categories(result): @@ -553,12 +597,12 @@ def generate_tests(): enum = collections.defaultdict(int) for result in tests: base, cat, sub = result_categories(result) - name = "{}_{}".format(cat, sub) + name = f"{cat}_{sub}" enum[name] += 1 method = _generate_method(result) method.__doc__ = result["#url"] - method.__name__ = "test_{}_{}".format(name, enum[name]) + method.__name__ = f"test_{name}_{enum[name]}" setattr(TestExtractorResults, method.__name__, method) diff --git a/test/test_util.py b/test/test_util.py index 00e8c4b..4a76769 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -385,7 +385,7 @@ class TestCompileExpression(unittest.TestCase): self.assertEqual(expr(value), result) with tempfile.TemporaryDirectory() as path: - file = path + "/module_sha1.py" + file = f"{path}/module_sha1.py" with open(file, "w") as fp: fp.write(""" import hashlib @@ -638,7 +638,7 @@ class TestOther(unittest.TestCase): self.assertIs(module, datetime) with tempfile.TemporaryDirectory() as path: - file = path + "/module_test.py" + file = f"{path}/module_test.py" with open(file, "w") as fp: fp.write(""" import datetime diff --git a/test/test_ytdl.py b/test/test_ytdl.py index ecc6d2f..88933e4 100644 --- a/test/test_ytdl.py +++ b/test/test_ytdl.py @@ -23,8 +23,8 @@ class Test_CommandlineArguments(unittest.TestCase): try: cls.module = __import__(cls.module_name) except (ImportError, SyntaxError): - raise unittest.SkipTest("cannot import module '{}'".format( - cls.module_name)) + raise unittest.SkipTest( + f"cannot import module '{cls.module_name}'") cls.default = ytdl.parse_command_line(cls.module, []) cls.ytdlp = hasattr(cls.module, "cookies") |
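
Note on the gallery_dl/path.py hunk above: `_build_cleanfunc()` and `_process_repl_dict()` move to module level and gain an optional conversion chain driven by the new `path-convert` setting, which `_build_convertfunc()` applies after the character-cleaning step. Below is a minimal standalone sketch of that clean-then-convert shape; it is not gallery-dl code, and the `CONVERSIONS` table, option values, and sample inputs are stand-ins rather than entries from the real `formatter._CONVERSIONS`.

```python
import functools
import re

# Hypothetical stand-in for gallery-dl's formatter._CONVERSIONS table
CONVERSIONS = {
    "l": str.lower,
    "u": str.upper,
    "t": str.strip,
}


def process_repl_dict(chars):
    # Expand "a-c"-style range keys into one mapping per character,
    # mirroring what _process_repl_dict() does before str.maketrans()
    for key in [k for k in chars if len(k) > 1]:
        if len(key) == 3 and key[1] == "-":
            targets = (chr(o) for o in range(ord(key[0]), ord(key[2]) + 1))
        else:
            targets = key
        repl = chars.pop(key)
        for char in targets:
            chars[char] = repl
    return chars


def build_cleanfunc(chars, repl="_", conv=None):
    # Pick the cheapest cleaning strategy for 'chars', then optionally
    # chain single-letter conversions on top (the "path-convert" idea)
    if not chars:
        def func(x):
            return x
    elif isinstance(chars, dict):
        table = str.maketrans(process_repl_dict(dict(chars)))

        def func(x):
            return x.translate(table)
    elif len(chars) == 1:
        def func(x):
            return x.replace(chars, repl)
    else:
        func = functools.partial(re.compile(f"[{chars}]").sub, repl)

    if not conv:
        return func
    convs = [CONVERSIONS[c] for c in conv]

    def convert(x):
        x = func(x)
        for conversion in convs:
            x = conversion(x)
        return x
    return convert


clean = build_cleanfunc('/<>:"?*', "_", conv="lt")
print(clean("  A:B/C?  "))          # -> a_b_c_

ranges = build_cleanfunc({"a-c": "x", " ": None})
print(ranges("abc def"))            # -> xxxdef
```

Keeping the cleaning closure and the conversion chain separate, as the hunk does, means the common no-conversion case returns the plain cleaning function and pays no extra call overhead.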

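
Note on the gallery_dl/util.py hunk above: the `cookiestxt_store()` change only swaps string concatenation for one f-string per cookie, but it also makes the record layout easy to read off: seven tab-separated Netscape cookies.txt fields per cookie. A self-contained sketch follows; the helper name and sample values are invented for illustration and the real function takes `http.cookiejar` cookie objects.

```python
import io


def cookiestxt_line(domain, path, name, value, secure=False, expires=None):
    # One Netscape cookies.txt record: seven tab-separated fields
    return (
        f"{domain}\t"
        f"{'TRUE' if domain and domain[0] == '.' else 'FALSE'}\t"  # subdomains
        f"{path}\t"
        f"{'TRUE' if secure else 'FALSE'}\t"
        f"{0 if expires is None else expires}\t"
        f"{name}\t"
        f"{value}\n"
    )


buf = io.StringIO()
buf.write("# Netscape HTTP Cookie File\n")
buf.write(cookiestxt_line(".example.org", "/", "session", "abc123",
                          secure=True, expires=1767225600))
print(buf.getvalue(), end="")
```

The second field is not stored on the cookie at all; it is derived from the domain (a leading dot means the cookie also applies to subdomains), which is why the hunk computes it inline.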
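
Note on the test/test_results.py hunk above: `_test_kwdict_value()` now accepts `iso:` expectations in addition to the existing `re:`, `dt:`, `type:`, and `len:` prefixes, with `dt`/`datetime`/`8601` checking that a string parses via `datetime.datetime.fromisoformat()` and `lang`/`639`/`639-1` checking for an ISO 639-1-style language code. A hypothetical result entry using them might look like the sketch below; the URL, category tuple, and field names are invented and do not come from an actual results file.

```python
# Hypothetical test-result entry exercising the new "iso:" value tests
EXAMPLE_RESULT = {
    "#url"     : "https://example.org/post/1",
    "#category": ("", "example", "post"),

    "date"     : "iso:8601",   # must parse with datetime.fromisoformat()
    "language" : "iso:639-1",  # must match ^[a-z]{2}(-\w+)?$
}
```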