| field | value |
|---|---|
| author | 2020-10-12 18:14:27 -0400 |
| committer | 2020-10-12 18:14:27 -0400 |
| commit | e0c914765184ebbf99cffdecfe8cdbe10f42486e |
| tree | 4dd89f11195c3f58b3b62b9911bbdc40d0e44471 |
| parent | 9074eee175f76b824fbb6695d56426105191c51c |
New upstream version 1.15.1 (tag: upstream/1.15.1)
41 files changed, 1256 insertions(+), 898 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md index b368535..0a55546 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,27 @@ # Changelog +## 1.15.1 - 2020-10-11 +### Additions +- [hentaicafe] add `manga_id` metadata field ([#1036](https://github.com/mikf/gallery-dl/issues/1036)) +- [hentaifoundry] add support for stories ([#734](https://github.com/mikf/gallery-dl/issues/734)) +- [hentaifoundry] add `include` option +- [newgrounds] extract image embeds ([#1033](https://github.com/mikf/gallery-dl/issues/1033)) +- [nijie] add `include` option ([#1018](https://github.com/mikf/gallery-dl/issues/1018)) +- [reactor] match URLs without subdomain ([#1053](https://github.com/mikf/gallery-dl/issues/1053)) +- [twitter] extend `retweets` option ([#1026](https://github.com/mikf/gallery-dl/issues/1026)) +- [weasyl] add extractors ([#977](https://github.com/mikf/gallery-dl/issues/977)) +### Fixes +- [500px] update query hashes +- [behance] fix `collection` extraction +- [newgrounds] fix video extraction ([#1042](https://github.com/mikf/gallery-dl/issues/1042)) +- [twitter] improve twitpic extraction ([#1019](https://github.com/mikf/gallery-dl/issues/1019)) +- [weibo] handle posts with more than 9 images ([#926](https://github.com/mikf/gallery-dl/issues/926)) +- [xvideos] fix `title` extraction +- fix crash when using `--download-archive` with `--no-skip` ([#1023](https://github.com/mikf/gallery-dl/issues/1023)) +- fix issues with `blacklist`/`whitelist` defaults ([#1051](https://github.com/mikf/gallery-dl/issues/1051), [#1056](https://github.com/mikf/gallery-dl/issues/1056)) +### Removals +- [kissmanga] remove module + ## 1.15.0 - 2020-09-20 ### Additions - [deviantart] support watchers-only/paid deviations ([#995](https://github.com/mikf/gallery-dl/issues/995)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.15.0 +Version: 1.15.1 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -119,6 +119,14 @@ Description: ========== $ choco install gallery-dl + Scoop + ---------- + + Apart from Chocolatey, *gallery-dl* is also available in Scoop_ "main" bucket for Windows users. + + .. code:: powershell + + $ scoop install gallery-dl Usage ===== @@ -311,7 +319,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. 
_Python: https://www.python.org/downloads/ @@ -324,6 +332,7 @@ Description: ========== .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth .. _Chocolatey: https://chocolatey.org/install + .. _Scoop: https://scoop.sh .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -108,6 +108,14 @@ Windows users that have Chocolatey_ installed can install *gallery-dl* from the $ choco install gallery-dl +Scoop +---------- + +Apart from Chocolatey, *gallery-dl* is also available in Scoop_ "main" bucket for Windows users. + +.. code:: powershell + + $ scoop install gallery-dl Usage ===== @@ -300,7 +308,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -313,6 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth .. _Chocolatey: https://chocolatey.org/install +.. _Scoop: https://scoop.sh .. |pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 2a84a06..cbcf4bf 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2020-09-20" "1.15.0" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2020-10-11" "1.15.1" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index e37135e..aeecaa0 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2020-09-20" "1.15.0" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2020-10-11" "1.15.1" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -115,6 +115,7 @@ Note: Even if the value of the \f[I]extension\f[] key is missing or starting. This key is therefore always available to provide a valid filename extension. + .SS extractor.*.directory .IP "Type:" 6 \f[I]list\f[] of \f[I]strings\f[] @@ -129,6 +130,7 @@ Each individual string in such a list represents a single path segment, which will be joined together and appended to the \f[I]base-directory\f[] to form the complete target directory path. 
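To make the interplay of ``base-directory`` and ``directory`` concrete, here is a minimal configuration sketch in gallery-dl's JSON config format. The ``twitter`` category and the ``{user[name]}`` replacement field are illustrative assumptions; which fields are actually available depends on the extractor's metadata (``-K`` lists them).

.. code:: json

    {
        "extractor": {
            "base-directory": "./gallery-dl/",
            "twitter": {
                "directory": ["{category}", "{user[name]}"]
            }
        }
    }

Each list element becomes one path segment, so downloads would land under ``./gallery-dl/twitter/<user name>/``.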
+ .SS extractor.*.base-directory .IP "Type:" 6 \f[I]Path\f[] @@ -137,7 +139,8 @@ segment, which will be joined together and appended to the \f[I]"./gallery-dl/"\f[] .IP "Description:" 4 -Directory path used as the base for all download destinations. +Directory path used as base for all download destinations. + .SS extractor.*.parent-directory .IP "Type:" 6 @@ -148,9 +151,10 @@ Directory path used as the base for all download destinations. .IP "Description:" 4 Use an extractor's current target directory as -\f[I]base-directory <extractor.*.base-directory_>\f[] +\f[I]base-directory\f[] for any spawned child extractors. + .SS extractor.*.path-restrict .IP "Type:" 6 \f[I]string\f[] or \f[I]object\f[] @@ -159,15 +163,15 @@ for any spawned child extractors. \f[I]"auto"\f[] .IP "Example:" 4 -"/!? (){}" .br -{" ": "_", "/": "-", "|": "-", ":": "-", "*": "+"} +* "/!? (){}" .br +* {" ": "_", "/": "-", "|": "-", ":": "-", "*": "+"} .IP "Description:" 4 A string of characters to be replaced with the value of .br -\f[I]path-replace <extractor.*.path-replace_>\f[] +\f[I]path-replace\f[] or an object mapping invalid/unwanted characters to their replacements .br for generated path segment names. @@ -186,6 +190,7 @@ depending on the local operating system Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] + .SS extractor.*.path-replace .IP "Type:" 6 \f[I]string\f[] @@ -195,7 +200,8 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] .IP "Description:" 4 The replacement character(s) for -\f[I]path-restrict <extractor.*.path-restrict_>\f[] +\f[I]path-restrict\f[] + .SS extractor.*.path-remove .IP "Type:" 6 @@ -210,6 +216,7 @@ Set of characters to remove from generated path names. Note: In a string with 2 or more characters, \f[I][]^-\\\f[] need to be escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] + .SS extractor.*.skip .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -220,7 +227,7 @@ escaped with backslashes, e.g. \f[I]"\\\\[\\\\]"\f[] .IP "Description:" 4 Controls the behavior when downloading files that have been downloaded before, i.e. a file with the same filename already -exists or its ID is in a \f[I]download archive <extractor.*.archive_>\f[]. +exists or its ID is in a \f[I]download archive\f[]. .br * \f[I]true\f[]: Skip downloads @@ -243,6 +250,7 @@ after \f[I]N\f[] consecutive skips * \f[I]"enumerate"\f[]: Add an enumeration index to the beginning of the filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.) + .SS extractor.*.sleep .IP "Type:" 6 \f[I]float\f[] @@ -253,6 +261,7 @@ filename extension (\f[I]file.1.ext\f[], \f[I]file.2.ext\f[], etc.) .IP "Description:" 4 Number of seconds to sleep before each download. + .SS extractor.*.sleep-extractor .IP "Type:" 6 \f[I]float\f[] @@ -264,6 +273,7 @@ Number of seconds to sleep before each download. Number of seconds to sleep before handling an input URL, i.e. before starting a new extractor. + .SS extractor.*.sleep-request .IP "Type:" 6 \f[I]float\f[] @@ -275,6 +285,7 @@ i.e. before starting a new extractor. Minimal time interval in seconds between each HTTP request during data extraction. + .SS extractor.*.username & .password .IP "Type:" 6 \f[I]string\f[] @@ -330,6 +341,7 @@ Note: The password values for \f[I]danbooru\f[] and \f[I]e621\f[] should be the API keys found in your user profile, not your actual account password. + .SS extractor.*.netrc .IP "Type:" 6 \f[I]bool\f[] @@ -340,6 +352,7 @@ password. 
.IP "Description:" 4 Enable the use of \f[I].netrc\f[] authentication data. + .SS extractor.*.cookies .IP "Type:" 6 \f[I]Path\f[] or \f[I]object\f[] @@ -378,6 +391,7 @@ If \f[I]extractor.*.cookies\f[] specifies the \f[I]Path\f[] to a cookies.txt file and it can be opened and parsed without errors, update its contents with cookies received during data extraction. + .SS extractor.*.proxy .IP "Type:" 6 \f[I]string\f[] or \f[I]object\f[] @@ -411,6 +425,7 @@ Example: Note: All proxy URLs should include a scheme, otherwise \f[I]http://\f[] is assumed. + .SS extractor.*.user-agent .IP "Type:" 6 \f[I]string\f[] @@ -421,9 +436,9 @@ otherwise \f[I]http://\f[] is assumed. .IP "Description:" 4 User-Agent header value to be used for HTTP requests. -Note: This option has no effect on pixiv and -readcomiconline extractors, as these need specific values to -function correctly. +Note: This option has no effect on pixiv extractors, +as these need specific values to function correctly. + .SS extractor.*.keywords .IP "Type:" 6 @@ -435,6 +450,7 @@ function correctly. .IP "Description:" 4 Additional key-value pairs to be added to each metadata dictionary. + .SS extractor.*.keywords-default .IP "Type:" 6 any @@ -446,6 +462,7 @@ any Default value used for missing or undefined keyword names in format strings. + .SS extractor.*.category-transfer .IP "Type:" 6 \f[I]bool\f[] @@ -458,6 +475,7 @@ Transfer an extractor's (sub)category values to all child extractors spawned by it, to let them inherit their parent's config options. + .SS extractor.*.blacklist & .whitelist .IP "Type:" 6 \f[I]list\f[] of \f[I]strings\f[] @@ -473,6 +491,7 @@ e.g. from \f[I]reddit\f[] or \f[I]plurk\f[]. Note: Any \f[I]blacklist\f[] setting will automatically include \f[I]"oauth"\f[], \f[I]"recursive"\f[], and \f[I]"test"\f[]. + .SS extractor.*.archive .IP "Type:" 6 \f[I]Path\f[] @@ -486,7 +505,7 @@ Note: Any \f[I]blacklist\f[] setting will automatically include .IP "Description:" 4 File to store IDs of downloaded files in. Downloads of files already recorded in this archive file will be -\f[I]skipped <extractor.*.skip_>\f[]. +\f[I]skipped\f[]. The resulting archive file is not a plain text file but an SQLite3 database, as either lookup operations are significantly faster or @@ -497,6 +516,7 @@ Note: archive paths support regular \f[I]format string\f[] replacements, but be aware that using external inputs for building local paths may pose a security risk. + .SS extractor.*.archive-format .IP "Type:" 6 \f[I]string\f[] @@ -507,6 +527,7 @@ may pose a security risk. .IP "Description:" 4 An alternative \f[I]format string\f[] to build archive IDs with. + .SS extractor.*.postprocessors .IP "Type:" 6 \f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects @@ -521,8 +542,9 @@ An alternative \f[I]format string\f[] to build archive IDs with. .IP "Description:" 4 -A list of post-processors to be applied to each downloaded file -in the same order as they are specified. +A list of \f[I]post-processors\f[] +to be applied to each downloaded file in the specified order. + .SS extractor.*.retries .IP "Type:" 6 @@ -535,12 +557,13 @@ in the same order as they are specified. Maximum number of times a failed HTTP request is retried before giving up or \f[I]-1\f[] for infinite retries. + .SS extractor.*.timeout .IP "Type:" 6 -\f[I]float\f[] or \f[I]null\f[] +\f[I]float\f[] .IP "Default:" 9 -\f[I]30\f[] +\f[I]30.0\f[] .IP "Description:" 4 Amount of time (in seconds) to wait for a successful connection @@ -549,6 +572,7 @@ and response from a remote server. 
This value gets internally used as the \f[I]timeout\f[] parameter for the \f[I]requests.request()\f[] method. + .SS extractor.*.verify .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -565,6 +589,7 @@ instead of the default certificates. This value gets internally used as the \f[I]verify\f[] parameter for the \f[I]requests.request()\f[] method. + .SS extractor.*.download .IP "Type:" 6 \f[I]bool\f[] @@ -576,66 +601,69 @@ This value gets internally used as the \f[I]verify\f[] parameter for the Controls whether to download media files. Setting this to \f[I]false\f[] won't download any files, but all other -functions (postprocessors_, \f[I]download archive\f[], etc.) +functions (\f[I]postprocessors\f[], \f[I]download archive\f[], etc.) will be executed as normal. + .SS extractor.*.image-range .IP "Type:" 6 \f[I]string\f[] .IP "Example:" 4 -"10-20", .br -"-5, 10, 30-50, 100-" +* "10-20" .br +* "-5, 10, 30-50, 100-" .IP "Description:" 4 Index-range(s) specifying which images to download. Note: The index of the first image is \f[I]1\f[]. + .SS extractor.*.chapter-range .IP "Type:" 6 \f[I]string\f[] .IP "Description:" 4 -Like \f[I]image-range <extractor.*.image-range_>\f[], +Like \f[I]image-range\f[], but applies to delegated URLs like manga-chapters, etc. + .SS extractor.*.image-filter .IP "Type:" 6 \f[I]string\f[] .IP "Example:" 4 -"width >= 1200 and width/height > 1.2", .br -"re.search(r'foo(bar)+', description)" +* "width >= 1200 and width/height > 1.2" .br +* "re.search(r'foo(bar)+', description)" .IP "Description:" 4 -Python expression controlling which images to download. -.br -Files for which the expression evaluates to \f[I]False\f[] +Python expression controlling which files to download. + +Files for which the expression evaluates to \f[I]False\f[] are ignored. .br -are ignored. -Available keys are the filename-specific ones listed +Available keys are the filename-specific ones listed by \f[I]-K\f[] or \f[I]-j\f[]. .br -by \f[I]-K\f[] or \f[I]-j\f[]. + .SS extractor.*.chapter-filter .IP "Type:" 6 \f[I]string\f[] .IP "Example:" 4 -"lang == 'en'" .br -"language == 'French' and 10 <= chapter < 20" +* "lang == 'en'" .br +* "language == 'French' and 10 <= chapter < 20" .IP "Description:" 4 -Like \f[I]image-filter <extractor.*.image-filter_>\f[], +Like \f[I]image-filter\f[], but applies to delegated URLs like manga-chapters, etc. + .SS extractor.*.image-unique .IP "Type:" 6 \f[I]bool\f[] @@ -647,6 +675,7 @@ but applies to delegated URLs like manga-chapters, etc. Ignore image URLs that have been encountered before during the current extractor run. + .SS extractor.*.chapter-unique .IP "Type:" 6 \f[I]bool\f[] @@ -655,9 +684,10 @@ current extractor run. \f[I]false\f[] .IP "Description:" 4 -Like \f[I]image-unique <extractor.*.image-unique_>\f[], +Like \f[I]image-unique\f[], but applies to delegated URLs like manga-chapters, etc. + .SS extractor.*.date-format .IP "Type:" 6 \f[I]string\f[] @@ -671,6 +701,7 @@ date-min and date-max. See \f[I]strptime\f[] for a list of formatting directives. + .SH EXTRACTOR-SPECIFIC OPTIONS .SS extractor.artstation.external .IP "Type:" 6 @@ -682,6 +713,7 @@ See \f[I]strptime\f[] for a list of formatting directives. .IP "Description:" 4 Try to follow external URLs of embedded players. 
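Tying the selection options above together, a short sketch of how ``image-range`` and ``image-filter`` look in an actual config file, reusing the example values from their option descriptions:

.. code:: json

    {
        "extractor": {
            "image-range": "10-20",
            "image-filter": "width >= 1200 and width/height > 1.2"
        }
    }

Files outside indices 10 through 20, or whose metadata makes the filter expression evaluate to ``False``, are ignored.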
+ .SS extractor.aryion.recursive .IP "Type:" 6 \f[I]bool\f[] @@ -698,6 +730,7 @@ descend into subfolders .br * \f[I]false\f[]: Get posts from "Latest Updates" pages + .SS extractor.blogger.videos .IP "Type:" 6 \f[I]bool\f[] @@ -708,6 +741,7 @@ descend into subfolders .IP "Description:" 4 Download embedded videos hosted on https://www.blogger.com/ + .SS extractor.danbooru.ugoira .IP "Type:" 6 \f[I]bool\f[] @@ -723,6 +757,7 @@ Controls the download target for Ugoira posts. .br * \f[I]false\f[]: Converted video files + .SS extractor.deviantart.extra .IP "Type:" 6 \f[I]bool\f[] @@ -736,6 +771,7 @@ description texts and journals. Note: Enabling this option also enables deviantart.metadata_. + .SS extractor.deviantart.flat .IP "Type:" 6 \f[I]bool\f[] @@ -755,6 +791,10 @@ favorites-collections and transfer any further work to other extractors (\f[I]folder\f[] or \f[I]collection\f[]), which will then create individual subdirectories for each of them. +Note: Going through all gallery folders will not be able to +fetch deviations which aren't in any folder. + + .SS extractor.deviantart.folders .IP "Type:" 6 \f[I]bool\f[] @@ -769,6 +809,7 @@ folders a deviation is present in. Note: Gathering this information requires a lot of API calls. Use with caution. + .SS extractor.deviantart.include .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] @@ -788,6 +829,7 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. + .SS extractor.deviantart.journals .IP "Type:" 6 \f[I]string\f[] @@ -805,6 +847,7 @@ Selects the output format of journal entries. .br * \f[I]"none"\f[]: Don't download journals. + .SS extractor.deviantart.mature .IP "Type:" 6 \f[I]bool\f[] @@ -819,6 +862,7 @@ This option simply sets the \f[I]mature_content\f[] parameter for API calls to either \f[I]"true"\f[] or \f[I]"false"\f[] and does not do any other form of content filtering. + .SS extractor.deviantart.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -827,9 +871,9 @@ form of content filtering. \f[I]false\f[] .IP "Description:" 4 -Request extended metadata for deviation objects to additionally -provide \f[I]description\f[], \f[I]tags\f[], \f[I]license\f[] and \f[I]is_watching\f[] -fields. +Request extended metadata for deviation objects to additionally provide +\f[I]description\f[], \f[I]tags\f[], \f[I]license\f[] and \f[I]is_watching\f[] fields. + .SS extractor.deviantart.original .IP "Type:" 6 @@ -845,6 +889,7 @@ Setting this option to \f[I]"images"\f[] only downloads original files if they are images and falls back to preview versions for everything else (archives, etc.). + .SS extractor.deviantart.quality .IP "Type:" 6 \f[I]integer\f[] @@ -856,6 +901,7 @@ everything else (archives, etc.). JPEG quality level of newer images for which an original file download is not available. + .SS extractor.deviantart.refresh-token .IP "Type:" 6 \f[I]string\f[] @@ -865,14 +911,15 @@ an original file download is not available. .IP "Description:" 4 The \f[I]refresh-token\f[] value you get from -\f[I]linking your DeviantArt account to gallery-dl <OAuth_>\f[]. +\f[I]linking your DeviantArt account to gallery-dl\f[]. Using a \f[I]refresh-token\f[] allows you to access private or otherwise not publicly available deviations. Note: The \f[I]refresh-token\f[] becomes invalid -\f[I]after 3 months <https://www.deviantart.com/developers/authentication#refresh>\f[] -or whenever your \f[I]cache file <cache.file_>\f[] is deleted or cleared. 
+\f[I]after 3 months\f[] +or whenever your \f[I]cache file\f[] is deleted or cleared. + .SS extractor.deviantart.wait-min .IP "Type:" 6 @@ -884,6 +931,7 @@ or whenever your \f[I]cache file <cache.file_>\f[] is deleted or cleared. .IP "Description:" 4 Minimum wait time in seconds before API requests. + .SS extractor.exhentai.domain .IP "Type:" 6 \f[I]string\f[] @@ -900,6 +948,7 @@ depending on the input URL .br * \f[I]"exhentai.org"\f[]: Use \f[I]exhentai.org\f[] for all URLs + .SS extractor.exhentai.limits .IP "Type:" 6 \f[I]bool\f[] or \f[I]integer\f[] @@ -914,6 +963,7 @@ and stop extraction when they are exceeded. If this value is an \f[I]integer\f[], it gets used as the limit maximum instead of the value listed on \f[I]https://e-hentai.org/home.php\f[] + .SS extractor.exhentai.original .IP "Type:" 6 \f[I]bool\f[] @@ -924,6 +974,7 @@ instead of the value listed on \f[I]https://e-hentai.org/home.php\f[] .IP "Description:" 4 Download full-sized original images if available. + .SS extractor.exhentai.wait-min & .wait-max .IP "Type:" 6 \f[I]float\f[] @@ -939,6 +990,7 @@ ExHentai detects and blocks automated downloaders. seconds between \f[I]wait-min\f[] and \f[I]wait-max\f[] after each image to prevent getting blocked. + .SS extractor.flickr.access-token & .access-token-secret .IP "Type:" 6 \f[I]string\f[] @@ -948,7 +1000,8 @@ each image to prevent getting blocked. .IP "Description:" 4 The \f[I]access_token\f[] and \f[I]access_token_secret\f[] values you get -from \f[I]linking your Flickr account to gallery-dl <OAuth_>\f[]. +from \f[I]linking your Flickr account to gallery-dl\f[]. + .SS extractor.flickr.videos .IP "Type:" 6 @@ -960,6 +1013,7 @@ from \f[I]linking your Flickr account to gallery-dl <OAuth_>\f[]. .IP "Description:" 4 Extract and download videos. + .SS extractor.flickr.size-max .IP "Type:" 6 \f[I]integer\f[] or \f[I]string\f[] @@ -978,6 +1032,7 @@ Sets the maximum allowed size for downloaded images. (\f[I]"Original"\f[], \f[I]"Large"\f[], ... or \f[I]"o"\f[], \f[I]"k"\f[], \f[I]"h"\f[], \f[I]"l"\f[], ...) to use as an upper limit. + .SS extractor.furaffinity.include .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] @@ -997,6 +1052,7 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. + .SS extractor.gelbooru.api .IP "Type:" 6 \f[I]bool\f[] @@ -1010,6 +1066,7 @@ Enable use of Gelbooru's API. Set this value to false if the API has been disabled to switch to manual information extraction. + .SS extractor.gfycat.format .IP "Type:" 6 \f[I]string\f[] @@ -1025,6 +1082,27 @@ If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[] and \f[I]"gif"\f[] (in that order) will be tried instead, until an available format is found. + +.SS extractor.hentaifoundry.include +.IP "Type:" 6 +\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]"pictures"\f[] + +.IP "Example:" 4 +"scraps,stories" or ["scraps", "stories"] + +.IP "Description:" 4 +A (comma-separated) list of subcategories to include +when processing a user profile. + +Possible values are +\f[I]"pictures"\f[], \f[I]"scraps"\f[], \f[I]"stories"\f[], \f[I]"favorite"\f[]. + +You can use \f[I]"all"\f[] instead of listing all values separately. + + .SS extractor.hitomi.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -1034,8 +1112,8 @@ available format is found. .IP "Description:" 4 Try to extract -\f[I]artist\f[], \f[I]group\f[], \f[I]parody\f[], and \f[I]characters\f[] -metadata. 
+\f[I]artist\f[], \f[I]group\f[], \f[I]parody\f[], and \f[I]characters\f[] metadata. + .SS extractor.imgur.mp4 .IP "Type:" 6 @@ -1055,6 +1133,7 @@ Controls whether to choose the GIF or MP4 version of an animation. .br * \f[I]"always"\f[]: Always choose MP4. + .SS extractor.inkbunny.orderby .IP "Type:" 6 \f[I]string\f[] @@ -1065,9 +1144,10 @@ Controls whether to choose the GIF or MP4 version of an animation. .IP "Description:" 4 Value of the \f[I]orderby\f[] parameter for submission searches. -(See \f[I]API#Search <https://wiki.inkbunny.net/wiki/API#Search>\f[] +(See \f[I]API#Search\f[] for details) + .SS extractor.instagram.highlights .IP "Type:" 6 \f[I]bool\f[] @@ -1079,6 +1159,7 @@ for details) Include *Story Highlights* when downloading a user profile. (requires authentication) + .SS extractor.instagram.videos .IP "Type:" 6 \f[I]bool\f[] @@ -1089,6 +1170,7 @@ Include *Story Highlights* when downloading a user profile. .IP "Description:" 4 Download video files. + .SS extractor.khinsider.format .IP "Type:" 6 \f[I]string\f[] @@ -1105,20 +1187,6 @@ or a (comma-separated) list to select multiple formats. If the selected format is not available, the first in the list gets chosen (usually mp3). -.SS extractor.kissmanga.captcha -.IP "Type:" 6 -\f[I]string\f[] - -.IP "Default:" 9 -\f[I]"stop"\f[] - -.IP "Description:" 4 -Controls how to handle redirects to CAPTCHA pages. - -.br -* \f[I]"stop\f[]: Stop the current extractor run. -.br -* \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait. .SS extractor.newgrounds.include .IP "Type:" 6 @@ -1139,6 +1207,24 @@ Possible values are You can use \f[I]"all"\f[] instead of listing all values separately. + +.SS extractor.nijie.include +.IP "Type:" 6 +\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] + +.IP "Default:" 9 +\f[I]"illustration,doujin"\f[] + +.IP "Description:" 4 +A (comma-separated) list of subcategories to include +when processing a user profile. + +Possible values are +\f[I]"illustration"\f[], \f[I]"doujin"\f[], \f[I]"favorite"\f[]. + +You can use \f[I]"all"\f[] instead of listing all values separately. + + .SS extractor.oauth.browser .IP "Type:" 6 \f[I]bool\f[] @@ -1147,14 +1233,15 @@ You can use \f[I]"all"\f[] instead of listing all values separately. \f[I]true\f[] .IP "Description:" 4 -Controls how a user is directed to an OAuth authorization site. +Controls how a user is directed to an OAuth authorization page. .br * \f[I]true\f[]: Use Python's \f[I]webbrowser.open()\f[] method to automatically -open the URL in the user's browser. +open the URL in the user's default browser. .br * \f[I]false\f[]: Ask the user to copy & paste an URL from the terminal. + .SS extractor.oauth.cache .IP "Type:" 6 \f[I]bool\f[] @@ -1164,7 +1251,8 @@ open the URL in the user's browser. .IP "Description:" 4 Store tokens received during OAuth authorizations -in \f[I]cache <cache.file_>\f[]. +in \f[I]cache\f[]. + .SS extractor.oauth.port .IP "Type:" 6 @@ -1181,6 +1269,7 @@ of the port specified here. You'll have to manually adjust the port number in your browser's address bar when using a different port than the default. + .SS extractor.photobucket.subalbums .IP "Type:" 6 \f[I]bool\f[] @@ -1191,6 +1280,7 @@ port than the default. .IP "Description:" 4 Download subalbums. + .SS extractor.pinterest.sections .IP "Type:" 6 \f[I]bool\f[] @@ -1201,6 +1291,7 @@ Download subalbums. .IP "Description:" 4 Include pins from board sections. + .SS extractor.pixiv.user.avatar .IP "Type:" 6 \f[I]bool\f[] @@ -1211,6 +1302,7 @@ Include pins from board sections. 
.IP "Description:" 4 Download user avatars. + .SS extractor.pixiv.ugoira .IP "Type:" 6 \f[I]bool\f[] @@ -1224,9 +1316,10 @@ Download Pixiv's Ugoira animations or ignore them. These animations come as a \f[I].zip\f[] file containing all animation frames in JPEG format. -Use an \f[I]ugoira\f[] post processor to convert them +Use an ugoira post processor to convert them to watchable videos. (Example__) + .SS extractor.plurk.comments .IP "Type:" 6 \f[I]bool\f[] @@ -1237,6 +1330,7 @@ to watchable videos. (Example__) .IP "Description:" 4 Also search Plurk comments for URLs. + .SS extractor.reactor.wait-min & .wait-max .IP "Type:" 6 \f[I]float\f[] @@ -1248,6 +1342,7 @@ Also search Plurk comments for URLs. Minimum and maximum wait time in seconds between HTTP requests during the extraction process. + .SS extractor.readcomiconline.captcha .IP "Type:" 6 \f[I]string\f[] @@ -1263,6 +1358,7 @@ Controls how to handle redirects to CAPTCHA pages. .br * \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait. + .SS extractor.reddit.comments .IP "Type:" 6 \f[I]integer\f[] @@ -1282,6 +1378,7 @@ appear to be 200 and 500 respectively. The value \f[I]0\f[] ignores all comments and significantly reduces the time required when scanning a subreddit. + .SS extractor.reddit.morecomments .IP "Type:" 6 \f[I]bool\f[] @@ -1295,6 +1392,7 @@ stubs in the base comment tree. This requires 1 additional API call for every 100 extra comments. + .SS extractor.reddit.date-min & .date-max .IP "Type:" 6 \f[I]Date\f[] @@ -1305,6 +1403,7 @@ This requires 1 additional API call for every 100 extra comments. .IP "Description:" 4 Ignore all submissions posted before/after this date. + .SS extractor.reddit.id-min & .id-max .IP "Type:" 6 \f[I]string\f[] @@ -1313,8 +1412,8 @@ Ignore all submissions posted before/after this date. "6kmzv2" .IP "Description:" 4 -Ignore all submissions posted before/after the submission with -this ID. +Ignore all submissions posted before/after the submission with this ID. + .SS extractor.reddit.recursion .IP "Type:" 6 @@ -1329,12 +1428,12 @@ linked to in the initial set of submissions. This value sets the maximum recursion depth. Special values: - .br * \f[I]0\f[]: Recursion is disabled .br * \f[I]-1\f[]: Infinite recursion (don't do this) + .SS extractor.reddit.refresh-token .IP "Type:" 6 \f[I]string\f[] @@ -1344,7 +1443,7 @@ Special values: .IP "Description:" 4 The \f[I]refresh-token\f[] value you get from -\f[I]linking your Reddit account to gallery-dl <OAuth_>\f[]. +\f[I]linking your Reddit account to gallery-dl\f[]. Using a \f[I]refresh-token\f[] allows you to access private or otherwise not publicly available subreddits, given that your account is @@ -1352,6 +1451,7 @@ authorized to do so, but requests to the reddit API are going to be rate limited at 600 requests every 10 minutes/600 seconds. + .SS extractor.reddit.videos .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1371,6 +1471,7 @@ video extraction and download .br * \f[I]false\f[]: Ignore videos + .SS extractor.redgifs.format .IP "Type:" 6 \f[I]string\f[] @@ -1387,6 +1488,7 @@ If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[] and \f[I]"gif"\f[] (in that order) will be tried instead, until an available format is found. + .SS extractor.sankaku.wait-min & .wait-max .IP "Type:" 6 \f[I]float\f[] @@ -1401,6 +1503,7 @@ Sankaku Channel responds with \f[I]429 Too Many Requests\f[] if it receives too many HTTP requests in a certain amount of time. Waiting a few seconds between each request tries to prevent that. 
+ .SS extractor.smugmug.videos .IP "Type:" 6 \f[I]bool\f[] @@ -1411,6 +1514,7 @@ Waiting a few seconds between each request tries to prevent that. .IP "Description:" 4 Download video files. + .SS extractor.tumblr.avatar .IP "Type:" 6 \f[I]bool\f[] @@ -1421,6 +1525,7 @@ Download video files. .IP "Description:" 4 Download blog avatars. + .SS extractor.tumblr.date-min & .date-max .IP "Type:" 6 \f[I]Date\f[] @@ -1431,6 +1536,7 @@ Download blog avatars. .IP "Description:" 4 Ignore all posts published before/after this date. + .SS extractor.tumblr.external .IP "Type:" 6 \f[I]bool\f[] @@ -1442,6 +1548,7 @@ Ignore all posts published before/after this date. Follow external URLs (e.g. from "Link" posts) and try to extract images from them. + .SS extractor.tumblr.inline .IP "Type:" 6 \f[I]bool\f[] @@ -1452,6 +1559,7 @@ images from them. .IP "Description:" 4 Search posts for inline images and videos. + .SS extractor.tumblr.reblogs .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1468,6 +1576,7 @@ Search posts for inline images and videos. * \f[I]"same-blog"\f[]: Skip reblogged posts unless the original post is from the same blog + .SS extractor.tumblr.posts .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] @@ -1486,6 +1595,7 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[] You can use \f[I]"all"\f[] instead of listing all types separately. + .SS extractor.twitter.quoted .IP "Type:" 6 \f[I]bool\f[] @@ -1496,6 +1606,7 @@ You can use \f[I]"all"\f[] instead of listing all types separately. .IP "Description:" 4 Fetch media from quoted Tweets. + .SS extractor.twitter.replies .IP "Type:" 6 \f[I]bool\f[] @@ -1506,6 +1617,7 @@ Fetch media from quoted Tweets. .IP "Description:" 4 Fetch media from replies to other Tweets. + .SS extractor.twitter.retweets .IP "Type:" 6 \f[I]bool\f[] @@ -1516,6 +1628,10 @@ Fetch media from replies to other Tweets. .IP "Description:" 4 Fetch media from Retweets. +If this value is \f[I]"original"\f[], metadata for these files +will be taken from the original Tweets, not the Retweets. + + .SS extractor.twitter.twitpic .IP "Type:" 6 \f[I]bool\f[] @@ -1524,7 +1640,8 @@ Fetch media from Retweets. \f[I]false\f[] .IP "Description:" 4 -Extract \f[I]TwitPic <https://twitpic.com/>\f[] embeds. +Extract \f[I]TwitPic\f[] embeds. + .SS extractor.twitter.videos .IP "Type:" 6 @@ -1543,6 +1660,7 @@ Control video download behavior. .br * \f[I]false\f[]: Skip video Tweets + .SS extractor.vsco.videos .IP "Type:" 6 \f[I]bool\f[] @@ -1553,6 +1671,7 @@ Control video download behavior. .IP "Description:" 4 Download video files. + .SS extractor.wallhaven.api-key .IP "Type:" 6 \f[I]string\f[] @@ -1561,11 +1680,12 @@ Download video files. \f[I]null\f[] .IP "Description:" 4 -Your \f[I]API Key <https://wallhaven.cc/settings/account>\f[] to use +Your \f[I]API Key\f[] to use your account's browsing settings and default filters when searching. See https://wallhaven.cc/help/api for more information. + .SS extractor.weibo.retweets .IP "Type:" 6 \f[I]bool\f[] @@ -1576,6 +1696,7 @@ See https://wallhaven.cc/help/api for more information. .IP "Description:" 4 Extract media from retweeted posts. + .SS extractor.weibo.videos .IP "Type:" 6 \f[I]bool\f[] @@ -1586,6 +1707,7 @@ Extract media from retweeted posts. .IP "Description:" 4 Download video files. + .SS extractor.[booru].tags .IP "Type:" 6 \f[I]bool\f[] @@ -1599,6 +1721,7 @@ and provide them as \f[I]tags_<type>\f[] metadata fields. Note: This requires 1 additional HTTP request for each post. 
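The extended ``twitter.retweets`` option described above, one of this release's additions, accepts ``"original"`` in addition to the usual booleans; a minimal sketch:

.. code:: json

    {
        "extractor": {
            "twitter": {
                "retweets": "original"
            }
        }
    }

With ``"original"``, media from Retweets is still fetched, but the metadata attached to those files is taken from the original Tweets rather than the Retweets.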
+ .SS extractor.[manga-extractor].chapter-reverse .IP "Type:" 6 \f[I]bool\f[] @@ -1614,6 +1737,7 @@ Reverse the order of chapter URLs extracted from manga pages. .br * \f[I]false\f[]: Start with the first chapter + .SH DOWNLOADER OPTIONS .SS downloader.*.enabled .IP "Type:" 6 @@ -1625,6 +1749,7 @@ Reverse the order of chapter URLs extracted from manga pages. .IP "Description:" 4 Enable/Disable this downloader module. + .SS downloader.*.filesize-min & .filesize-max .IP "Type:" 6 \f[I]string\f[] @@ -1643,6 +1768,7 @@ Possible values are valid integer or floating-point numbers optionally followed by one of \f[I]k\f[], \f[I]m\f[]. \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[]. These suffixes are case-insensitive. + .SS downloader.*.mtime .IP "Type:" 6 \f[I]bool\f[] @@ -1654,6 +1780,7 @@ These suffixes are case-insensitive. Use \f[I]Last-Modified\f[] HTTP response headers to set file modification times. + .SS downloader.*.part .IP "Type:" 6 \f[I]bool\f[] @@ -1672,6 +1799,7 @@ resuming incomplete downloads. * \f[I]false\f[]: Do not use \f[I].part\f[] files and write data directly into the actual output files. + .SS downloader.*.part-directory .IP "Type:" 6 \f[I]Path\f[] @@ -1686,6 +1814,7 @@ Missing directories will be created as needed. If this value is \f[I]null\f[], \f[I].part\f[] files are going to be stored alongside the actual output files. + .SS downloader.*.rate .IP "Type:" 6 \f[I]string\f[] @@ -1703,6 +1832,7 @@ Possible values are valid integer or floating-point numbers optionally followed by one of \f[I]k\f[], \f[I]m\f[]. \f[I]g\f[], \f[I]t\f[] or \f[I]p\f[]. These suffixes are case-insensitive. + .SS downloader.*.retries .IP "Type:" 6 \f[I]integer\f[] @@ -1711,9 +1841,10 @@ These suffixes are case-insensitive. \f[I]extractor.*.retries\f[] .IP "Description:" 4 -Maximum number of retries during file downloads +Maximum number of retries during file downloads, or \f[I]-1\f[] for infinite retries. + .SS downloader.*.timeout .IP "Type:" 6 \f[I]float\f[] or \f[I]null\f[] @@ -1724,6 +1855,7 @@ or \f[I]-1\f[] for infinite retries. .IP "Description:" 4 Connection timeout during file downloads. + .SS downloader.*.verify .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1734,6 +1866,7 @@ Connection timeout during file downloads. .IP "Description:" 4 Certificate validation during file downloads. + .SS downloader.http.adjust-extensions .IP "Type:" 6 \f[I]bool\f[] @@ -1745,6 +1878,7 @@ Certificate validation during file downloads. Check the file headers of \f[I]jpg\f[], \f[I]png\f[], and \f[I]gif\f[] files and adjust their filename extensions if they do not match. + .SS downloader.ytdl.format .IP "Type:" 6 \f[I]string\f[] @@ -1757,6 +1891,7 @@ Video \f[I]format selection <https://github.com/ytdl-org/youtube-dl#format-selection>\f[] directly passed to youtube-dl. + .SS downloader.ytdl.forward-cookies .IP "Type:" 6 \f[I]bool\f[] @@ -1767,6 +1902,7 @@ directly passed to youtube-dl. .IP "Description:" 4 Forward cookies to youtube-dl. + .SS downloader.ytdl.logging .IP "Type:" 6 \f[I]bool\f[] @@ -1776,13 +1912,12 @@ Forward cookies to youtube-dl. .IP "Description:" 4 Route youtube-dl's output through gallery-dl's logging system. -.br Otherwise youtube-dl will write its output directly to stdout/stderr. -.br Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in \f[I]downloader.ytdl.raw-options\f[] to \f[I]true\f[] to suppress all output. 
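The note above about suppressing youtube-dl's output translates directly into configuration; a minimal sketch (``quiet`` and ``no_warnings`` are standard ``YoutubeDL`` options forwarded verbatim through ``raw-options``):

.. code:: json

    {
        "downloader": {
            "ytdl": {
                "raw-options": {
                    "quiet": true,
                    "no_warnings": true
                }
            }
        }
    }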
+ .SS downloader.ytdl.outtmpl .IP "Type:" 6 \f[I]string\f[] @@ -1791,7 +1926,7 @@ Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in \f[I]null\f[] .IP "Description:" 4 -The \f[I]Output Template <https://github.com/ytdl-org/youtube-dl#output-template>\f[] +The \f[I]Output Template\f[] used to generate filenames for files downloaded with youtube-dl. Special values: @@ -1805,6 +1940,7 @@ Note: An output template other than \f[I]null\f[] might cause unexpected results in combination with other options (e.g. \f[I]"skip": "enumerate"\f[]) + .SS downloader.ytdl.raw-options .IP "Type:" 6 \f[I]object\f[] @@ -1821,11 +1957,11 @@ cause unexpected results in combination with other options .IP "Description:" 4 Additional options passed directly to the \f[I]YoutubeDL\f[] constructor. -.br + All available options can be found in \f[I]youtube-dl's docstrings -.br <https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>\f[]. + .SH OUTPUT OPTIONS .SS output.mode .IP "Type:" 6 @@ -1848,6 +1984,7 @@ Controls the output string format and status indicators. .br * \f[I]"auto"\f[]: Automatically choose the best suitable output mode + .SS output.shorten .IP "Type:" 6 \f[I]bool\f[] @@ -1859,6 +1996,7 @@ Controls the output string format and status indicators. Controls whether the output strings should be shortened to fit on one console line. + .SS output.progress .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -1880,6 +2018,7 @@ multiple URLs as arguments. as a custom \f[I]format string\f[]. Possible replacement keys are \f[I]current\f[], \f[I]total\f[] and \f[I]url\f[]. + .SS output.log .IP "Type:" 6 \f[I]string\f[] or \f[I]Logging Configuration\f[] @@ -1893,6 +2032,7 @@ Configuration for standard logging output to stderr. If this is a simple \f[I]string\f[], it specifies the format string for logging messages. + .SS output.logfile .IP "Type:" 6 \f[I]Path\f[] or \f[I]Logging Configuration\f[] @@ -1903,6 +2043,7 @@ the format string for logging messages. .IP "Description:" 4 File to write logging output to. + .SS output.unsupportedfile .IP "Type:" 6 \f[I]Path\f[] or \f[I]Logging Configuration\f[] @@ -1915,6 +2056,7 @@ File to write external URLs unsupported by *gallery-dl* to. The default format string here is \f[I]"{message}"\f[]. + .SS output.num-to-str .IP "Type:" 6 \f[I]bool\f[] @@ -1926,6 +2068,7 @@ The default format string here is \f[I]"{message}"\f[]. Convert numeric values (\f[I]integer\f[] or \f[I]float\f[]) to \f[I]string\f[] before outputting them as JSON. + .SH POSTPROCESSOR OPTIONS .SS classify.mapping .IP "Type:" 6 @@ -1949,6 +2092,7 @@ be stored in them. Files with an extension not listed will be ignored and stored in their default location. + .SS compare.action .IP "Type:" 6 \f[I]string\f[] @@ -1963,7 +2107,8 @@ The action to take when files do not compare as equal. * \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one .br * \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new -version like \f[I]skip = "enumerate" <extractor.*.skip_>\f[] +version like \f[I]skip = "enumerate"\f[] + .SS compare.shallow .IP "Type:" 6 @@ -1975,6 +2120,7 @@ version like \f[I]skip = "enumerate" <extractor.*.skip_>\f[] .IP "Description:" 4 Only compare file sizes. Do not read and compare their content. + .SS exec.async .IP "Type:" 6 \f[I]bool\f[] @@ -1986,15 +2132,16 @@ Only compare file sizes. Do not read and compare their content. Controls whether to wait for a subprocess to finish or to let it run asynchronously. 
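As a bridge to the ``exec.command`` section that follows, a hedged sketch of a complete ``exec`` post-processor entry with ``async`` enabled; the command is the list-form example from the next section, and its ``{user[account]}``/``{id}`` fields are placeholders for whatever metadata the extractor provides:

.. code:: json

    {
        "extractor": {
            "postprocessors": [
                {
                    "name": "exec",
                    "async": true,
                    "command": ["echo", "{user[account]}", "{id}"]
                }
            ]
        }
    }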
+ .SS exec.command .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] .IP "Example:" 4 -"convert {} {}.png && rm {}", .br -["echo", "{user[account]}", "{id}"] +* "convert {} {}.png && rm {}" .br +* ["echo", "{user[account]}", "{id}"] .IP "Description:" 4 The command to run. @@ -2012,6 +2159,7 @@ Each element of this list is treated as a \f[I]format string\f[] using the files' metadata as well as \f[I]{_path}\f[], \f[I]{_directory}\f[], and \f[I]{_filename}\f[]. + .SS exec.final .IP "Type:" 6 \f[I]bool\f[] @@ -2024,6 +2172,7 @@ Controls whether to execute \f[I]exec.command\f[] for each downloaded file or only once after all files have been downloaded successfully. + .SS metadata.mode .IP "Type:" 6 \f[I]string\f[] @@ -2043,6 +2192,7 @@ Select how to write metadata. * \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[] to a file's metadata dictionary + .SS metadata.directory .IP "Type:" 6 \f[I]string\f[] @@ -2057,6 +2207,7 @@ to a file's metadata dictionary Directory where metadata files are stored in relative to the current target location for file downloads. + .SS metadata.extension .IP "Type:" 6 \f[I]string\f[] @@ -2068,15 +2219,16 @@ current target location for file downloads. Filename extension for metadata files that will be appended to the original file names. + .SS metadata.extension-format .IP "Type:" 6 \f[I]string\f[] .IP "Example:" 4 -"{extension}.json", .br -"json" +* "{extension}.json" .br +* "json" .IP "Description:" 4 Custom format string to build filename extensions for metadata @@ -2084,6 +2236,7 @@ files with, which will replace the original filename extensions. Note: \f[I]metadata.extension\f[] is ignored if this option is set. + .SS metadata.content-format .IP "Type:" 6 \f[I]string\f[] @@ -2096,6 +2249,7 @@ Custom format string to build the content of metadata files with. Note: Only applies for \f[I]"mode": "custom"\f[]. + .SS mtime.key .IP "Type:" 6 \f[I]string\f[] @@ -2109,6 +2263,7 @@ Name of the metadata field whose value should be used. This value must either be a UNIX timestamp or a \f[I]datetime\f[] object. + .SS ugoira.extension .IP "Type:" 6 \f[I]string\f[] @@ -2119,6 +2274,7 @@ This value must either be a UNIX timestamp or a .IP "Description:" 4 Filename extension for the resulting video files. + .SS ugoira.ffmpeg-args .IP "Type:" 6 \f[I]list\f[] of \f[I]strings\f[] @@ -2132,6 +2288,7 @@ Filename extension for the resulting video files. .IP "Description:" 4 Additional FFmpeg command-line arguments. + .SS ugoira.ffmpeg-location .IP "Type:" 6 \f[I]Path\f[] @@ -2142,6 +2299,7 @@ Additional FFmpeg command-line arguments. .IP "Description:" 4 Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use. + .SS ugoira.ffmpeg-output .IP "Type:" 6 \f[I]bool\f[] @@ -2152,6 +2310,7 @@ Location of the \f[I]ffmpeg\f[] (or \f[I]avconv\f[]) executable to use. .IP "Description:" 4 Show FFmpeg output. + .SS ugoira.ffmpeg-twopass .IP "Type:" 6 \f[I]bool\f[] @@ -2162,6 +2321,7 @@ Show FFmpeg output. .IP "Description:" 4 Enable Two-Pass encoding. + .SS ugoira.framerate .IP "Type:" 6 \f[I]string\f[] @@ -2180,6 +2340,7 @@ based on delays between frames. .br * \f[I]null\f[] or an empty \f[I]string\f[]: Don't set an explicit frame rate. + .SS ugoira.keep-files .IP "Type:" 6 \f[I]bool\f[] @@ -2190,6 +2351,7 @@ based on delays between frames. .IP "Description:" 4 Keep ZIP archives after conversion. + .SS ugoira.libx264-prevent-odd .IP "Type:" 6 \f[I]bool\f[] @@ -2201,7 +2363,7 @@ Keep ZIP archives after conversion. 
Prevent \f[I]"width/height not divisible by 2"\f[] errors when using \f[I]libx264\f[] or \f[I]libx265\f[] encoders by applying a simple cropping filter. See this \f[I]Stack Overflow -thread <https://stackoverflow.com/questions/20847674>\f[] +thread\f[] for more information. This option, when \f[I]libx264/5\f[] is used, automatically @@ -2209,17 +2371,6 @@ adds \f[I]["-vf", "crop=iw-mod(iw\\\\,2):ih-mod(ih\\\\,2)"]\f[] to the list of FFmpeg command-line arguments to reduce an odd width/height by 1 pixel and make them even. -.SS zip.compression -.IP "Type:" 6 -\f[I]string\f[] - -.IP "Default:" 9 -\f[I]"store"\f[] - -.IP "Description:" 4 -Compression method to use when writing the archive. - -Possible values are \f[I]"store"\f[], \f[I]"zip"\f[], \f[I]"bzip2"\f[], \f[I]"lzma"\f[]. .SS zip.extension .IP "Type:" 6 @@ -2231,6 +2382,7 @@ Possible values are \f[I]"store"\f[], \f[I]"zip"\f[], \f[I]"bzip2"\f[], \f[I]"lz .IP "Description:" 4 Filename extension for the created ZIP archive. + .SS zip.keep-files .IP "Type:" 6 \f[I]bool\f[] @@ -2241,6 +2393,7 @@ Filename extension for the created ZIP archive. .IP "Description:" 4 Keep the actual files after writing them to a ZIP archive. + .SS zip.mode .IP "Type:" 6 \f[I]string\f[] @@ -2261,6 +2414,7 @@ This greatly reduces the chance a ZIP archive gets corrupted in case the Python interpreter gets shut down unexpectedly (power outage, SIGKILL) but is also a lot slower. + .SH MISCELLANEOUS OPTIONS .SS cache.file .IP "Type:" 6 @@ -2279,6 +2433,7 @@ cookies and API tokens across gallery-dl invocations. Set this option to \f[I]null\f[] or an invalid path to disable this cache. + .SS ciphers .IP "Type:" 6 \f[I]bool\f[] or \f[I]string\f[] @@ -2293,9 +2448,10 @@ this cache. * \f[I]false\f[]: Leave the default cipher list as is .br * Any \f[I]string\f[]: Replace urllib3's default ciphers with these -(See \f[I]SSLContext.set_ciphers() <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.set_ciphers>\f[] +(See \f[I]SSLContext.set_ciphers()\f[] for details) + .SS pyopenssl .IP "Type:" 6 \f[I]bool\f[] @@ -2304,18 +2460,19 @@ for details) \f[I]false\f[] .IP "Description:" 4 -Use \f[I]pyOpenSSL <https://www.pyopenssl.org/en/stable/>\f[]-backed +Use \f[I]pyOpenSSL\f[]-backed SSL-support. + .SH API TOKENS & IDS .SS extractor.deviantart.client-id & .client-secret .IP "Type:" 6 \f[I]string\f[] -.IP "How To:" 4 +.IP "How To:" 4 .br * login and visit DeviantArt's -\f[I]Applications & Keys <https://www.deviantart.com/developers/apps>\f[] +\f[I]Applications & Keys\f[] section .br * click "Register Application" @@ -2332,20 +2489,21 @@ Submission Policy, and Terms of Service. 
application and put them in your configuration file as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[] .br -* clear your \f[I]cache <cache.file_>\f[] (\f[I]--clear-cache\f[]) to delete +* clear your \f[I]cache\f[] (\f[I]--clear-cache\f[]) to delete the \f[I]access-token\f[] from the previous \f[I]client-id\f[] .br -* get a new \f[I]refresh-token <extractor.deviantart.refresh-token_>\f[] +* get a new \f[I]refresh-token\f[] if necessary + .SS extractor.flickr.api-key & .api-secret .IP "Type:" 6 \f[I]string\f[] -.IP "How To:" 4 +.IP "How To:" 4 .br -* login and \f[I]Create an App <https://www.flickr.com/services/apps/create/apply/>\f[] -in Flickr's \f[I]App Garden <https://www.flickr.com/services/>\f[] +* login and \f[I]Create an App\f[] +in Flickr's \f[I]App Garden\f[] .br * click "APPLY FOR A NON-COMMERCIAL KEY" .br @@ -2355,21 +2513,14 @@ and click "SUBMIT" * copy \f[I]Key\f[] and \f[I]Secret\f[] and put them in your configuration file -.SS extractor.pawoo.access-token -.IP "Type:" 6 -\f[I]string\f[] - -.IP "How To -:" 4 - .SS extractor.reddit.client-id & .user-agent .IP "Type:" 6 \f[I]string\f[] -.IP "How To:" 4 +.IP "How To:" 4 .br -* login and visit the \f[I]apps <https://www.reddit.com/prefs/apps/>\f[] +* login and visit the \f[I]apps\f[] section of your account's preferences .br * click the "are you a developer? create an app..." button @@ -2384,15 +2535,16 @@ section of your account's preferences * use "\f[I]Python:<application name>:v1.0 (by /u/<username>)\f[]" as user-agent and replace \f[I]<application name>\f[] and \f[I]<username>\f[] accordingly (see Reddit's -\f[I]API access rules <https://github.com/reddit/reddit/wiki/API>\f[]) +\f[I]API access rules\f[]) + .SS extractor.smugmug.api-key & .api-secret .IP "Type:" 6 \f[I]string\f[] -.IP "How To:" 4 +.IP "How To:" 4 .br -* login and \f[I]Apply for an API Key <https://api.smugmug.com/api/developer/apply>\f[] +* login and \f[I]Apply for an API Key\f[] .br * use a random name and description, set "Type" to "Application", "Platform" to "All", @@ -2403,14 +2555,15 @@ and "Use" to "Non-Commercial" * copy \f[I]API Key\f[] and \f[I]API Secret\f[] and put them in your configuration file + .SS extractor.tumblr.api-key & .api-secret .IP "Type:" 6 \f[I]string\f[] -.IP "How To:" 4 +.IP "How To:" 4 .br * login and visit Tumblr's -\f[I]Applications <https://www.tumblr.com/oauth/apps>\f[] section +\f[I]Applications\f[] section .br * click "Register application" .br @@ -2425,18 +2578,19 @@ callback URL" * copy your \f[I]OAuth Consumer Key\f[] and \f[I]Secret Key\f[] and put them in your configuration file + .SH CUSTOM TYPES .SS Date .IP "Type:" 6 \f[I]string\f[] or \f[I]integer\f[] .IP "Example:" 4 -"2019-01-01T00:00:00", .br -"2019" with "%Y" as \f[I]date-format\f[], +* "2019-01-01T00:00:00" .br -1546297200 +* "2019" with "%Y" as \f[I]date-format\f[] .br +* 1546297200 .IP "Description:" 4 A \f[I]Date\f[] value represents a specific point in time. @@ -2446,26 +2600,27 @@ A \f[I]Date\f[] value represents a specific point in time. .br * If given as \f[I]integer\f[], it is interpreted as UTC timestamp. + .SS Path .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] .IP "Example:" 4 -"file.ext", .br -"~/path/to/file.ext", +* "file.ext" .br -"$HOME/path/to/file.ext", +* "~/path/to/file.ext" .br -["$HOME", "path", "to", "file.ext"] +* "$HOME/path/to/file.ext" .br +* ["$HOME", "path", "to", "file.ext"] .IP "Description:" 4 A \f[I]Path\f[] is a \f[I]string\f[] representing the location of a file or directory. 
-Simple \f[I]tilde expansion <https://docs.python.org/3/library/os.path.html#os.path.expanduser>\f[] -and \f[I]environment variable expansion <https://docs.python.org/3/library/os.path.html#os.path.expandvars>\f[] +Simple \f[I]tilde expansion\f[] +and \f[I]environment variable expansion\f[] is supported. In Windows environments, backslashes (\f[I]"\\"\f[]) can, in addition to @@ -2475,11 +2630,11 @@ they themselves have to be escaped. The path \f[I]C:\\path\\to\\file.ext\f[] has therefore to be written as \f[I]"C:\\\\path\\\\to\\\\file.ext"\f[] if you want to use backslashes. + .SS Logging Configuration .IP "Type:" 6 \f[I]object\f[] - .IP "Example:" 4 .. code:: @@ -2490,6 +2645,8 @@ The path \f[I]C:\\path\\to\\file.ext\f[] has therefore to be written as "encoding": "ascii" } +.. code:: + { "level": "debug", "format": { @@ -2511,19 +2668,20 @@ Extended logging output configuration. or a dictionary with format strings for each loglevel. In addition to the default -\f[I]LogRecord attributes <https://docs.python.org/3/library/logging.html#logrecord-attributes>\f[], +\f[I]LogRecord attributes\f[], it is also possible to access the current -\f[I]extractor <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/extractor/common.py#L24>\f[] -and \f[I]job <https://github.com/mikf/gallery-dl/blob/2e516a1e3e09cb8a9e36a8f6f7e41ce8d4402f5a/gallery_dl/job.py#L19>\f[] -objects as well as their attributes -(e.g. \f[I]"{extractor.url}"\f[]) +\f[I]extractor\f[], +\f[I]job\f[], +\f[I]path\f[], +and keywords objects and their attributes, for example +\f[I]"{extractor.url}"\f[], \f[I]"{path.filename}"\f[], \f[I]"{keywords.title}"\f[] .br * Default: \f[I]"[{name}][{levelname}] {message}"\f[] .br * format-date .br * Format string for \f[I]{asctime}\f[] fields in logging messages -(see \f[I]strftime() directives <https://docs.python.org/3/library/time.html#time.strftime>\f[]) +(see \f[I]strftime() directives\f[]) .br * Default: \f[I]"%Y-%m-%d %H:%M:%S"\f[] .br @@ -2542,7 +2700,7 @@ objects as well as their attributes .br * Mode in which the file is opened; use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append -(see \f[I]open() <https://docs.python.org/3/library/functions.html#open>\f[]) +(see \f[I]open()\f[]) .br * Default: \f[I]"w"\f[] .br @@ -2555,14 +2713,18 @@ use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append Note: path, mode and encoding are only applied when configuring logging output to a file. + .SS Postprocessor Configuration .IP "Type:" 6 \f[I]object\f[] - .IP "Example:" 4 .. code:: +{ "name": "mtime" } + +.. code:: + { "name": "zip", "compression": "store", @@ -2570,17 +2732,34 @@ logging output to a file. "whitelist": ["mangadex", "exhentai", "nhentai"] } - .IP "Description:" 4 -An object with the \f[I]name\f[] of a post-processor and its options. - -See \f[I]Postprocessor Options\f[] for a list of all available -post-processors and their respective options. - -You can also set a \f[I]whitelist\f[] or \f[I]blacklist\f[] to +An \f[I]object\f[] containing a \f[I]"name"\f[] attribute specifying the +post-processor type, as well as any of its \f[I]options\f[]. +It is also possible set a \f[I]"whitelist"\f[] or \f[I]"blacklist"\f[] to only enable or disable a post-processor for the specified extractor categories. 
+The available post-processor types are + +\f[I]classify\f[] +Categorize files by filename extension +\f[I]compare\f[] +Compare versions of the same file and replace/enumerate them on mismatch +.br +(requires \f[I]downloader.*.part\f[] = \f[I]true\f[] and \f[I]extractor.*.skip\f[] = \f[I]false\f[]) +.br +\f[I]exec\f[] +Execute external commands +\f[I]metadata\f[] +Write metadata to separate files +\f[I]mtime\f[] +Set file modification time according to its metadata +\f[I]ugoira\f[] +Convert Pixiv Ugoira to WebM using \f[I]FFmpeg\f[] +\f[I]zip\f[] +Store files in a ZIP archive + + .SH BUGS https://github.com/mikf/gallery-dl/issues diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 2d7b0ff..ecb9f9b 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -88,10 +88,6 @@ "highlights": false, "videos": true }, - "kissmanga": - { - "captcha": "stop" - }, "nijie": { "username": null, @@ -115,10 +111,6 @@ "wait-min": 3.0, "wait-max": 6.0 }, - "readcomiconline": - { - "captcha": "stop" - }, "reddit": { "comments": 0, diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index fc9f14b..3207269 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.15.0 +Version: 1.15.1 Summary: Command-line program to download image-galleries and -collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.0/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__ These executables include a Python 3.8 interpreter and all required Python packages. @@ -119,6 +119,14 @@ Description: ========== $ choco install gallery-dl + Scoop + ---------- + + Apart from Chocolatey, *gallery-dl* is also available in Scoop_ "main" bucket for Windows users. + + .. code:: powershell + + $ scoop install gallery-dl Usage ===== @@ -311,7 +319,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.0.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -324,6 +332,7 @@ Description: ========== .. _Snapd: https://docs.snapcraft.io/installing-snapd .. _OAuth: https://en.wikipedia.org/wiki/OAuth .. _Chocolatey: https://chocolatey.org/install + .. _Scoop: https://scoop.sh .. 
|pypi| image:: https://img.shields.io/pypi/v/gallery-dl.svg :target: https://pypi.org/project/gallery-dl/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index 648e273..c2e5cb4 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -12,7 +12,6 @@ docs/gallery-dl-example.conf docs/gallery-dl.conf gallery_dl/__init__.py gallery_dl/__main__.py -gallery_dl/aes.py gallery_dl/cache.py gallery_dl/cloudflare.py gallery_dl/config.py @@ -91,7 +90,6 @@ gallery_dl/extractor/issuu.py gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py gallery_dl/extractor/khinsider.py -gallery_dl/extractor/kissmanga.py gallery_dl/extractor/komikcast.py gallery_dl/extractor/konachan.py gallery_dl/extractor/lineblog.py @@ -156,6 +154,7 @@ gallery_dl/extractor/vanillarock.py gallery_dl/extractor/vsco.py gallery_dl/extractor/wallhaven.py gallery_dl/extractor/warosu.py +gallery_dl/extractor/weasyl.py gallery_dl/extractor/webtoons.py gallery_dl/extractor/weibo.py gallery_dl/extractor/wikiart.py diff --git a/gallery_dl/aes.py b/gallery_dl/aes.py deleted file mode 100644 index a45f50e..0000000 --- a/gallery_dl/aes.py +++ /dev/null @@ -1,337 +0,0 @@ -# -*- coding: utf-8 -*- - -# This is a stripped down version of youtube-dl's aes module. -# All credit for this code goes to the authors of the youtube-dl project. -# https://ytdl-org.github.io/youtube-dl/ -# https://github.com/ytdl-org/youtube-dl/ - -import base64 -from math import ceil - -BLOCK_SIZE_BYTES = 16 - - -def aes_cbc_decrypt(data, key, iv): - """ - Decrypt with aes in CBC mode - - @param {int[]} data cipher - @param {int[]} key 16/24/32-Byte cipher key - @param {int[]} iv 16-Byte IV - @returns {int[]} decrypted data - """ - expanded_key = key_expansion(key) - block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) - - decrypted_data = [] - previous_cipher_block = iv - for i in range(block_count): - block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - block += [0] * (BLOCK_SIZE_BYTES - len(block)) - - decrypted_block = aes_decrypt(block, expanded_key) - decrypted_data += xor(decrypted_block, previous_cipher_block) - previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data - - -def aes_cbc_decrypt_text(data, key, iv): - """ - Decrypt with aes in CBC mode - - @param {string} data base64 encoded cipher - @param {int[]} key 16/24/32-Byte cipher key - @param {int[]} iv 16-Byte IV - @returns {string} decrypted data as utf8 encoded string - """ - data = base64.standard_b64decode(bytes(data, "ascii")) - charcodes = aes_cbc_decrypt(list(data), key, iv) - last = charcodes[-1] - if last <= 16: - charcodes = charcodes[:-last] - return bytes(charcodes).decode() - - -def key_expansion(data): - """ - Generate key schedule - - @param {int[]} data 16/24/32-Byte cipher key - @returns {int[]} 176/208/240-Byte expanded key - """ - data = data[:] # copy - rcon_iteration = 1 - key_size_bytes = len(data) - expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES - - while len(data) < expanded_key_size_bytes: - temp = data[-4:] - temp = key_schedule_core(temp, rcon_iteration) - rcon_iteration += 1 - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - for _ in range(3): - temp = data[-4:] - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - if key_size_bytes == 32: - temp = data[-4:] - temp = sub_bytes(temp) - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - - if key_size_bytes == 32: - rounds = 3 - elif 
key_size_bytes == 24: - rounds = 2 - else: - rounds = 0 - for _ in range(rounds): - temp = data[-4:] - data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data - - -def aes_decrypt(data, expanded_key): - """ - Decrypt one block with aes - - @param {int[]} data 16-Byte cipher - @param {int[]} expanded_key 176/208/240-Byte expanded key - @returns {int[]} 16-Byte state - """ - rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 - - for i in range(rounds, 0, -1): - data = xor( - data, - expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] - ) - if i != rounds: - data = mix_columns_inv(data) - data = shift_rows_inv(data) - data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data - - -RCON = ( - 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, -) -SBOX = ( - 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, - 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, - 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, - 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, - 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, - 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, - 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, - 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, - 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, - 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, - 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, - 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, - 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, - 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, - 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, - 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, - 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, - 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, - 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, - 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, - 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, - 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, - 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, - 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, - 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, - 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, - 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, - 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, - 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, - 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, - 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, - 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16, -) -SBOX_INV = ( - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, - 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, - 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, - 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, - 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, - 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, - 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, - 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, - 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, - 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, - 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, - 
0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, - 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, - 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, - 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, - 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, - 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d, -) -MIX_COLUMN_MATRIX = ( - (0x2, 0x3, 0x1, 0x1), - (0x1, 0x2, 0x3, 0x1), - (0x1, 0x1, 0x2, 0x3), - (0x3, 0x1, 0x1, 0x2), -) -MIX_COLUMN_MATRIX_INV = ( - (0xE, 0xB, 0xD, 0x9), - (0x9, 0xE, 0xB, 0xD), - (0xD, 0x9, 0xE, 0xB), - (0xB, 0xD, 0x9, 0xE), -) -RIJNDAEL_EXP_TABLE = ( - 0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, - 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, - 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, - 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, - 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, - 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, - 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, - 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, - 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, - 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, - 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, - 0x81, 0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, - 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, - 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, - 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, - 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, - 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, - 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, - 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, - 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, - 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, - 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, - 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, - 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, - 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, - 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, - 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, - 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, - 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, - 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, - 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, - 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01, -) -RIJNDAEL_LOG_TABLE = ( - 0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, - 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, - 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, - 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, - 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, - 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, - 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, - 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, - 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, - 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, - 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, - 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, - 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, - 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, - 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, - 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, - 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, - 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, - 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, - 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, - 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, - 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, - 
0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, - 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, - 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, - 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, - 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, - 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, - 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, - 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, - 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, - 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07, -) - - -def sub_bytes(data): - return [SBOX[x] for x in data] - - -def sub_bytes_inv(data): - return [SBOX_INV[x] for x in data] - - -def rotate(data): - return data[1:] + [data[0]] - - -def key_schedule_core(data, rcon_iteration): - data = rotate(data) - data = sub_bytes(data) - data[0] = data[0] ^ RCON[rcon_iteration] - return data - - -def xor(data1, data2): - return [x ^ y for x, y in zip(data1, data2)] - - -def rijndael_mul(a, b): - if a == 0 or b == 0: - return 0 - return RIJNDAEL_EXP_TABLE[ - (RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF - ] - - -def mix_column(data, matrix): - data_mixed = [] - for row in range(4): - mixed = 0 - for column in range(4): - # xor is (+) and (-) - mixed ^= rijndael_mul(data[column], matrix[row][column]) - data_mixed.append(mixed) - return data_mixed - - -def mix_columns(data, matrix=MIX_COLUMN_MATRIX): - data_mixed = [] - for i in range(4): - column = data[i * 4: (i + 1) * 4] - data_mixed += mix_column(column, matrix) - return data_mixed - - -def mix_columns_inv(data): - return mix_columns(data, MIX_COLUMN_MATRIX_INV) - - -def shift_rows_inv(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column - row) & 0b11) * 4 + row]) - return data_shifted - - -__all__ = ['key_expansion', 'aes_cbc_decrypt', 'aes_cbc_decrypt_text'] diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index 4dc4f0d..fd973c3 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -166,7 +166,7 @@ class _500pxGalleryExtractor(_500pxExtractor): } gallery = self._request_graphql( "GalleriesDetailQueryRendererQuery", variables, - "1afc7dede86ff73456b4defbc5aeb593e330b990943d114cbef7da5be0d7ce2f", + "fd367cacf9bebcdc0620bd749dbd8fc9b0ccbeb54fc76b8b4b95e66a8c0cba49", )["gallery"] self._photos = gallery["photos"] @@ -194,8 +194,8 @@ class _500pxGalleryExtractor(_500pxExtractor): variables["cursor"] = photos["pageInfo"]["endCursor"] photos = self._request_graphql( "GalleriesDetailPaginationContainerQuery", variables, - "3fcbc9ea1589f31c86fc43a0a02c2163" - "cab070f9d376651f270de9f30f031539", + "457c66d976f56863c81795f03e98cb54" + "3c7c6cdae7abeab8fe9e8e8a67479fa9", )["galleryByOwnerIdAndSlugOrToken"]["photos"] diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index fafb785..b248735 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -94,12 +94,12 @@ class _8musesAlbumExtractor(Extractor): if albums: for album in albums: url = self.root + "/comics/album/" + album["permalink"] - album = { - "url" : url, - "name" : album["name"], - "private": album["isPrivate"], + yield Message.Queue, url, { + "url" : url, + "name" : album["name"], + "private" : album["isPrivate"], + "_extractor": _8musesAlbumExtractor, } - yield Message.Queue, url, album if data["page"] >= data["pages"]: return diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 53bc726..b8e39bc 100644 --- a/gallery_dl/extractor/__init__.py +++ 
b/gallery_dl/extractor/__init__.py @@ -59,7 +59,6 @@ modules = [ "kabeuchi", "keenspot", "khinsider", - "kissmanga", "komikcast", "konachan", "lineblog", @@ -118,6 +117,7 @@ modules = [ "vsco", "wallhaven", "warosu", + "weasyl", "webtoons", "weibo", "wikiart", diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 1126615..be498bc 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -31,8 +31,14 @@ class BehanceExtractor(Extractor): def _update(data): # compress data to simple lists if data["fields"] and isinstance(data["fields"][0], dict): - data["fields"] = [field["name"] for field in data["fields"]] - data["owners"] = [owner["display_name"] for owner in data["owners"]] + data["fields"] = [ + field.get("name") or field.get("label") + for field in data["fields"] + ] + data["owners"] = [ + owner.get("display_name") or owner.get("displayName") + for owner in data["owners"] + ] tags = data.get("tags") or () if tags and isinstance(tags[0], dict): @@ -101,7 +107,7 @@ class BehanceGalleryExtractor(BehanceExtractor): cookies = { "_evidon_consent_cookie": '{"consent_date":"2019-01-31T09:41:15.132Z"}', - "bcp": "815b5eee-8bdf-4898-ac79-33c2bcc0ed19", + "bcp": "4c34489d-914c-46cd-b44c-dfd0e661136d", "gk_suid": "66981391", "gki": '{"feature_project_view":false,' '"feature_discover_login_prompt":false,' @@ -184,14 +190,267 @@ class BehanceCollectionExtractor(BehanceExtractor): self.collection_id = match.group(1) def galleries(self): - url = "{}/collection/{}/a".format(self.root, self.collection_id) - params = {"offset": 0} - headers = {"X-Requested-With": "XMLHttpRequest"} + url = self.root + "/v3/graphql" + headers = { + "Origin" : self.root, + "Referer": self.root + "/collection/" + self.collection_id, + "X-BCP" : "4c34489d-914c-46cd-b44c-dfd0e661136d", + "X-NewRelic-ID" : "VgUFVldbGwsFU1BRDwUBVw==", + "X-Requested-With": "XMLHttpRequest", + } + cookies = { + "bcp" : "4c34489d-914c-46cd-b44c-dfd0e661136d", + "gk_suid": "66981391", + "ilo0" : "true", + } + + query = """ +query GetMoodboardItemsAndRecommendations( + $id: Int! + $firstItem: Int! + $afterItem: String + $shouldGetRecommendations: Boolean! + $shouldGetItems: Boolean! + $shouldGetMoodboardFields: Boolean! 
+ ) { + viewer @include(if: $shouldGetMoodboardFields) { + isOptedOutOfRecommendations + } + moodboard(id: $id) { + ...moodboardFields @include(if: $shouldGetMoodboardFields) + + items(first: $firstItem, after: $afterItem) @include(if: $shouldGetItems) + { + pageInfo { + endCursor + hasNextPage + } + nodes { + ...nodesFields + } + } + + recommendedItems(first: 80) @include(if: $shouldGetRecommendations) { + nodes { + ...nodesFields + fetchSource + } + } + } + } + + fragment moodboardFields on Moodboard { + id + label + privacy + followerCount + isFollowing + projectCount + url + isOwner + owners { + id + displayName + url + firstName + location + locationUrl + images { + size_50 { + url + } + size_100 { + url + } + size_115 { + url + } + size_230 { + url + } + size_138 { + url + } + size_276 { + url + } + } + } + } + + fragment projectFields on Project { + id + isOwner + publishedOn + matureAccess + hasMatureContent + modifiedOn + name + url + isPrivate + slug + fields { + label + } + colors { + r + g + b + } + owners { + url + displayName + id + location + locationUrl + isProfileOwner + images { + size_50 { + url + } + size_100 { + url + } + size_115 { + url + } + size_230 { + url + } + size_138 { + url + } + size_276 { + url + } + } + } + covers { + size_original { + url + } + size_max_808 { + url + } + size_808 { + url + } + size_404 { + url + } + size_202 { + url + } + size_230 { + url + } + size_115 { + url + } + } + stats { + views { + all + } + appreciations { + all + } + comments { + all + } + } + } + + fragment exifDataValueFields on exifDataValue { + id + label + value + searchValue + } + + fragment nodesFields on MoodboardItem { + id + entityType + width + height + flexWidth + flexHeight + images { + size + url + } + + entity { + ... on Project { + ...projectFields + } + + ... on ImageModule { + project { + ...projectFields + } + + exifData { + lens { + ...exifDataValueFields + } + software { + ...exifDataValueFields + } + makeAndModel { + ...exifDataValueFields + } + focalLength { + ...exifDataValueFields + } + iso { + ...exifDataValueFields + } + location { + ...exifDataValueFields + } + flash { + ...exifDataValueFields + } + exposureMode { + ...exifDataValueFields + } + shutterSpeed { + ...exifDataValueFields + } + aperture { + ...exifDataValueFields + } + } + } + + ... 
on MediaCollectionComponent { + project { + ...projectFields + } + } + } + } +""" + variables = { + "afterItem": "MAo=", + "firstItem": 40, + "id" : self.collection_id, + "shouldGetItems" : True, + "shouldGetMoodboardFields": False, + "shouldGetRecommendations": False, + } + data = {"query": query, "variables": variables} while True: - data = self.request(url, params=params, headers=headers).json() - for item in data["items"]: - yield item["project"] - if len(data["items"]) < 40: + items = self.request( + url, method="POST", headers=headers, + cookies=cookies, json=data, + ).json()["data"]["moodboard"]["items"] + + for node in items["nodes"]: + yield node["entity"] + + if not items["pageInfo"]["hasNextPage"]: return - params["offset"] += len(data["items"]) + variables["afterItem"] = items["pageInfo"]["endCursor"] diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a0f4d1c..9cceaee 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -262,9 +262,11 @@ class DeviantartExtractor(Extractor): return folder raise exception.NotFoundError("folder") - def _folder_urls(self, folders, category): - url = "{}/{}/{}/0/".format(self.root, self.user, category) - return [(url + folder["name"], folder) for folder in folders] + def _folder_urls(self, folders, category, extractor): + base = "{}/{}/{}/0/".format(self.root, self.user, category) + for folder in folders: + folder["_extractor"] = extractor + yield base + folder["name"], folder def _update_content_default(self, deviation, content): public = "premium_folder_data" not in deviation @@ -450,7 +452,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor): if self.flat and not self.group: return self.api.gallery_all(self.user, self.offset) folders = self.api.gallery_folders(self.user) - return self._folder_urls(folders, "gallery") + return self._folder_urls(folders, "gallery", DeviantartFolderExtractor) class DeviantartFolderExtractor(DeviantartExtractor): @@ -589,7 +591,8 @@ class DeviantartFavoriteExtractor(DeviantartExtractor): self.api.collections(self.user, folder["folderid"]) for folder in folders ) - return self._folder_urls(folders, "favourites") + return self._folder_urls( + folders, "favourites", DeviantartCollectionExtractor) class DeviantartCollectionExtractor(DeviantartExtractor): diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index cb4df11..06b5ba2 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -392,6 +392,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): def items(self): self.login() yield Message.Version, 1 + data = {"_extractor": ExhentaiGalleryExtractor} while True: last = None @@ -402,7 +403,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor): if url == last: continue last = url - yield Message.Queue, url, {} + yield Message.Queue, url, data if 'class="ptdd">><' in page or ">No hits found</p>" in page: return diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py index a2d8c04..44863a9 100644 --- a/gallery_dl/extractor/fallenangels.py +++ b/gallery_dl/extractor/fallenangels.py @@ -66,9 +66,9 @@ class FallenangelsMangaExtractor(MangaExtractor): category = "fallenangels" pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$" test = ( - ("http://manga.fascans.com/manga/trinity-seven", { - "url": "293057f264de6c438b979bd1c3de4719568db452", - "keyword": "50e0374dba60734230e4284b5ffdadef5104ae62", + 
("https://manga.fascans.com/manga/chronos-ruler", { + "url": "eea07dd50f5bc4903aa09e2cc3e45c7241c9a9c2", + "keyword": "c414249525d4c74ad83498b3c59a813557e59d7e", }), ("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", { "url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b", diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py index 0ab42db..bf925b6 100644 --- a/gallery_dl/extractor/foolslide.py +++ b/gallery_dl/extractor/foolslide.py @@ -173,8 +173,7 @@ EXTRACTORS = { ), "test-manga": ("https://sensescans.com/reader/series/yotsubato/", { - "url": "305e6eb6160e3bb90c3de39ff5fb7c971e052087", - "keyword": "562fb5a7362a4cb43d59d5c8a6ea8080fc65cf99", + "count": ">= 3", }), }, "_ckey": "chapterclass", diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 950a174..2a5ef6e 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -236,7 +236,9 @@ class FuraffinityPostExtractor(FuraffinityExtractor): pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)" test = ( ("https://www.furaffinity.net/view/21835115/", { - "url": "d80254eb4fba654597b4df8320d55916e11ba375", + "pattern": r"https://d\d*\.facdn\.net/(download/)?art/mirlinthloth" + r"/music/1488278723/1480267446.mirlinthloth_dj_fennmink" + r"_-_bude_s_4_ever\.mp3", "keyword": { "artist" : "mirlinthloth", "artist_url" : "mirlinthloth", diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index ac1bca3..ba2fe5d 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -100,13 +100,13 @@ class GfycatImageExtractor(GfycatExtractor): "gfyName": "GrayGenerousCowrie", "gfyNumber": "755075459", "title": "Bottom's up", - "userName": "jackson3oh3", + "username": "jackson3oh3", "createDate": 1495884169, "md5": "a4796e05b0db9ba9ce5140145cd318aa", "width": 400, "height": 224, - "frameRate": 23, - "numFrames": 158, + "frameRate": 23.0, + "numFrames": 158.0, "views": int, }, }), diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py index 1ab71d6..833135e 100644 --- a/gallery_dl/extractor/hentaicafe.py +++ b/gallery_dl/extractor/hentaicafe.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -50,17 +50,17 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): # single chapter ("https://hentai.cafe/hazuki-yuuto-summer-blues/", { "url": "f8e24a07d6fbb7c6a6ec5ad8ad8faf2436f8751b", - "keyword": "eb9f98544098c961bd8cf5dbe69e6da51c4fb2f6", + "keyword": "5af1c570bb5f533a32b3375f9cdaa17a0152ba67", }), # multi-chapter ("https://hentai.cafe/saitom-saitom-box/", { "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", - "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb", + "keyword": "3c28517d356cac6acbd9895c9eeefae505304078", }), # new-style URL ("https://hentai.cafe/hc.fyi/2782", { "url": "ca3e8a91531fd6acd863d93ac3afbd8ead06a076", - "keyword": "28271062d7b4a2f99a0e1a894f69af8c5581a6bb", + "keyword": "3c28517d356cac6acbd9895c9eeefae505304078", }), # foolslide URL ("https://hentai.cafe/manga/series/saitom-box/", { @@ -80,12 +80,14 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor): chapters.reverse() return chapters - tags , pos = text.extract(page, "<p>Tags: ", "</br>") + url , pos = text.extract(page, '<link rel="canonical" href="', '"') + tags , pos 
= text.extract(page, "<p>Tags: ", "</br>", pos) artist, pos = text.extract(page, "\nArtists: ", "</br>", pos) manga , pos = text.extract(page, "/manga/read/", "/", pos) data = { - "tags" : text.split_html(tags)[::2], - "artist": text.split_html(artist), + "manga_id": text.parse_int(url.rpartition("/")[2]), + "tags" : text.split_html(tags)[::2], + "artist" : text.split_html(artist), } HentaicafeChapterExtractor._data(manga).update(data) diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 6e82091..5eb46b6 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -34,7 +34,7 @@ class HentaifoundryExtractor(Extractor): yield Message.Directory, data self.set_filters() - for page_url in util.advance(self.get_image_pages(), self.start_post): + for page_url in util.advance(self._pagination(), self.start_post): image = self.get_image_metadata(page_url) image.update(data) yield Message.Url, image["src"], image @@ -50,13 +50,12 @@ class HentaifoundryExtractor(Extractor): self.request(self.root + "/?enterAgree=1") return {"user": self.user} - def get_image_pages(self): - """Yield urls of all relevant image pages""" + def _pagination(self, begin='thumbTitle"><a href="', end='"'): num = self.start_page while True: page = self.request("{}/page/{}".format(self.page_url, num)).text - yield from text.extract_iter(page, 'thumbTitle"><a href="', '"') + yield from text.extract_iter(page, begin, end) if 'class="pager"' not in page or 'class="last hidden"' in page: return @@ -90,6 +89,33 @@ class HentaifoundryExtractor(Extractor): return text.nameext_from_url(data["src"], data) + def get_story_metadata(self, html): + """Collect url and metadata for a story""" + extr = text.extract_from(html) + data = { + "user" : self.user, + "title" : text.unescape(extr( + "<div class='titlebar'>", "</a>").rpartition(">")[2]), + "author" : text.unescape(extr('alt="', '"')), + "date" : text.parse_datetime(extr( + ">Updated<", "</span>").rpartition(">")[2], "%B %d, %Y"), + "status" : extr("class='indent'>", "<"), + } + + for c in ("Chapters", "Words", "Comments", "Views", "Rating"): + data[c.lower()] = text.parse_int(extr( + ">" + c + ":</span>", "<").replace(",", "")) + + data["description"] = text.unescape(extr( + "class='storyDescript'>", "<div")) + path = extr('href="', '"') + data["src"] = self.root + path + data["index"] = text.parse_int(path.rsplit("/", 2)[1]) + data["ratings"] = [text.unescape(r) for r in text.extract_iter(extr( + "class='ratings_box'", "</div>"), "title='", "'")] + + return text.nameext_from_url(data["src"], data) + def set_filters(self): """Set site-internal filters to show all images""" token = text.unquote(text.extract( @@ -127,19 +153,41 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor): """Extractor for all images of a hentai-foundry-user""" subcategory = "user" pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com" - r"/(?:pictures/user/([^/]+)(?:/page/(\d+))?/?$" - r"|user/([^/]+)/profile)") + r"/user/([^/]+)/profile") + test = ("https://www.hentai-foundry.com/user/Tenpura/profile",) + + def __init__(self, match): + HentaifoundryExtractor.__init__(self, match, match.group(1)) + + def items(self): + user = "/user/" + self.user + return self._dispatch_extractors(( + (HentaifoundryPicturesExtractor , + self.root + "/pictures" + user), + (HentaifoundryScrapsExtractor, + self.root + "/pictures" + user + "/scraps"), + (HentaifoundryStoriesExtractor, + self.root + "/stories" + user), + 
(HentaifoundryFavoriteExtractor, + self.root + user + "/faves/pictures"), + ), ("pictures",)) + + +class HentaifoundryPicturesExtractor(HentaifoundryExtractor): + """Extractor for all pictures of a hentaifoundry user""" + subcategory = "pictures" + pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com" + r"/pictures/user/([^/]+)(?:/page/(\d+))?/?$") test = ( ("https://www.hentai-foundry.com/pictures/user/Tenpura", { "url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28", }), ("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"), - ("https://www.hentai-foundry.com/user/Tenpura/profile"), ) def __init__(self, match): HentaifoundryExtractor.__init__( - self, match, match.group(1) or match.group(3), match.group(2)) + self, match, match.group(1), match.group(2)) self.page_url = "{}/pictures/user/{}".format(self.root, self.user) def get_job_metadata(self): @@ -284,3 +332,68 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor): def skip(self, _): return 0 + + +class HentaifoundryStoriesExtractor(HentaifoundryExtractor): + """Extractor for stories of a hentai-foundry user""" + subcategory = "stories" + pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com" + r"/stories/user/([^/]+)(?:/page/(\d+))?/?$") + test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", { + "count": ">= 35", + "keyword": { + "author" : "SnowWolf35", + "chapters" : int, + "comments" : int, + "date" : "type:datetime", + "description": str, + "index" : int, + "rating" : int, + "ratings" : list, + "status" : "re:(Inc|C)omplete", + "title" : str, + "user" : "SnowWolf35", + "views" : int, + "words" : int, + }, + }) + + def __init__(self, match): + HentaifoundryExtractor.__init__(self, match, match.group(1)) + self.page_url = "{}/stories/user/{}".format(self.root, self.user) + + def items(self): + self.get_job_metadata() + self.set_filters() + stories = self._pagination('<div class="storyRow">', '</tr></table>') + for story_html in util.advance(stories, self.start_post): + story = self.get_story_metadata(story_html) + yield Message.Directory, story + yield Message.Url, story["src"], story + + +class HentaifoundryStoryExtractor(HentaifoundryExtractor): + """Extractor for a hentaifoundry story""" + subcategory = "story" + pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com" + r"/stories/user/([^/]+)/(\d+)") + test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35" + "/26416/Overwatch-High-Chapter-Voting-Location"), { + "url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8", + "keyword": {"title": "Overwatch High Chapter Voting Location"}, + }) + + def __init__(self, match): + HentaifoundryExtractor.__init__(self, match, match.group(1)) + self.index = match.group(2) + + def items(self): + story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format( + self.root, self.user, self.index) + page = self.request(story_url).text + story = self.get_story_metadata(page) + yield Message.Directory, story + yield Message.Url, story["src"], story + + def skip(self, _): + return 0 diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py deleted file mode 100644 index 348453d..0000000 --- a/gallery_dl/extractor/kissmanga.py +++ /dev/null @@ -1,222 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2015-2020 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. 
- -"""Extract manga-chapters and entire manga from https://kissmanga.com/""" - -from .common import ChapterExtractor, MangaExtractor, Extractor -from .. import text, aes, exception -from ..cache import cache -import hashlib -import ast -import re - - -class RedirectMixin(): - """Detect and handle redirects to CAPTCHA pages""" - - def request(self, url, **kwargs): - while True: - response = Extractor.request(self, url, **kwargs) - if not response.history or "/AreYouHuman" not in response.url: - return response - if self.config("captcha", "stop") == "wait": - self.log.warning( - "Redirect to \n%s\nVisit this URL in your browser, solve " - "the CAPTCHA, and press ENTER to continue", response.url) - try: - input() - except (EOFError, OSError): - pass - else: - raise exception.StopExtraction( - "Redirect to \n%s\nVisit this URL in your browser and " - "solve the CAPTCHA to continue", response.url) - - -class KissmangaBase(RedirectMixin): - """Base class for kissmanga extractors""" - category = "kissmanga" - archive_fmt = "{chapter_id}_{page}" - root = "https://kissmanga.com" - - @staticmethod - def parse_chapter_string(data): - """Parse 'chapter_string' value contained in 'data'""" - data["chapter_string"] = text.unescape(data["chapter_string"]) - - match = re.match(( - r"(?:[Vv]ol\.0*(\d+) )?" - r"(?:[Cc]h\.)?0*(\d+)" - r"(?:[.:]0*(\d+))?" - r"(?: *[:-]? *(.+))?" - ), data["chapter_string"]) - - if not match: - match = re.match(( - r".+?(?: -)? ()" - r"0*(\d+)(?:[Vv.]0*(\d+))?" - r"(?: *[:-]? *(.+))?" - ), data["chapter_string"]) - - if match: - volume, chapter, minor, title = match.groups() - else: - volume, chapter, minor, title = 0, 0, "", data["chapter_string"] - - data["volume"] = text.parse_int(volume) - data["chapter"] = text.parse_int(chapter) - data["chapter_minor"] = "." 
+ minor if minor else "" - data["title"] = title if title and title != "Read Online" else "" - return data - - -class KissmangaChapterExtractor(KissmangaBase, ChapterExtractor): - """Extractor for manga-chapters from kissmanga.com""" - pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com" - r"(/Manga/[^/?&#]+/[^/?&#]+\?id=(\d+))") - test = ( - ("https://kissmanga.com/Manga/Dropout/Ch-000---Oneshot-?id=145847", { - "url": "46e63fd63e9e16f19bc1e6c7a45dc060815642fd", - "keyword": "1cd0b5214ac7ae4d53e2fd8fec40ceec84cd09bf", - }), - ("https://kissmanga.com/Manga/Urban-Tales/a?id=256717", { - "url": "c26be8bf9c2abacee2076979d021634092cf38f1", - "keyword": "e1d16780df8e04076ed2b5f0637c5b710ec2f2ea", - }), - ("https://kissmanga.com/Manga/Monster/Monster-79?id=7608", { - "count": 23, - "keyword": "f433a7a8fae840e17dace316a243fa27faab86de", - }), - ("https://kissmanga.com/Manga/Houseki-no-Kuni/Oneshot?id=404189", { - "count": 49, - "keyword": "cea131c9fe9c71309b3270cd86718d4d1198c31c", - }), - ("https://kissmanga.com/mAnGa/mOnStEr/Monster-79?id=7608"), - ) - - def __init__(self, match): - ChapterExtractor.__init__(self, match) - self.chapter_id = match.group(2) - self.session.headers["Referer"] = self.root - - def metadata(self, page): - title = text.extract(page, "<title>", "</title>")[0].strip() - manga, cinfo = title.split("\n")[1:3] - data = { - "manga": manga.strip(), - "chapter_string": cinfo.strip(), - "chapter_id": text.parse_int(self.chapter_id), - "lang": "en", - "language": "English", - } - return self.parse_chapter_string(data) - - def images(self, page): - self.session.headers["Referer"] = None - try: - key = self.build_aes_key(page) - iv = (0xa5, 0xe8, 0xe2, 0xe9, 0xc2, 0x72, 0x1b, 0xe0, - 0xa8, 0x4a, 0xd6, 0x60, 0xc4, 0x72, 0xc1, 0xf3) - return [ - (aes.aes_cbc_decrypt_text( - data, key, iv).partition("&")[0], None) - for data in text.extract_iter( - page, 'push(wrapKA("', '"' - ) - ] - except UnicodeDecodeError: - self.log.error("Failed to decrypt image URLs") - except (ValueError, IndexError): - self.log.error("Failed to get AES key") - return [] - - def build_aes_key(self, page): - chko = self._chko_from_external_script() - - for script in self._scripts(page): - for stmt in [s.strip() for s in script.split(";")]: - - if stmt.startswith("var _"): - name, _, value = stmt[4:].partition(" = ") - name += "[0]" - value = ast.literal_eval(value)[0] - - elif stmt.startswith("chko = "): - stmt = stmt[7:] - if stmt == name: - chko = value - elif stmt == "chko + " + name: - chko = chko + value - elif stmt == name + " + chko": - chko = value + chko - else: - self.log.warning("unrecognized expression: '%s'", stmt) - - elif stmt.startswith("key = "): - pass - - else: - self.log.warning("unrecognized statement: '%s'", stmt) - - return list(hashlib.sha256(chko.encode("ascii")).digest()) - - @staticmethod - def _scripts(page): - end = 0 - while True: - pos = page.find("key = ", end) - if pos == -1: - return - beg = page.rindex('<script type="text/javascript">', 0, pos) + 31 - end = page.index('</script>', pos) - yield page[beg:end] - - @cache(maxage=3600) - def _chko_from_external_script(self): - script = self.request(self.root + "/Scripts/lo.js").text - - pos = script.index("var chko") - var = text.extract(script, "=", "[", pos)[0].lstrip() - idx = text.extract(script, "[", "]", pos)[0] - - pos = script.index(var) - lst = text.extract(script, "=", ";", pos)[0] - return ast.literal_eval(lst.strip())[int(idx)] - - -class KissmangaMangaExtractor(KissmangaBase, MangaExtractor): - """Extractor for 
manga from kissmanga.com""" - chapterclass = KissmangaChapterExtractor - pattern = (r"(?i)(?:https?://)?(?:www\.)?kissmanga\.com" - r"(/Manga/[^/?&#]+/?)$") - test = ( - ("https://kissmanga.com/Manga/Dropout", { - "url": "9e3a6f715b229aa3fafa42a1d5da5d65614cb532", - "keyword": "32b09711c28b481845acc32e3bb6054cfc90224d", - }), - ("https://kissmanga.com/manga/feng-shen-ji"), # lowercase - ) - - def chapters(self, page): - results = [] - manga, pos = text.extract(page, ' class="barTitle">', '\ninformation') - page , pos = text.extract(page, ' class="listing">', '</table>', pos) - manga = manga.strip() - needle = '" title="Read ' + manga + ' ' - manga = text.unescape(manga) - - for item in text.extract_iter(page, '<a href="', ' online">'): - url, _, chapter = item.partition(needle) - data = { - "manga": manga, "chapter_string": chapter, - "chapter_id": text.parse_int(url.rpartition("=")[2]), - "lang": "en", "language": "English", - } - self.parse_chapter_string(data) - results.append((self.root + url, data)) - return results diff --git a/gallery_dl/extractor/mangoxo.py b/gallery_dl/extractor/mangoxo.py index 0e04f97..5743498 100644 --- a/gallery_dl/extractor/mangoxo.py +++ b/gallery_dl/extractor/mangoxo.py @@ -167,6 +167,8 @@ class MangoxoChannelExtractor(MangoxoExtractor): self.login() num = total = 1 url = "{}/channel/{}/album/".format(self.root, self.channel_id) + data = {"_extractor": MangoxoAlbumExtractor} + yield Message.Version, 1 while True: @@ -174,7 +176,7 @@ class MangoxoChannelExtractor(MangoxoExtractor): for album in text.extract_iter( page, '<a class="link black" href="', '"'): - yield Message.Queue, album, {} + yield Message.Queue, album, data if num == 1: total = self._total_pages(page) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 19a2b92..f9dc886 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -19,8 +19,8 @@ class NewgroundsExtractor(Extractor): """Base class for newgrounds extractors""" category = "newgrounds" directory_fmt = ("{category}", "{artist[:10]:J, }") - filename_fmt = "{category}_{index}_{title}.{extension}" - archive_fmt = "{index}" + filename_fmt = "{category}_{_index}_{title}.{extension}" + archive_fmt = "{_index}" root = "https://www.newgrounds.com" cookiedomain = ".newgrounds.com" cookienames = ("NG_GG_username", "vmk1du5I8m") @@ -44,6 +44,13 @@ class NewgroundsExtractor(Extractor): if url: yield Message.Directory, post yield Message.Url, url, text.nameext_from_url(url, post) + + for num, url in enumerate(text.extract_iter( + post["_comment"], 'data-smartload-src="', '"'), 1): + post["num"] = num + post["_index"] = "{}_{:>02}".format(post["index"], num) + text.nameext_from_url(url, post) + yield Message.Url, url, post else: self.log.warning( "Unable to get download URL for '%s'", post_url) @@ -97,8 +104,9 @@ class NewgroundsExtractor(Extractor): else: data = self._extract_media_data(extr, post_url) - data["comment"] = text.unescape(text.remove_html(extr( - 'id="author_comments">', '</div>'), "", "")) + data["_comment"] = extr('id="author_comments"', '</div>') + data["comment"] = text.unescape(text.remove_html( + data["_comment"].partition(">")[2], "", "")) data["favorites"] = text.parse_int(extr( 'id="faves_load">', '<').replace(",", "")) data["score"] = text.parse_float(extr('id="score_number">', '<')) @@ -125,33 +133,54 @@ class NewgroundsExtractor(Extractor): "width" : text.parse_int(full('width="', '"')), "height" : text.parse_int(full('height="', '"')), } - 
data["index"] = text.parse_int( - data["url"].rpartition("/")[2].partition("_")[0]) + index = data["url"].rpartition("/")[2].partition("_")[0] + data["index"] = text.parse_int(index) + data["_index"] = index return data @staticmethod def _extract_audio_data(extr, url): + index = url.split("/")[5] return { "title" : text.unescape(extr('"og:title" content="', '"')), "description": text.unescape(extr(':description" content="', '"')), "date" : text.parse_datetime(extr( 'itemprop="datePublished" content="', '"')), "url" : extr('{"url":"', '"').replace("\\/", "/"), - "index" : text.parse_int(url.split("/")[5]), + "index" : text.parse_int(index), + "_index" : index, "rating" : "", } - @staticmethod - def _extract_media_data(extr, url): + def _extract_media_data(self, extr, url): + index = url.split("/")[5] + title = extr('"og:title" content="', '"') + src = extr('{"url":"', '"') + + if src: + src = src.replace("\\/", "/") + date = text.parse_datetime(extr( + 'itemprop="datePublished" content="', '"')) + else: + url = self.root + "/portal/video/" + index + headers = { + "Accept": "application/json, text/javascript, */*; q=0.01", + "X-Requested-With": "XMLHttpRequest", + "Referer": self.root, + } + data = self.request(url, headers=headers).json() + src = data["sources"]["360p"][0]["src"].replace(".360p.", ".") + date = text.parse_timestamp(src.rpartition("?")[2]) + return { - "title" : text.unescape(extr('"og:title" content="', '"')), - "url" : extr('{"url":"', '"').replace("\\/", "/"), - "date" : text.parse_datetime(extr( - 'itemprop="datePublished" content="', '"')), + "title" : text.unescape(title), + "url" : src, + "date" : date, "description": text.unescape(extr( 'itemprop="description" content="', '"')), "rating" : extr('class="rated-', '"'), - "index" : text.parse_int(url.split("/")[5]), + "index" : text.parse_int(index), + "_index" : index, } def _pagination(self, kind): @@ -215,6 +244,10 @@ class NewgroundsImageExtractor(NewgroundsExtractor): ("https://art.ngfiles.com/images/0/94_tomfulp_ryu-is-hawt.gif", { "url": "57f182bcbbf2612690c3a54f16ffa1da5105245e", }), + ("https://www.newgrounds.com/art/view/sailoryon/yon-dream-buster", { + "url": "84eec95e663041a80630df72719f231e157e5f5d", + "count": 2, + }) ) def __init__(self, match): @@ -236,23 +269,21 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): pattern = (r"(?:https?://)?(?:www\.)?newgrounds\.com" r"(/(?:portal/view|audio/listen)/\d+)") test = ( - ("https://www.newgrounds.com/portal/view/589549", { - "url": "48d916d819c99139e6a3acbbf659a78a867d363e", - "content": "ceb865426727ec887177d99e0d20bb021e8606ae", + ("https://www.newgrounds.com/portal/view/595355", { + "pattern": r"https://uploads\.ungrounded\.net/alternate/564000" + r"/564957_alternate_31\.mp4\?1359712249", "keyword": { - "artist" : ["psychogoldfish", "tomfulp"], - "comment" : "re:People have been asking me how I like the ", - "date" : "dt:2012-02-08 21:40:56", - "description": "re:People have been asking how I like the ", + "artist" : ["kickinthehead", "danpaladin", "tomfulp"], + "comment" : "re:My fan trailer for Alien Hominid HD!", + "date" : "dt:2013-02-01 09:50:49", "favorites" : int, - "filename" : "527818_alternate_1896", - "index" : 589549, - "rating" : "t", + "filename" : "564957_alternate_31", + "index" : 595355, + "rating" : "e", "score" : float, - "tags" : ["newgrounds", "psychogoldfish", - "rage", "redesign-2012"], - "title" : "Redesign Rage", - "user" : "psychogoldfish", + "tags" : ["alienhominid", "trailer"], + "title" : "Alien Hominid Fan Trailer", + 
"user" : "kickinthehead", }, }), ("https://www.newgrounds.com/audio/listen/609768", { diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index aae17a3..2394acf 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2019 Mike Fährmann +# Copyright 2015-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -127,9 +127,25 @@ class NijieExtractor(AsynchronousMixin, Extractor): class NijieUserExtractor(NijieExtractor): - """Extractor for works of a nijie-user""" + """Extractor for nijie user profiles""" subcategory = "user" - pattern = BASE_PATTERN + r"/members(?:_illust)?\.php\?id=(\d+)" + cookiedomain = None + pattern = BASE_PATTERN + r"/members\.php\?id=(\d+)" + test = ("https://nijie.info/members.php?id=44",) + + def items(self): + base = "{}/{{}}.php?id={}".format(self.root, self.user_id) + return self._dispatch_extractors(( + (NijieIllustrationExtractor, base.format("members_illust")), + (NijieDoujinExtractor , base.format("members_dojin")), + (NijieFavoriteExtractor , base.format("user_like_illust_view")), + ), ("illustration", "doujin")) + + +class NijieIllustrationExtractor(NijieExtractor): + """Extractor for all illustrations of a nijie-user""" + subcategory = "illustration" + pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)" test = ( ("https://nijie.info/members_illust.php?id=44", { "url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e", @@ -152,7 +168,6 @@ class NijieUserExtractor(NijieExtractor): ("https://nijie.info/members_illust.php?id=43", { "exception": exception.NotFoundError, }), - ("https://nijie.info/members.php?id=44"), ) def image_ids(self): diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index abf88cd..5e7e387 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -106,7 +106,7 @@ class NozomiPostExtractor(NozomiExtractor): # multiple images per post ("https://nozomi.la/post/25588032.html", { "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228", - "keyword": "0aa99cbaaeada2984a1fbf912274409c6ba106d4", + "keyword": "8c3a2561ccc9ad429be9850d1383a952d0b4a8ab", "count": 7, }), ) diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index c07c4b7..6d7b27a 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -180,16 +180,11 @@ class OAuthBase(Extractor): self.send(msg) def _generate_message(self, names, values): - if len(names) == 1: - _vh = "This value has" - _is = "is" - _it = "it" - _va = "this value" - else: - _vh = "These values have" - _is = "are" - _it = "them" - _va = "these values" + _vh, _va, _is, _it = ( + ("This value has", "this value", "is", "it") + if len(names) == 1 else + ("These values have", "these values", "are", "them") + ) msg = "\nYour {} {}\n\n{}\n\n".format( " and ".join("'" + n + "'" for n in names), @@ -197,23 +192,21 @@ class OAuthBase(Extractor): "\n".join(values), ) - if self.cache: - opt = self.oauth_config(names[0]) - if opt is None or opt == "cache": - msg += _vh + " been cached and will automatically be used." - else: - msg += ( - "Set 'extractor.{}.{}' to \"cache\" to use {}.".format( - self.subcategory, names[0], _it, - ) - ) + opt = self.oauth_config(names[0]) + if self.cache and (opt is None or opt == "cache"): + msg += _vh + " been cached and will automatically be used." 
else: msg += "Put " + _va + " into your configuration file as \n" msg += " and\n".join( "'extractor." + self.subcategory + "." + n + "'" for n in names ) - msg += "." + if self.cache: + msg += ( + "\nor set\n'extractor.{}.{}' to \"cache\"" + .format(self.subcategory, names[0]) + ) + msg += "\nto use {}.".format(_it) return msg diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index 8f2d633..f08055c 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -95,8 +95,8 @@ class PahealPostExtractor(PahealExtractor): pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net" r"/post/view/(\d+)") test = ("https://rule34.paheal.net/post/view/481609", { - "url": "d3fd0f82762716fe3fb03c9c923e61c13ce22204", - "keyword": "35748081bfeaab48f909f4b097a4d79b2be12538", + "url": "a91d579be030753282f55b8cb4eeaa89c45a9116", + "keyword": "44154bdac3d6cf289d0d9739a566acd8b7839e50", "content": "7b924bcf150b352ac75c9d281d061e174c851a11", }) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index eaf97fd..ee8f9bb 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -105,7 +105,7 @@ class PixivUserExtractor(PixivExtractor): # avatar (#595, 623) ("https://www.pixiv.net/en/users/173530", { "options": (("avatar", True),), - "content": "22af450d4dbaf4973d370f164f66f48c7382a6de", + "content": "4e57544480cc2036ea9608103e8f024fa737fe66", "range": "1", }), # deleted account diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index bbbc709..6b36cdd 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -29,9 +29,9 @@ class PornhubGalleryExtractor(PornhubExtractor): archive_fmt = "{id}" pattern = BASE_PATTERN + r"/album/(\d+)" test = ( - ("https://www.pornhub.com/album/1708982", { + ("https://www.pornhub.com/album/17218841", { "pattern": r"https://\w+.phncdn.com/pics/albums/\d+/\d+/\d+/\d+/", - "count": 93, + "count": 81, "keyword": { "id": int, "num": int, @@ -40,11 +40,11 @@ class PornhubGalleryExtractor(PornhubExtractor): "caption": str, "user": "Unknown", "gallery": { - "id" : 1708982, + "id" : 17218841, "score": int, "views": int, "tags" : list, - "title": "Random Hentai", + "title": "Hentai/Ecchi 41", }, }, }), diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index 8290d2d..e5b4b44 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -16,7 +16,7 @@ import time import json -BASE_PATTERN = r"(?:https?://)?([^/.]+\.reactor\.cc)" +BASE_PATTERN = r"(?:https?://)?((?:[^/.]+\.)?reactor\.cc)" class ReactorExtractor(SharedConfigMixin, Extractor): diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index dda4809..7030c81 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -1,20 +1,19 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract comic-issues and entire comics from https://readcomiconline.to/""" +"""Extractors for https://readcomiconline.to/""" -from .common import ChapterExtractor, MangaExtractor -from .kissmanga import RedirectMixin -from .. import text +from .common import Extractor, ChapterExtractor, MangaExtractor +from .. 
import text, exception import re -class ReadcomiconlineBase(RedirectMixin): +class ReadcomiconlineBase(): """Base class for readcomiconline extractors""" category = "readcomiconline" directory_fmt = ("{category}", "{comic}", "{issue:>03}") @@ -22,6 +21,25 @@ class ReadcomiconlineBase(RedirectMixin): archive_fmt = "{issue_id}_{page}" root = "https://readcomiconline.to" + def request(self, url, **kwargs): + """Detect and handle redirects to CAPTCHA pages""" + while True: + response = Extractor.request(self, url, **kwargs) + if not response.history or "/AreYouHuman" not in response.url: + return response + if self.config("captcha", "stop") == "wait": + self.log.warning( + "Redirect to \n%s\nVisit this URL in your browser, solve " + "the CAPTCHA, and press ENTER to continue", response.url) + try: + input() + except (EOFError, OSError): + pass + else: + raise exception.StopExtraction( + "Redirect to \n%s\nVisit this URL in your browser and " + "solve the CAPTCHA to continue", response.url) + class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): """Extractor for comic-issues from readcomiconline.to""" diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index b07d024..a9252f5 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -152,7 +152,7 @@ class SankakuTagExtractor(SankakuExtractor): test = ( ("https://chan.sankakucomplex.com/?tags=bonocho", { "count": 5, - "pattern": r"https://cs\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" + "pattern": r"https://c?s\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}" r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+", }), # respect 'page' query parameter diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 236a001..c98a300 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -110,16 +110,17 @@ class TwitterExtractor(Extractor): twitpics = [] for url in tweet["entities"].get("urls", ()): url = url["expanded_url"] - if "//twitpic.com/" in url: + if "//twitpic.com/" in url and "/photos/" not in url: response = self.request(url, fatal=False) if response.status_code >= 400: continue url = text.extract( response.text, 'name="twitter:image" value="', '"')[0] - twitpics.append({ - "original_info": {}, - "media_url" : url, - }) + if url: + twitpics.append({ + "original_info": {}, + "media_url" : url, + }) if twitpics: if "extended_entities" in tweet: tweet["extended_entities"]["media"].extend(twitpics) @@ -312,6 +313,7 @@ class TwitterSearchExtractor(TwitterExtractor): test = ("https://twitter.com/search?q=nature", { "range": "1-40", "count": 40, + "archive": False, }) def metadata(self): @@ -378,6 +380,15 @@ class TwitterTweetExtractor(TwitterExtractor): "url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98", "content": "f29501e44d88437fe460f5c927b7543fda0f6e34", }), + # original retweets (#1026) + ("https://twitter.com/jessica_3978/status/1296304589591810048", { + "options": (("retweets", "original"),), + "count": 2, + "keyword": { + "tweet_id": 1296296016002547713, + "date" : "dt:2020-08-20 04:00:28", + }, + }), ) def __init__(self, match): @@ -451,7 +462,8 @@ class TwitterAPI(): endpoint = "2/timeline/conversation/{}.json".format(tweet_id) tweets = [] for tweet in self._pagination(endpoint): - if tweet["id_str"] == tweet_id: + if tweet["id_str"] == tweet_id or \ + tweet.get("_retweet_id_str") == tweet_id: tweets.append(tweet) if "quoted_status_id_str" in tweet: tweet_id = tweet["quoted_status_id_str"] @@ -536,6 +548,7 @@ class TwitterAPI(): 
entry_tweet="tweet-", entry_cursor="cursor-bottom-"): if params is None: params = self.params.copy() + original_retweets = (self.extractor.retweets == "original") while True: cursor = tweet = None @@ -558,12 +571,17 @@ class TwitterAPI(): "Skipping %s (deleted)", entry["entryId"][len(entry_tweet):]) continue - tweet["user"] = users[tweet["user_id_str"]] if "retweeted_status_id_str" in tweet: retweet = tweets.get(tweet["retweeted_status_id_str"]) - if retweet: + if original_retweets: + if not retweet: + continue + retweet["_retweet_id_str"] = tweet["id_str"] + tweet = retweet + elif retweet: tweet["author"] = users[retweet["user_id_str"]] + tweet["user"] = users[tweet["user_id_str"]] yield tweet if "quoted_status_id_str" in tweet: diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py new file mode 100644 index 0000000..a39fbf1 --- /dev/null +++ b/gallery_dl/extractor/weasyl.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.weasyl.com/""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl.com/" + + +class WeasylExtractor(Extractor): + category = "weasyl" + directory_fmt = ("{category}", "{owner_login}") + filename_fmt = "{submitid} {title}.{extension}" + archive_fmt = "{submitid}" + root = "https://www.weasyl.com" + + @staticmethod + def populate_submission(data): + # Some submissions don't have content and can be skipped + if "submission" in data["media"]: + data["url"] = data["media"]["submission"][0]["url"] + data["date"] = text.parse_datetime( + data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S") + text.nameext_from_url(data["url"], data) + return True + return False + + def request_submission(self, submitid): + return self.request( + "{}/api/submissions/{}/view".format(self.root, submitid)).json() + + def retrieve_journal(self, journalid): + data = self.request( + "{}/api/journals/{}/view".format(self.root, journalid)).json() + data["extension"] = "html" + data["html"] = "text:" + data["content"] + data["date"] = text.parse_datetime(data["posted_at"]) + return data + + def submissions(self, owner_login, folderid=None): + url = "{}/api/users/{}/gallery".format(self.root, owner_login) + params = { + "nextid" : None, + "folderid": folderid, + } + + while True: + data = self.request(url, params=params).json() + for submission in data["submissions"]: + if self.populate_submission(submission): + submission["folderid"] = folderid + # Do any submissions have more than one url? If so + # a urllist of the submission array urls would work. 
+                    yield Message.Url, submission["url"], submission
+            if not data["nextid"]:
+                return
+            params["nextid"] = data["nextid"]
+
+
+class WeasylSubmissionExtractor(WeasylExtractor):
+    subcategory = "submission"
+    pattern = BASE_PATTERN + r"(?:~[\w-]+/submissions|submission)/(\d+)"
+    test = (
+        ("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
+            "pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
+                       "40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
+                       "1/fiz-a-wesley.png",
+            "keyword": {
+                "comments"    : int,
+                "date"        : "dt:2012-04-20 00:38:04",
+                "description" : "<p>(flex)</p>",
+                "favorites"   : int,
+                "folder_name" : "Wesley Stuff",
+                "folderid"    : 2081,
+                "friends_only": False,
+                "owner"       : "Fiz",
+                "owner_login" : "fiz",
+                "rating"      : "general",
+                "submitid"    : 2031,
+                "subtype"     : "visual",
+                "tags"        : list,
+                "title"       : "A Wesley!",
+                "type"        : "submission",
+                "views"       : int,
+            },
+        }),
+        ("https://www.weasyl.com/submission/2031/a-wesley"),
+    )
+
+    def __init__(self, match):
+        WeasylExtractor.__init__(self, match)
+        self.submitid = match.group(1)
+
+    def items(self):
+        data = self.request_submission(self.submitid)
+        if self.populate_submission(data):
+            yield Message.Directory, data
+            yield Message.Url, data["url"], data
+
+
+class WeasylSubmissionsExtractor(WeasylExtractor):
+    subcategory = "submissions"
+    pattern = BASE_PATTERN + r"(?:~|submissions/)([\w-]+)/?$"
+    test = (
+        ("https://www.weasyl.com/~tanidareal", {
+            "count": ">= 200"
+        }),
+        ("https://www.weasyl.com/submissions/tanidareal"),
+    )
+
+    def __init__(self, match):
+        WeasylExtractor.__init__(self, match)
+        self.owner_login = match.group(1)
+
+    def items(self):
+        yield Message.Version, 1
+        yield Message.Directory, {"owner_login": self.owner_login}
+        yield from self.submissions(self.owner_login)
+
+
+class WeasylFolderExtractor(WeasylExtractor):
+    subcategory = "folder"
+    directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
+    pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
+    test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
+        "count": ">= 12"
+    })
+
+    def __init__(self, match):
+        WeasylExtractor.__init__(self, match)
+        self.owner_login, self.folderid = match.groups()
+
+    def items(self):
+        yield Message.Version, 1
+        iter = self.submissions(self.owner_login, self.folderid)
+        # Folder names are only on single submission api calls
+        msg, url, data = next(iter)
+        details = self.request_submission(data["submitid"])
+        yield Message.Directory, details
+        yield msg, url, data
+        yield from iter
+
+
+class WeasylJournalExtractor(WeasylExtractor):
+    subcategory = "journal"
+    filename_fmt = "{journalid} {title}.{extension}"
+    archive_fmt = "{journalid}"
+    pattern = BASE_PATTERN + r"journal/(\d+)"
+    test = ("https://www.weasyl.com/journal/17647/bbcode", {
+        "keyword": {
+            "title"  : "BBCode",
+            "date"   : "dt:2013-09-19 23:11:23",
+            "content": "<p><a>javascript:alert(42);</a></p>"
+                       "<p>No more of that!</p>",
+        },
+    })
+
+    def __init__(self, match):
+        WeasylExtractor.__init__(self, match)
+        self.journalid = match.group(1)
+
+    def items(self):
+        data = self.retrieve_journal(self.journalid)
+        yield Message.Version, 1
+        yield Message.Directory, data
+        yield Message.Url, data["html"], data
+
+
+class WeasylJournalsExtractor(WeasylExtractor):
+    subcategory = "journals"
+    filename_fmt = "{journalid} {title}.{extension}"
+    archive_fmt = "{journalid}"
+    pattern = BASE_PATTERN + r"journals/([\w-]+)"
+    test = ("https://www.weasyl.com/journals/charmander", {
+        "count": ">= 2",
+    })
+
+    def __init__(self, match):
+        WeasylExtractor.__init__(self, match)
+        self.owner_login = match.group(1)
+
+    def items(self):
+        yield Message.Version, 1
+        yield Message.Directory, {"owner_login": self.owner_login}
+
+        url = "{}/journals/{}".format(self.root, self.owner_login)
+        page = self.request(url).text
+        for journalid in text.extract_iter(page, 'href="/journal/', '/'):
+            data = self.retrieve_journal(journalid)
+            yield Message.Url, data["html"], data
+
+
+class WeasylFavoriteExtractor(WeasylExtractor):
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "{owner_login}", "Favorites")
+    pattern = BASE_PATTERN + r"favorites\?userid=(\d+)&feature=submit"
+    test = ("https://www.weasyl.com/favorites?userid=184616&feature=submit", {
+        "count": ">= 5",
+    })
+
+    def __init__(self, match):
+        WeasylExtractor.__init__(self, match)
+        self.userid = match.group(1)
+
+    def items(self):
+        owner_login = lastid = None
+        url = self.root + "/favorites"
+        params = {
+            "userid" : self.userid,
+            "feature": "submit",
+        }
+
+        while True:
+            page = self.request(url, params=params).text
+            pos = page.index('id="favorites-content"')
+
+            if not owner_login:
+                owner_login = text.extract(page, '<a href="/~', '"')[0]
+                yield Message.Directory, {"owner_login": owner_login}
+
+            for submitid in text.extract_iter(page, "/submissions/", "/", pos):
+                if submitid == lastid:
+                    continue
+                lastid = submitid
+                submission = self.request_submission(submitid)
+                if self.populate_submission(submission):
+                    yield Message.Url, submission["url"], submission
+
+            if "&nextid=" not in page:
+                return
+            params["nextid"] = submitid
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0b1b2d9..a325f87 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -47,21 +47,31 @@ class WeiboExtractor(Extractor):
             file["num"] = num
             yield Message.Url, file["url"], file
 
+    def statuses(self):
+        """Returns an iterable containing all relevant 'status' objects"""
+
+    def _status_by_id(self, status_id):
+        url = "{}/detail/{}".format(self.root, status_id)
+        page = self.request(url, fatal=False).text
+        data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
+        return json.loads(data)["status"] if data else None
+
     def _files_from_status(self, status):
-        images = status.pop("pics", ())
         page_info = status.pop("page_info", ())
-
-        for image in images:
-            pid = image["pid"]
-            if "large" in image:
-                image = image["large"]
-            geo = image.get("geo") or {}
-            yield text.nameext_from_url(image["url"], {
-                "url"   : image["url"],
-                "pid"   : pid,
-                "width" : text.parse_int(geo.get("width")),
-                "height": text.parse_int(geo.get("height")),
-            })
+        if "pics" in status:
+            if len(status["pics"]) < status["pic_num"]:
+                status = self._status_by_id(status["id"]) or status
+            for image in status.pop("pics"):
+                pid = image["pid"]
+                if "large" in image:
+                    image = image["large"]
+                geo = image.get("geo") or {}
+                yield text.nameext_from_url(image["url"], {
+                    "url"   : image["url"],
+                    "pid"   : pid,
+                    "width" : text.parse_int(geo.get("width")),
+                    "height": text.parse_int(geo.get("height")),
+                })
 
         if self.videos and "media_info" in page_info:
             info = page_info["media_info"]
@@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
                 data["_ytdl_extra"] = {"protocol": "m3u8_native"}
             yield data
 
-    def statuses(self):
-        """Returns an iterable containing all relevant 'status' objects"""
-
 
 class WeiboUserExtractor(WeiboExtractor):
     """Extractor for all images of a user on weibo.cn"""
@@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
         while True:
             data = self.request(url, params=params).json()
+            cards = data["data"]["cards"]
 
-            for card in data["data"]["cards"]:
+            if not cards:
+                return
+            for card in cards:
                 if "mblog" in card:
                     yield card["mblog"]
-
-            if not data["data"]["cards"]:
-                return
             params["page"] += 1
 
@@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
         self.status_id = match.group(1)
 
     def statuses(self):
-        url = "{}/detail/{}".format(self.root, self.status_id)
-        page = self.request(url, notfound="status").text
-        data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
-        if not data:
+        status = self._status_by_id(self.status_id)
+        if not status:
             raise exception.NotFoundError("status")
-        return (json.loads(data)["status"],)
+        return (status,)
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 2548ead..b7d116a 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -59,13 +59,13 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
 
     def metadata(self, page):
         extr = text.extract_from(page)
+        title = extr('"title":"', '"')
         user = {
            "id"     : text.parse_int(extr('"id_user":', ',')),
            "display": extr('"display":"', '"'),
            "sex"    : extr('"sex":"', '"'),
            "name"   : self.user,
        }
-        title = extr('"title":"', '"')
         user["description"] = extr(
             '<small class="mobile-hide">', '</small>').strip()
         tags = extr('<em>Tagged:</em>', '<').strip()
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 7d08b86..b62240b 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -228,7 +228,7 @@ class DownloadJob(Job):
             for pp in postprocessors:
                 pp.prepare(pathfmt)
 
-        if archive and kwdict in archive:
+        if archive and archive.check(kwdict):
             pathfmt.fix_extension()
             self.handle_skip()
             return
@@ -385,8 +385,23 @@ class DownloadJob(Job):
         self.sleep = config("sleep")
         if not config("download", True):
+            # monkey-patch method to do nothing and always return True
             self.download = pathfmt.fix_extension
 
+        archive = config("archive")
+        if archive:
+            path = util.expand_path(archive)
+            try:
+                if "{" in path:
+                    path = util.Formatter(path).format_map(kwdict)
+                self.archive = util.DownloadArchive(path, self.extractor)
+            except Exception as exc:
+                self.extractor.log.warning(
+                    "Failed to open download archive at '%s' ('%s: %s')",
+                    path, exc.__class__.__name__, exc)
+            else:
+                self.extractor.log.debug("Using download archive '%s'", path)
+
         skip = config("skip", True)
         if skip:
             self._skipexc = None
@@ -401,21 +416,10 @@ class DownloadJob(Job):
                 self._skipcnt = 0
                 self._skipmax = text.parse_int(smax)
         else:
+            # monkey-patch methods to always return False
             pathfmt.exists = lambda x=None: False
-
-            archive = config("archive")
-            if archive:
-                path = util.expand_path(archive)
-                try:
-                    if "{" in path:
-                        path = util.Formatter(path).format_map(kwdict)
-                    self.archive = util.DownloadArchive(path, self.extractor)
-                except Exception as exc:
-                    self.extractor.log.warning(
-                        "Failed to open download archive at '%s' ('%s: %s')",
-                        path, exc.__class__.__name__, exc)
-                else:
-                    self.extractor.log.debug("Using download archive '%s'", path)
+            if self.archive:
+                self.archive.check = pathfmt.exists
 
         postprocessors = self.extractor.config_accumulate("postprocessors")
         if postprocessors:
@@ -449,7 +453,7 @@ class DownloadJob(Job):
 
     def _build_blacklist(self):
         wlist = self.extractor.config("whitelist")
-        if wlist:
+        if wlist is not None:
             if isinstance(wlist, str):
                 wlist = wlist.split(",")
             blist = {e.category for e in extractor._list_classes()}
             ...
             return blist
 
         blist = self.extractor.config("blacklist")
-        if blist:
+        if blist is not None:
             if isinstance(blist, str):
                 blist = blist.split(",")
             blist = set(blist)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index dbebfce..3e91405 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -941,7 +941,7 @@ class DownloadArchive():
                 "archive-format", extractor.archive_fmt)
         ).format_map
 
-    def __contains__(self, kwdict):
+    def check(self, kwdict):
         """Return True if the item described by 'kwdict' exists in archive"""
         key = kwdict["_archive_key"] = self.keygen(kwdict)
         self.cursor.execute(
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d7e2737..81976c2 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.15.0"
+__version__ = "1.15.1"
diff --git a/test/test_results.py b/test/test_results.py
index 1380f31..a594032 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -22,19 +22,16 @@ from gallery_dl import extractor, util, job, config, exception  # noqa E402
 
 # these don't work on Travis CI
 TRAVIS_SKIP = {
-    "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx",
+    "exhentai", "mangafox", "dynastyscans", "nijie",
     "instagram", "ngomik",
     "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
-    "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex",
-    "sankakucomplex", "warosu", "fuskator", "patreon", "komikcast",
-    "instagram", "ngomik",
+    "sankaku", "idolcomplex", "mangahere", "mangadex", "sankakucomplex",
+    "warosu", "fuskator", "patreon", "komikcast",
 }
 
 # temporary issues, etc.
 BROKEN = {
-    "8kun",
-    "dynastyscans",
-    "fallenangels",
     "imagevenue",
+    "ngomik",
     "photobucket",
 }
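A note on the archive changes in job.py and util.py above: renaming DownloadArchive.__contains__() to check() is what makes the `--download-archive` + `--no-skip` fix (#1023) work, because an instance attribute can shadow an ordinary method but not a special method. A minimal sketch in plain Python, using a made-up Archive class rather than gallery-dl's actual implementation:

    class Archive:
        """Stand-in for util.DownloadArchive (illustration only)."""
        def __init__(self):
            self.entries = {"key-1"}

        def check(self, key):
            # the real implementation queries an sqlite3 database
            return key in self.entries

    archive = Archive()
    print(archive.check("key-1"))    # True

    # With "skip" disabled, DownloadJob wants every lookup to report
    # "not seen before", so it patches the method on the instance:
    archive.check = lambda key=None: False
    print(archive.check("key-1"))    # False

    # This would not work with __contains__: Python looks special
    # methods up on the type, not the instance, so "key-1" in archive
    # would bypass the patched attribute and still hit the database.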

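Similarly, the twitter.py changes above implement the extended `retweets` option (#1026): with `retweets` set to "original", the pagination loop swaps the retweet wrapper for the referenced original tweet and records the wrapper's ID in `_retweet_id_str`, which is why the conversation lookup now also matches on that field. A condensed sketch of that control flow, using made-up tweet dicts in place of real API responses:

    tweets = {  # originals, keyed by ID
        "111": {"id_str": "111", "user_id_str": "42"},
    }
    timeline = [  # a retweet wrapper pointing at the original
        {"id_str": "222", "user_id_str": "7",
         "retweeted_status_id_str": "111"},
    ]

    def pagination(original_retweets):
        for tweet in timeline:
            if "retweeted_status_id_str" in tweet:
                retweet = tweets.get(tweet["retweeted_status_id_str"])
                if original_retweets:
                    if not retweet:
                        continue
                    # keep the wrapper's ID so a later lookup by the
                    # retweet's own status URL still finds this object
                    retweet["_retweet_id_str"] = tweet["id_str"]
                    tweet = retweet
        yield tweet

    print(next(pagination(True))["id_str"])   # 111 - original tweet
    print(next(pagination(False))["id_str"])  # 222 - retweet wrapper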