author    Unit 193 <unit193@unit193.net>  2021-11-01 05:03:49 -0400
committer Unit 193 <unit193@unit193.net>  2021-11-01 05:03:49 -0400
commit    4a965d875415907cc1a016b428ae305a964f9228 (patch)
tree      7cece9948a7ba390348e00c669f9cb1f7a9ba39a
parent    34ba2951b8c523713425c98addb9256ea05c946f (diff)
download  gallery-dl-4a965d875415907cc1a016b428ae305a964f9228.tar.bz2
          gallery-dl-4a965d875415907cc1a016b428ae305a964f9228.tar.xz
          gallery-dl-4a965d875415907cc1a016b428ae305a964f9228.tar.zst
New upstream version 1.19.1 (tag: upstream/1.19.1)
-rw-r--r--  CHANGELOG.md                         |  25
-rw-r--r--  PKG-INFO                             |   6
-rw-r--r--  README.rst                           |   4
-rw-r--r--  data/man/gallery-dl.1                |   2
-rw-r--r--  data/man/gallery-dl.conf.5           | 150
-rw-r--r--  docs/gallery-dl-example.conf         | 290
-rw-r--r--  docs/gallery-dl.conf                 |   5
-rw-r--r--  gallery_dl.egg-info/PKG-INFO         |   6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt      |   1
-rw-r--r--  gallery_dl/cache.py                  |  21
-rw-r--r--  gallery_dl/downloader/ytdl.py        |   3
-rw-r--r--  gallery_dl/extractor/__init__.py     |   1
-rw-r--r--  gallery_dl/extractor/cyberdrop.py    |  22
-rw-r--r--  gallery_dl/extractor/deviantart.py   |  84
-rw-r--r--  gallery_dl/extractor/furaffinity.py  |   2
-rw-r--r--  gallery_dl/extractor/gfycat.py       |  31
-rw-r--r--  gallery_dl/extractor/inkbunny.py     | 100
-rw-r--r--  gallery_dl/extractor/kemonoparty.py  | 135
-rw-r--r--  gallery_dl/extractor/mangadex.py     |  27
-rw-r--r--  gallery_dl/extractor/nhentai.py      |  92
-rw-r--r--  gallery_dl/extractor/patreon.py      |  39
-rw-r--r--  gallery_dl/extractor/picarto.py      |  74
-rw-r--r--  gallery_dl/extractor/pixiv.py        |  62
-rw-r--r--  gallery_dl/extractor/seisoparty.py   |  65
-rw-r--r--  gallery_dl/extractor/twitter.py      |  20
-rw-r--r--  gallery_dl/extractor/vk.py           | 132
-rw-r--r--  gallery_dl/postprocessor/compare.py  |  58
-rw-r--r--  gallery_dl/version.py                |   2
-rw-r--r--  test/test_cache.py                   |   1
29 files changed, 1095 insertions(+), 365 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 87dd18f..f6b60f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,30 @@
# Changelog
+## 1.19.1 - 2021-10-24
+### Additions
+- [inkbunny] add `following` extractor ([#515](https://github.com/mikf/gallery-dl/issues/515))
+- [inkbunny] add `pool` extractor ([#1937](https://github.com/mikf/gallery-dl/issues/1937))
+- [kemonoparty] add `discord` extractor ([#1827](https://github.com/mikf/gallery-dl/issues/1827), [#1940](https://github.com/mikf/gallery-dl/issues/1940))
+- [nhentai] add `tag` extractor ([#1950](https://github.com/mikf/gallery-dl/issues/1950), [#1955](https://github.com/mikf/gallery-dl/issues/1955))
+- [patreon] add `files` option ([#1935](https://github.com/mikf/gallery-dl/issues/1935))
+- [picarto] add `gallery` extractor ([#1931](https://github.com/mikf/gallery-dl/issues/1931))
+- [pixiv] add `sketch` extractor ([#1497](https://github.com/mikf/gallery-dl/issues/1497))
+- [seisoparty] add `favorite` extractor ([#1906](https://github.com/mikf/gallery-dl/issues/1906))
+- [twitter] add `size` option ([#1881](https://github.com/mikf/gallery-dl/issues/1881))
+- [vk] add `album` extractor ([#474](https://github.com/mikf/gallery-dl/issues/474), [#1952](https://github.com/mikf/gallery-dl/issues/1952))
+- [postprocessor:compare] add `equal` option ([#1592](https://github.com/mikf/gallery-dl/issues/1592))
+### Fixes
+- [cyberdrop] extract direct download URLs ([#1943](https://github.com/mikf/gallery-dl/issues/1943))
+- [deviantart] update `search` argument handling ([#1911](https://github.com/mikf/gallery-dl/issues/1911))
+- [deviantart] full resolution for non-downloadable images ([#293](https://github.com/mikf/gallery-dl/issues/293))
+- [furaffinity] unquote search queries ([#1958](https://github.com/mikf/gallery-dl/issues/1958))
+- [inkbunny] match "long" URLs for pools and favorites ([#1937](https://github.com/mikf/gallery-dl/issues/1937))
+- [kemonoparty] improve inline extraction ([#1899](https://github.com/mikf/gallery-dl/issues/1899))
+- [mangadex] update parameter handling for API requests ([#1908](https://github.com/mikf/gallery-dl/issues/1908))
+- [patreon] better filenames for `content` images ([#1954](https://github.com/mikf/gallery-dl/issues/1954))
+- [redgifs][gfycat] provide fallback URLs ([#1962](https://github.com/mikf/gallery-dl/issues/1962))
+- [downloader:ytdl] prevent crash in `_progress_hook()`
+
## 1.19.0 - 2021-10-01
### Additions
- [aryion] add `tag` extractor ([#1849](https://github.com/mikf/gallery-dl/issues/1849))
diff --git a/PKG-INFO b/PKG-INFO
index b101649..ca59c0c 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.19.0
+Version: 1.19.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/README.rst b/README.rst
index bd79958..e5befc8 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 29621b0..b893a3e 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-10-01" "1.19.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-10-24" "1.19.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index a5c0970..cdedaee 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-10-01" "1.19.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-10-24" "1.19.1" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -451,6 +451,8 @@ and optional for
.br
* \f[I]sankaku\f[]
.br
+* \f[I]seisoparty\f[]
+.br
* \f[I]subscribestar\f[]
.br
* \f[I]tapas\f[]
@@ -1203,18 +1205,6 @@ files if they are images and falls back to preview versions for
everything else (archives, etc.).
-.SS extractor.deviantart.quality
-.IP "Type:" 6
-\f[I]integer\f[]
-
-.IP "Default:" 9
-\f[I]100\f[]
-
-.IP "Description:" 4
-JPEG quality level of newer images for which
-an original file download is not available.
-
-
.SS extractor.deviantart.refresh-token
.IP "Type:" 6
\f[I]string\f[]
@@ -1411,18 +1401,24 @@ You can use \f[I]"all"\f[] instead of listing all values separately.
.SS extractor.gfycat.format
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
-\f[I]"mp4"\f[]
+\f[I]["mp4", "webm", "mobile", "gif"]\f[]
.IP "Description:" 4
-The name of the preferred animation format, which can be one of
-\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[] or \f[I]"mjpg"\f[].
+List of names of the preferred animation format, which can be
+\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"mobile"\f[], \f[I]"gif"\f[], or \f[I]"webp"\f[].
+
+If a selected format is not available, the next one in the list will be
+tried until an available format is found.
-If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[]
-and \f[I]"gif"\f[] (in that order) will be tried instead, until an
-available format is found.
+If the format is given as \f[I]string\f[], it will be extended with
+\f[I]["mp4", "webm", "mobile", "gif"]\f[]. Use a list with one element to
+restrict it to only one possible format.
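
[editor's note] A minimal gallery-dl.conf sketch of both forms (values are illustrative; placement under "extractor" assumed from the upstream config layout):

    "gfycat": {
        "#": "string form: webm is tried first, then the default list as fallback",
        "format": "webm"
    }

Use a single-element list, e.g. "format": ["gif"], to disable the fallbacks entirely.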
.SS extractor.hentaifoundry.include
@@ -1445,17 +1441,6 @@ Possible values are
You can use \f[I]"all"\f[] instead of listing all values separately.
-.SS extractor.hentainexus.original
-.IP "Type:" 6
-\f[I]bool\f[]
-
-.IP "Default:" 9
-\f[I]true\f[]
-
-.IP "Description:" 4
-Download original files instead of WebP versions.
-
-
.SS extractor.hitomi.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -1608,6 +1593,20 @@ to download as mp4 videos.
The server to use for API requests.
+.SS extractor.mangadex.api-parameters
+.IP "Type:" 6
+\f[I]object\f[]
+
+.IP "Example:" 4
+{"order[updatedAt]": "desc"}
+
+.IP "Description:" 4
+Additional query parameters to send when fetching manga chapters.
+
+(See \f[I]/manga/{id}/feed\f[]
+and \f[I]/user/follows/manga/feed\f[])
+
+
.SS extractor.mangadex.lang
.IP "Type:" 6
\f[I]string\f[]
@@ -1631,6 +1630,17 @@ to filter chapters by.
Provide \f[I]artist\f[], \f[I]author\f[], and \f[I]group\f[] metadata fields.
+.SS extractor.mangadex.ratings
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["safe", "suggestive", "erotica", "pornographic"]\f[]
+
+.IP "Description:" 4
+List of acceptable content ratings for returned chapters.
+
+
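[editor's note] Taken together with extractor.mangadex.lang and extractor.mangadex.api-parameters above, a hedged sketch of how these MangaDex options might sit in a config file (values are illustrative only):

    "mangadex": {
        "lang": "en",
        "ratings": ["safe", "suggestive"],
        "api-parameters": {"order[updatedAt]": "desc"}
    }
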
.SS extractor.mastodon.reblogs
.IP "Type:" 6
\f[I]bool\f[]
@@ -1774,6 +1784,20 @@ port number in your browser's address bar when using a different
port than the default.
+.SS extractor.patreon.files
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["images", "attachments", "postfile", "content"]\f[]
+
+.IP "Description:" 4
+Determines the type and order of files to be downloaded.
+
+Available types are
+\f[I]postfile\f[], \f[I]images\f[], \f[I]attachments\f[], and \f[I]content\f[].
+
+
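[editor's note] As a sketch, limiting downloads to attachments and post files, in that order (type names taken from the list above):

    "patreon": {
        "files": ["attachments", "postfile"]
    }
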
.SS extractor.photobucket.subalbums
.IP "Type:" 6
\f[I]bool\f[]
@@ -2077,19 +2101,24 @@ video extraction and download
.SS extractor.redgifs.format
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
-\f[I]"mp4"\f[]
+\f[I]["mp4", "webm", "mobile", "gif"]\f[]
.IP "Description:" 4
-The name of the preferred format, which can be one of
-\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[], \f[I]"mobile"\f[],
-or \f[I]"mini"\f[].
+List of names of the preferred animation format, which can be
+\f[I]"mp4"\f[], \f[I]"webm"\f[], \f[I]"gif"\f[], \f[I]"webp"\f[], \f[I]"mobile"\f[], or \f[I]"mini"\f[].
-If the selected format is not available, \f[I]"mp4"\f[], \f[I]"webm"\f[]
-and \f[I]"gif"\f[] (in that order) will be tried instead, until an
-available format is found.
+If a selected format is not available, the next one in the list will be
+tried until an available format is found.
+
+If the format is given as \f[I]string\f[], it will be extended with
+\f[I]["mp4", "webm", "mobile", "gif"]\f[]. Use a list with one element to
+restrict it to only one possible format.
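
[editor's note] This option accepts the same forms as extractor.gfycat.format; for example, a single-element list restricts downloads to one format:

    "redgifs": {
        "format": ["mp4"]
    }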
.SS extractor.sankakucomplex.embeds
@@ -2229,6 +2258,22 @@ Fetch media from all Tweets and replies in a \f[I]conversation
<https://help.twitter.com/en/using-twitter/twitter-conversations>\f[].
+.SS extractor.twitter.size
+.IP "Type:" 6
+\f[I]list\f[] of \f[I]strings\f[]
+
+.IP "Default:" 9
+\f[I]["orig", "large", "medium", "small"]\f[]
+
+.IP "Description:" 4
+The image version to download.
+Any entries after the first one will be used for potential
+\f[I]fallback\f[] URLs.
+
+Known available sizes are
+\f[I]4096x4096\f[], \f[I]orig\f[], \f[I]large\f[], \f[I]medium\f[], and \f[I]small\f[].
+
+
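[editor's note] A short sketch preferring the largest known size, with the original version as the first fallback (size names from the list above):

    "twitter": {
        "size": ["4096x4096", "orig"]
    }
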
.SS extractor.twitter.logout
.IP "Type:" 6
\f[I]bool\f[]
@@ -2991,27 +3036,38 @@ in their default location.
\f[I]"replace"\f[]
.IP "Description:" 4
-The action to take when files do not compare as equal.
+The action to take when files do \f[B]not\f[] compare as equal.
.br
* \f[I]"replace"\f[]: Replace/Overwrite the old version with the new one
.br
-* \f[I]"abort:N"\f[]: Same as \f[I]"replace"\f[] and stop the current extractor run
-after \f[I]N\f[] consecutive files compared as equal.
+* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new
+version like \f[I]skip = "enumerate"\f[]
+
+
+.SS compare.equal
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"null"\f[]
+
+.IP "Description:" 4
+The action to take when files do compare as equal.
.br
-* \f[I]"terminate:N"\f[]: Same as \f[I]"replace"\f[]
-and stop the current extractor run, including parent extractors,
+* \f[I]"abort:N"\f[]: Stop the current extractor run
after \f[I]N\f[] consecutive files compared as equal.
.br
-* \f[I]"exit:N"\f[]: Same as \f[I]"replace"\f[] and exit the program
+* \f[I]"terminate:N"\f[]: Stop the current extractor run,
+including parent extractors,
after \f[I]N\f[] consecutive files compared as equal.
.br
-* \f[I]"enumerate"\f[]: Add an enumeration index to the filename of the new
-version like \f[I]skip = "enumerate"\f[]
+* \f[I]"exit:N"\f[]: Exit the program
+after \f[I]N\f[] consecutive files compared as equal.
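
[editor's note] A hedged postprocessor sketch combining both behaviors: replace files that changed, but terminate after three consecutive equal ones (assumes the compare postprocessor's "action" option documented just above this hunk):

    "postprocessors": [{
        "name"  : "compare",
        "action": "replace",
        "equal" : "terminate:3"
    }]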
.SS compare.shallow
diff --git a/docs/gallery-dl-example.conf b/docs/gallery-dl-example.conf
index d84febd..72e7465 100644
--- a/docs/gallery-dl-example.conf
+++ b/docs/gallery-dl-example.conf
@@ -2,91 +2,123 @@
"extractor":
{
"base-directory": "~/gallery-dl/",
+
+ "#": "set global archive file for all extractors",
"archive": "~/gallery-dl/archive.sqlite3",
- "proxy": "http://10.10.1.10:3128",
+ "#": "replace invalid path characters with unicode alternatives",
+ "path-restrict": {
+ "\\": "⧹",
+ "/" : "⧸",
+ "|" : "│",
+ ":" : "꞉",
+ "*" : "∗",
+ "?" : "?",
+ "\"": "″",
+ "<" : "﹤",
+ ">" : "﹥"
+ },
+
+ "#": "write tags for several *booru sites",
"postprocessors": [
{
- "name": "ugoira",
- "whitelist": ["pixiv", "danbooru"],
- "ffmpeg-twopass": true,
- "ffmpeg-args": ["-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"]
- },
- {
"name": "metadata",
- "whitelist": ["danbooru", "yandere", "sankaku"],
- "mode": "tags"
+ "mode": "tags",
+ "whitelist": ["danbooru", "moebooru", "sankaku"]
}
],
"pixiv":
{
+ "#": "override global archive setting for pixiv",
"archive": "~/gallery-dl/archive-pixiv.sqlite3",
+ "#": "set custom directory and filename format strings for all pixiv downloads",
"filename": "{id}{num}.{extension}",
"directory": ["Pixiv", "Works", "{user[id]}"],
+ "refresh-token": "aBcDeFgHiJkLmNoPqRsTuVwXyZ01234567890-FedC9",
- "username": "foo",
- "password": "bar",
+ "#": "transform ugoira into lossless MKVs",
+ "ugoira": true,
+ "postprocessors": ["ugoira-copy"],
+ "#": "use special settings for favorites and bookmarks",
"favorite":
{
"directory": ["Pixiv", "Favorites", "{user[id]}"]
},
-
"bookmark":
{
"directory": ["Pixiv", "My Bookmarks"],
-
- "username": "foo123",
- "password": "bar123"
+ "refresh-token": "01234567890aBcDeFgHiJkLmNoPqRsTuVwXyZ-ZyxW1"
}
},
+ "danbooru":
+ {
+ "ugoira": true,
+ "postprocessors": ["ugoira-webm"]
+ },
+
"exhentai":
{
+ "#": "use cookies instead of logging in with username and password",
"cookies":
{
"ipb_member_id": "12345",
"ipb_pass_hash": "1234567890abcdef",
"igneous" : "123456789",
- "hath_perks" : "m1.m2.m3.a-123456789a"
+ "hath_perks" : "m1.m2.m3.a-123456789a",
+ "sk" : "n4m34tv3574m2c4e22c35zgeehiw",
+ "sl" : "dm_2"
},
- "proxy":
- {
- "http": "http://10.10.1.10:8080",
- "https": "https://10.10.1.10:443"
- },
+ "#": "wait 2 to 4.8 seconds between HTTP requests",
+ "sleep-request": [2.0, 4.8],
"filename": "{num:>04}_{name}.{extension}",
- "directory": ["{category!c}", "{title}"],
-
- "wait-min": 1.0,
- "wait-max": 5.0
+ "directory": ["{category!c}", "{title}"]
},
- "mangadex":
+ "sankaku":
{
- "chapter-filter": "lang == 'en'",
- "postprocessors": [{
- "name": "zip",
- "keep-files": false,
- "compression": "zip"
- }]
+ "#": "authentication with cookies is not possible for sankaku",
+ "username": "user",
+ "password": "#secret#"
+ },
+
+ "furaffinity": {
+ "#": "authentication with username and password is not possible due to CAPTCHA",
+ "cookies": {
+ "a": "01234567-89ab-cdef-fedc-ba9876543210",
+ "b": "fedcba98-7654-3210-0123-456789abcdef"
+ },
+
+ "descriptions": "html",
+ "postprocessors": ["content"]
},
"deviantart":
{
+ "#": "download 'gallery' and 'scraps' images for user profile URLs",
"include": "gallery,scraps",
+
+ "#": "use custom API credentials to avoid 429 errors",
+ "client-id": "98765",
+ "client-secret": "0123456789abcdef0123456789abcdef",
+ "refresh-token": "0123456789abcdef0123456789abcdef01234567",
+
+ "#": "put description texts into a separate directory",
"metadata": true,
- "postprocessors": [{
- "name": "metadata",
- "mode": "custom",
- "directory" : "Descriptions",
- "content-format" : "{description}\n",
- "extension-format": "descr.txt"
- }]
+ "postprocessors": [
+ {
+ "name": "metadata",
+ "mode": "custom",
+ "directory" : "Descriptions",
+ "content-format" : "{description}\n",
+ "extension-format": "descr.txt"
+ }
+ ]
},
"flickr":
@@ -96,88 +128,126 @@
"size-max": 1920
},
+ "mangadex":
+ {
+ "#": "only download safe/suggestive chapters translated to English",
+ "lang": "en",
+ "ratings": ["safe", "suggestive"],
+
+ "#": "put chapters into '.cbz' archives",
+ "postprocessors": ["cbz"]
+ },
+
"reddit":
{
- "morecomments": true,
- "date-min": "2017-01",
- "date-format": "%Y-%m",
- "recursion": 1
+ "#": "only spawn child extractors for links to specific sites",
+ "whitelist": ["imgur", "redgifs", "gfycat"],
+
+ "#": "put files from child extractors into the reddit directory",
+ "parent-directory": true,
+
+ "#": "transfer metadata to any child extractor as '_reddit'",
+ "parent-metadata": "_reddit"
},
- "sankaku":
+ "imgur":
{
- "sleep": 2,
- "wait-min": 5.0,
- "wait-max": 5.0,
- "cookies": "~/gallery-dl/cookies-sankaku.txt"
+ "#": "use different directory and filename formats when coming from a reddit post",
+ "directory":
+ {
+ "'_reddit' in locals()": []
+ },
+ "filename":
+ {
+ "'_reddit' in locals()": "{_reddit[id]} {id}.{extension}",
+ "" : "{id}.{extension}"
+ }
},
"tumblr":
{
- "posts": "all",
+ "posts" : "all",
"external": false,
- "reblogs": false,
- "inline": true,
+ "reblogs" : false,
+ "inline" : true,
+ "#": "use special settings when downloading liked posts",
"likes":
{
- "posts": "video,photo,link",
+ "posts" : "video,photo,link",
"external": true,
- "reblogs": true
+ "reblogs" : true
}
},
+ "twitter":
+ {
+ "#": "write text content for *all* tweets",
+ "postprocessors": ["content"],
+ "text-tweets": true
+ },
+
"mastodon":
{
- "mastodon.xyz":
+ "#": "add 'tabletop.social' as recognized mastodon instance",
+ "#": "(run 'gallery-dl oauth:mastodon:tabletop.social to get an access token')",
+ "tabletop.social":
{
- "access-token": "cab65529..."
- },
- "tabletop.social": {
+ "root": "https://tabletop.social",
"access-token": "513a36c6..."
},
+ "#": "set filename format strings for all 'mastodon' instances",
"directory": ["mastodon", "{instance}", "{account[username]!l}"],
- "filename": "{id}_{media[id]}.{extension}"
+ "filename" : "{id}_{media[id]}.{extension}"
},
"foolslide": {
- "otscans": {"root": "https://otscans.com/foolslide"},
+ "#": "add two more foolslide instances",
+ "otscans" : {"root": "https://otscans.com/foolslide"},
"helvetica": {"root": "https://helveticascans.com/r" }
},
"foolfuuka": {
+ "#": "add two other foolfuuka 4chan archives",
"fireden-onion": {"root": "http://ydt6jy2ng3s3xg2e.onion"},
- "scalearchive": {"root": "https://archive.scaled.team" }
- },
-
- "replace invalid path characters with unicode alternatives": null,
- "path-restrict": {
- "\\": "⧹",
- "/" : "⧸",
- "|" : "│",
- ":" : "꞉",
- "*" : "∗",
- "?" : "?",
- "\"": "″",
- "<" : "﹤",
- ">" : "﹥"
+ "scalearchive" : {"root": "https://archive.scaled.team" }
}
},
"downloader":
{
- "part-directory": "/tmp/.download/",
+ "#": "restrict download speed to 1 MB/s",
"rate": "1M",
+
+ "#": "show download progress indicator after 2 seconds",
+ "progress": 2.0,
+
+ "#": "retry failed downloads up to 3 times",
"retries": 3,
- "timeout": 8.5
+
+ "#": "consider a download 'failed' after 8 seconds of inactivity",
+ "timeout": 8.0,
+
+ "#": "write '.part' files into a special directory",
+ "part-directory": "/tmp/.download/",
+
+ "#": "do not update file modification times",
+ "mtime": false,
+
+ "ytdl":
+ {
+ "#": "use yt-dlp instead of youtube-dl",
+ "module": "yt_dlp"
+ }
},
"output":
{
- "mode": "terminal",
"log": {
"level": "info",
+
+ "#": "use different ANSI colors for each log level",
"format": {
"debug" : "\u001b[0;37m{name}: {message}\u001b[0m",
"info" : "\u001b[1;37m{name}: {message}\u001b[0m",
@@ -185,11 +255,19 @@
"error" : "\u001b[1;31m{name}: {message}\u001b[0m"
}
},
+
+ "#": "shorten filenames to fit into one terminal line",
+ "#": "while also considering wider East-Asian characters",
+ "shorten": "eaw",
+
+ "#": "write logging messages to a separate file",
"logfile": {
"path": "~/gallery-dl/log.txt",
"mode": "w",
"level": "debug"
},
+
+ "#": "write unrecognized URLs to a separate file",
"unsupportedfile": {
"path": "~/gallery-dl/unsupported.txt",
"mode": "a",
@@ -198,9 +276,63 @@
}
},
- "cache": {
- "file": "~/gallery-dl/cache.sqlite3"
+ "postprocessor":
+ {
+ "#": "write 'content' metadata into separate files",
+ "content":
+ {
+ "name" : "metadata",
+
+ "#": "write data for every post instead of each individual file",
+ "event": "post",
+ "filename": "{post_id|tweet_id|id}.txt",
+
+ "#": "write only the values for 'content' or 'description'",
+ "mode" : "custom",
+ "format": "{content|description}\n"
+ },
+
+ "#": "put files into a '.cbz' archive",
+ "cbz":
+ {
+ "name": "zip",
+ "extension": "cbz"
+ },
+
+ "#": "various ugoira post processor configurations to create different file formats",
+ "ugoira-webm":
+ {
+ "name": "ugoira",
+ "extension": "webm",
+ "ffmpeg-args": ["-c:v", "libvpx-vp9", "-an", "-b:v", "0", "-crf", "30"],
+ "ffmpeg-twopass": true,
+ "ffmpeg-demuxer": "image2"
+ },
+ "ugoira-mp4":
+ {
+ "name": "ugoira",
+ "extension": "mp4",
+ "ffmpeg-args": ["-c:v", "libx264", "-an", "-b:v", "4M", "-preset", "veryslow"],
+ "ffmpeg-twopass": true,
+ "libx264-prevent-odd": true
+ },
+ "ugoira-gif":
+ {
+ "name": "ugoira",
+ "extension": "gif",
+ "ffmpeg-args": ["-filter_complex", "[0:v] split [a][b];[a] palettegen [p];[b][p] paletteuse"]
+ },
+ "ugoira-copy": {
+ "name": "ugoira",
+ "extension": "mkv",
+ "ffmpeg-args": ["-c", "copy"],
+ "libx264-prevent-odd": false,
+ "repeat-last-frame": false
+ }
},
- "netrc": true
+ "#": "use a custom cache file location",
+ "cache": {
+ "file": "~/gallery-dl/cache.sqlite3"
+ }
}
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index b998597..c89f4b9 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -76,7 +76,6 @@
"mature": true,
"metadata": false,
"original": true,
- "quality": 100,
"wait-min": 0
},
"e621":
@@ -107,7 +106,7 @@
},
"gfycat":
{
- "format": "mp4"
+ "format": ["mp4", "webm", "mobile", "gif"]
},
"hentaifoundry":
{
@@ -222,7 +221,7 @@
},
"redgifs":
{
- "format": "mp4"
+ "format": ["mp4", "webm", "mobile", "gif"]
},
"sankakucomplex":
{
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 63101a1..5f9b4b8 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.19.0
+Version: 1.19.1
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.19.1/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index b662b96..6dd43c8 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -132,6 +132,7 @@ gallery_dl/extractor/patreon.py
gallery_dl/extractor/philomena.py
gallery_dl/extractor/photobucket.py
gallery_dl/extractor/photovogue.py
+gallery_dl/extractor/picarto.py
gallery_dl/extractor/piczel.py
gallery_dl/extractor/pillowfort.py
gallery_dl/extractor/pinterest.py
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 7a49b61..923ed32 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -211,13 +211,18 @@ def _path():
return os.path.join(cachedir, "cache.sqlite3")
-try:
- dbfile = _path()
+def _init():
+ try:
+ dbfile = _path()
+
+ # restrict access permissions for new db files
+ os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
+
+ DatabaseCacheDecorator.db = sqlite3.connect(
+ dbfile, timeout=60, check_same_thread=False)
+ except (OSError, TypeError, sqlite3.OperationalError):
+ global cache
+ cache = memcache
- # restrict access permissions for new db files
- os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
- DatabaseCacheDecorator.db = sqlite3.connect(
- dbfile, timeout=60, check_same_thread=False)
-except (OSError, TypeError, sqlite3.OperationalError):
- cache = memcache # noqa: F811
+_init()
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 86e247b..f4d3e05 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -136,8 +136,9 @@ class YoutubeDLDownloader(DownloaderBase):
def _progress_hook(self, info):
if info["status"] == "downloading" and \
info["elapsed"] >= self.progress:
+ total = info.get("total_bytes") or info.get("total_bytes_estimate")
self.out.progress(
- info["total_bytes"],
+ None if total is None else int(total),
info["downloaded_bytes"],
int(info["speed"]),
)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index c512548..93702ab 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -95,6 +95,7 @@ modules = [
"philomena",
"photobucket",
"photovogue",
+ "picarto",
"piczel",
"pillowfort",
"pinterest",
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index 2004921..d1b1b25 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -8,7 +8,6 @@
from .common import Extractor, Message
from .. import text
-import binascii
class CyberdropAlbumExtractor(Extractor):
@@ -19,7 +18,7 @@ class CyberdropAlbumExtractor(Extractor):
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = ("https://cyberdrop.me/a/keKRjm4t", {
- "pattern": r"https://f\.cyberdrop\.cc/.*\.[a-z]+$",
+ "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.[a-z]+$",
"keyword": {
"album_id": "keKRjm4t",
"album_name": "Fate (SFW)",
@@ -38,7 +37,14 @@ class CyberdropAlbumExtractor(Extractor):
def items(self):
url = self.root + "/a/" + self.album_id
extr = text.extract_from(self.request(url).text)
- extr("const albumData = {", "")
+
+ files = []
+ append = files.append
+ while True:
+ url = extr('downloadUrl: "', '"')
+ if not url:
+ break
+ append(text.unescape(url))
data = {
"album_id" : self.album_id,
@@ -46,13 +52,11 @@ class CyberdropAlbumExtractor(Extractor):
"date" : text.parse_timestamp(extr("timestamp: ", ",")),
"album_size" : text.parse_int(extr("totalSize: ", ",")),
"description": extr("description: `", "`"),
+ "count" : len(files),
}
- files = extr("fl: '", "'").split(",")
- data["count"] = len(files)
yield Message.Directory, data
- for file_b64 in files:
- file = binascii.a2b_base64(file_b64).decode()
- text.nameext_from_url(file, data)
+ for url in files:
+ text.nameext_from_url(url, data)
data["filename"], _, data["id"] = data["filename"].rpartition("-")
- yield Message.Url, "https://f.cyberdrop.cc/" + file, data
+ yield Message.Url, url, data
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 7dac770..4604d39 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -14,6 +14,7 @@ from ..cache import cache, memcache
import collections
import itertools
import mimetypes
+import binascii
import time
import re
@@ -39,7 +40,6 @@ class DeviantartExtractor(Extractor):
self.offset = 0
self.flat = self.config("flat", True)
self.extra = self.config("extra", False)
- self.quality = self.config("quality", "100")
self.original = self.config("original", True)
self.comments = self.config("comments", False)
self.user = match.group(1) or match.group(2)
@@ -53,9 +53,6 @@ class DeviantartExtractor(Extractor):
else:
self.unwatch = None
- if self.quality:
- self.quality = ",q_{}".format(self.quality)
-
if self.original != "image":
self._update_content = self._update_content_default
else:
@@ -104,19 +101,8 @@ class DeviantartExtractor(Extractor):
if self.original and deviation["is_downloadable"]:
self._update_content(deviation, content)
-
- if content["src"].startswith("https://images-wixmp-"):
- if deviation["index"] <= 790677560:
- # https://github.com/r888888888/danbooru/issues/4069
- intermediary, count = re.subn(
- r"(/f/[^/]+/[^/]+)/v\d+/.*",
- r"/intermediary\1", content["src"], 1)
- if count:
- deviation["_fallback"] = (content["src"],)
- content["src"] = intermediary
- if self.quality:
- content["src"] = re.sub(
- r",q_\d+", self.quality, content["src"], 1)
+ else:
+ self._update_token(deviation, content)
yield self.commit(deviation, content)
@@ -302,6 +288,32 @@ class DeviantartExtractor(Extractor):
if mtype and mtype.startswith("image/"):
content.update(data)
+ def _update_token(self, deviation, content):
+ """Replace JWT to be able to remove width/height limits
+
+ All credit goes to @Ironchest337
+ for discovering and implementing this method
+ """
+ url, sep, _ = content["src"].partition("/v1/")
+ if not sep:
+ return
+
+ # header = b'{"typ":"JWT","alg":"none"}'
+ payload = (
+ b'{"sub":"urn:app:","iss":"urn:app:","obj":[[{"path":"/f/' +
+ url.partition("/f/")[2].encode() +
+ b'"}]],"aud":["urn:service:file.download"]}'
+ )
+
+ deviation["_fallback"] = (content["src"],)
+ content["src"] = (
+ "{}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.{}.".format(
+ url,
+ # base64 of 'header' is precomputed as 'eyJ0eX...'
+ # binascii.a2b_base64(header).rstrip(b"=\n").decode(),
+ binascii.b2a_base64(payload).rstrip(b"=\n").decode())
+ )
+
def _limited_request(self, url, **kwargs):
"""Limits HTTP requests to one every 2 seconds"""
kwargs["fatal"] = None
@@ -746,29 +758,27 @@ class DeviantartPopularExtractor(DeviantartExtractor):
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
- self.search_term = self.time_range = self.category_path = None
self.user = ""
trange1, path, trange2, query = match.groups()
- trange = trange1 or trange2
query = text.parse_query(query)
-
- if not trange:
- trange = query.get("order")
-
- if path:
- self.category_path = path.strip("/")
- if trange:
- if trange.startswith("popular-"):
- trange = trange[8:]
- self.time_range = trange.replace("-", "").replace("hours", "hr")
- if query:
- self.search_term = query.get("q")
+ self.search_term = query.get("q")
+
+ trange = trange1 or trange2 or query.get("order", "")
+ if trange.startswith("popular-"):
+ trange = trange[8:]
+ self.time_range = {
+ "most-recent" : "now",
+ "this-week" : "1week",
+ "this-month" : "1month",
+ "this-century": "alltime",
+ "all-time" : "alltime",
+ }.get(trange, "alltime")
self.popular = {
"search": self.search_term or "",
- "range" : trange or "",
- "path" : self.category_path,
+ "range" : trange or "all-time",
+ "path" : path.strip("/") if path else "",
}
def deviations(self):
@@ -851,12 +861,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
- "pattern": (r"https://images-wixmp-\w+\.wixmp\.com"
- r"/intermediary/f/[^/]+/[^.]+\.jpg")
- }),
- # wixmp URL rewrite v2 (#369)
- (("https://www.deviantart.com/josephbiwald/art/Destiny-2-804940104"), {
- "pattern": r"https://images-wixmp-\w+\.wixmp\.com/.*,q_100,"
+ "pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f"
+ r"/[^/]+/[^.]+\.jpg\?token="),
}),
# GIF (#242)
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 9516dfa..b5ecbd6 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -250,7 +250,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
FuraffinityExtractor.__init__(self, match)
self.query = text.parse_query(match.group(2))
if self.user and "q" not in self.query:
- self.query["q"] = text.unescape(self.user)
+ self.query["q"] = text.unquote(self.user)
def metadata(self):
return {"search": self.query.get("q")}
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 2757852..9b4d5ee 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -22,7 +22,13 @@ class GfycatExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.key = match.group(1).lower()
- self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif")
+
+ formats = self.config("format")
+ if formats is None:
+ formats = ("mp4", "webm", "mobile", "gif")
+ elif isinstance(formats, str):
+ formats = (formats, "mp4", "webm", "mobile", "gif")
+ self.formats = formats
def items(self):
metadata = self.metadata()
@@ -30,23 +36,25 @@ class GfycatExtractor(Extractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
continue
- url = self._select_format(gfycat)
+ url = self._process(gfycat)
gfycat.update(metadata)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
- def _select_format(self, gfyitem):
+ def _process(self, gfycat):
+ gfycat["_fallback"] = formats = self._formats(gfycat)
+ gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
+ return next(formats, "")
+
+ def _formats(self, gfycat):
for fmt in self.formats:
key = fmt + "Url"
- if key in gfyitem:
- url = gfyitem[key]
+ if key in gfycat:
+ url = gfycat[key]
if url.startswith("http:"):
url = "https" + url[4:]
- gfyitem["extension"] = url.rpartition(".")[2]
- return url
- gfyitem["extension"] = ""
- return ""
+ gfycat["extension"] = url.rpartition(".")[2]
+ yield url
def metadata(self):
return {}
@@ -146,8 +154,7 @@ class GfycatImageExtractor(GfycatExtractor):
if "gfyName" not in gfycat:
self.log.warning("Skipping '%s' (malformed)", gfycat["gfyId"])
return
- url = self._select_format(gfycat)
- gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
+ url = self._process(gfycat)
yield Message.Directory, gfycat
yield Message.Url, url, gfycat
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index cbe0f43..3d09d79 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -135,33 +135,123 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
return self.api.search(params)
+class InkbunnyPoolExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny pools"""
+ subcategory = "pool"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"poolview_process\.php\?pool_id=(\d+)|"
+ r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
+ test = (
+ ("https://inkbunny.net/poolview_process.php?pool_id=28985", {
+ "count": 9,
+ }),
+ ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
+ "&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ pid = match.group(1)
+ if pid:
+ self.pool_id = pid
+ self.orderby = "pool_order"
+ else:
+ params = text.parse_query(match.group(2))
+ self.pool_id = params.get("pool_id")
+ self.orderby = params.get("orderby", "pool_order")
+
+ def posts(self):
+ params = {
+ "pool_id": self.pool_id,
+ "orderby": self.orderby,
+ }
+ return self.api.search(params)
+
+
class InkbunnyFavoriteExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/userfavorites_process\.php\?favs_user_id=(\d+)"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"userfavorites_process\.php\?favs_user_id=(\d+)|"
+ r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
test = (
("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
r"/\d+/\d+_\w+_.+",
"range": "20-50",
}),
+ ("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
+ "&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
)
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
- self.user_id = match.group(1)
+ uid = match.group(1)
+ if uid:
+ self.user_id = uid
+ self.orderby = self.config("orderby", "fav_datetime")
+ else:
+ params = text.parse_query(match.group(2))
+ self.user_id = params.get("user_id")
+ self.orderby = params.get("orderby", "fav_datetime")
def posts(self):
- orderby = self.config("orderby", "fav_datetime")
params = {
"favs_user_id": self.user_id,
- "orderby" : orderby,
+ "orderby" : self.orderby,
}
- if orderby and orderby.startswith("unread_"):
+ if self.orderby and self.orderby.startswith("unread_"):
params["unread_submissions"] = "yes"
return self.api.search(params)
+class InkbunnyFollowingExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny user watches"""
+ subcategory = "following"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
+ r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
+ test = (
+ (("https://inkbunny.net/watchlist_process.php"
+ "?mode=watching&user_id=20969"), {
+ "pattern": InkbunnyUserExtractor.pattern,
+ "count": ">= 90",
+ }),
+ ("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
+ "&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ self.user_id = match.group(1) or \
+ text.parse_query(match.group(2)).get("user_id")
+
+ def items(self):
+ url = self.root + "/watchlist_process.php"
+ params = {"mode": "watching", "user_id": self.user_id}
+
+ with self.request(url, params=params) as response:
+ url, _, params = response.url.partition("?")
+ page = response.text
+
+ params = text.parse_query(params)
+ params["page"] = text.parse_int(params.get("page"), 1)
+ data = {"_extractor": InkbunnyUserExtractor}
+
+ while True:
+ cnt = 0
+ for user in text.extract_iter(
+ page, '<a class="widget_userNameSmall" href="', '"',
+ page.index('id="changethumboriginal_form"')):
+ cnt += 1
+ yield Message.Queue, self.root + user, data
+
+ if cnt < 20:
+ return
+ params["page"] += 1
+ page = self.request(url, params=params).text
+
+
class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index c5f5ae7..d5aad67 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -14,7 +14,8 @@ from ..cache import cache
import itertools
import re
-BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
+BASE_PATTERN = r"(?:https?://)?kemono\.party"
+USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor):
@@ -29,7 +30,9 @@ class KemonopartyExtractor(Extractor):
def items(self):
self._prepare_ddosguard_cookies()
- find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+ find_inline = re.compile(
+ r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
+ r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
skip_service = \
"patreon" if self.config("patreon-skip-file", True) else None
@@ -101,7 +104,7 @@ class KemonopartyExtractor(Extractor):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
+ pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
@@ -138,11 +141,11 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"/post/([^/?#]+)"
+ pattern = USER_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/data/files/fanbox"
- r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+ "pattern": r"https://kemono.party/data/21/0f"
+ r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
"content": str,
@@ -197,10 +200,128 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
return (posts[0],) if len(posts) > 1 else posts
+class KemonopartyDiscordExtractor(KemonopartyExtractor):
+ """Extractor for kemono.party discord servers"""
+ subcategory = "discord"
+ directory_fmt = ("{category}", "discord", "{server}",
+ "{channel_name|channel}")
+ filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "discord_{server}_{id}_{num}"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
+ test = (
+ (("https://kemono.party/discord"
+ "/server/488668827274444803#finish-work"), {
+ "count": 4,
+ "keyword": {"channel_name": "finish-work"},
+ }),
+ (("https://kemono.party/discord"
+ "/server/256559665620451329/channel/462437519519383555#"), {
+ "pattern": r"https://kemono\.party/data/attachments/discord"
+ r"/256559665620451329/\d+/\d+/.+",
+ "count": ">= 2",
+ }),
+ # 'inline' files
+ (("https://kemono.party/discord"
+ "/server/315262215055736843/channel/315262215055736843#general"), {
+ "pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
+ "range": "1-5",
+ "options": (("image-filter", "type == 'inline'"),),
+ }),
+ )
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server, self.channel, self.channel_name = match.groups()
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+
+ find_inline = re.compile(
+ r"https?://(?:cdn\.discordapp.com|media\.discordapp\.net)"
+ r"(/[A-Za-z0-9-._~:/?#\[\]@!$&'()*+,;%=]+)").findall
+
+ posts = self.posts()
+ max_posts = self.config("max-posts")
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
+ files = []
+ append = files.append
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ append(attachment)
+ for path in find_inline(post["content"] or ""):
+ append({"path": "https://cdn.discordapp.com" + path,
+ "name": path, "type": "inline"})
+
+ post["channel_name"] = self.channel_name
+ post["date"] = text.parse_datetime(
+ post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ yield Message.Directory, post
+
+ for post["num"], file in enumerate(files, 1):
+ post["type"] = file["type"]
+ url = file["path"]
+ if url[0] == "/":
+ url = self.root + "/data" + url
+ elif url.startswith("https://kemono.party"):
+ url = self.root + "/data" + url[20:]
+
+ text.nameext_from_url(file["name"], post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ if self.channel is None:
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ for channel in self.request(url).json():
+ if channel["name"] == self.channel_name:
+ self.channel = channel["id"]
+ break
+ else:
+ raise exception.NotFoundError("channel")
+
+ url = "{}/api/discord/channel/{}".format(self.root, self.channel)
+ params = {"skip": 0}
+
+ while True:
+ posts = self.request(url, params=params).json()
+ yield from posts
+
+ if len(posts) < 25:
+ break
+ params["skip"] += 25
+
+
+class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
+ subcategory = "discord-server"
+ pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
+ test = ("https://kemono.party/discord/server/488668827274444803", {
+ "pattern": KemonopartyDiscordExtractor.pattern,
+ "count": 13,
+ })
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.server = match.group(1)
+
+ def items(self):
+ url = "{}/api/discord/channels/lookup?q={}".format(
+ self.root, self.server)
+ channels = self.request(url).json()
+
+ for channel in channels:
+ url = "{}/discord/server/{}/channel/{}#{}".format(
+ self.root, self.server, channel["id"], channel["name"])
+ channel["_extractor"] = KemonopartyDiscordExtractor
+ yield Message.Queue, url, channel
+
+
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?kemono\.party/favorites"
+ pattern = BASE_PATTERN + r"/favorites"
test = ("https://kemono.party/favorites", {
"pattern": KemonopartyUserExtractor.pattern,
"url": "f4b5b796979bcba824af84206578c79101c7f0e1",
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 634a92d..ff1d7c3 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -209,22 +209,15 @@ class MangadexAPI():
return self._call("/manga/" + uuid)["data"]
def manga_feed(self, uuid):
- config = self.extractor.config
- order = "desc" if config("chapter-reverse") else "asc"
+ order = "desc" if self.extractor.config("chapter-reverse") else "asc"
params = {
- "order[volume]" : order,
- "order[chapter]" : order,
- "translatedLanguage[]": config("lang"),
- "contentRating[]" : [
- "safe", "suggestive", "erotica", "pornographic"],
+ "order[volume]" : order,
+ "order[chapter]": order,
}
return self._pagination("/manga/" + uuid + "/feed", params)
def user_follows_manga_feed(self):
- params = {
- "order[publishAt]" : "desc",
- "translatedLanguage[]": self.extractor.config("lang"),
- }
+ params = {"order[publishAt]": "desc"}
return self._pagination("/user/follows/manga/feed", params)
def authenticate(self):
@@ -275,8 +268,20 @@ class MangadexAPI():
def _pagination(self, endpoint, params=None):
if params is None:
params = {}
+
+ config = self.extractor.config
+ ratings = config("ratings")
+ if ratings is None:
+ ratings = ("safe", "suggestive", "erotica", "pornographic")
+
+ params["contentRating[]"] = ratings
+ params["translatedLanguage[]"] = config("lang")
params["offset"] = 0
+ api_params = config("api-parameters")
+ if api_params:
+ params.update(api_params)
+
while True:
data = self._call(endpoint, params)
yield from data["data"]
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index 20b716b..9df43e5 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -14,15 +14,10 @@ import collections
import json
-class NhentaiBase():
- """Base class for nhentai extractors"""
+class NhentaiGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from nhentai.net"""
category = "nhentai"
root = "https://nhentai.net"
- media_url = "https://i.nhentai.net"
-
-
-class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
- """Extractor for image galleries from nhentai.net"""
pattern = r"(?:https?://)?nhentai\.net/g/(\d+)"
test = ("https://nhentai.net/g/147850/", {
"url": "5179dbf0f96af44005a0ff705a0ad64ac26547d0",
@@ -87,8 +82,8 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
}
def images(self, _):
- ufmt = "{}/galleries/{}/{{}}.{{}}".format(
- self.media_url, self.data["media_id"])
+ ufmt = ("https://i.nhentai.net/galleries/" +
+ self.data["media_id"] + "/{}.{}")
extdict = {"j": "jpg", "p": "png", "g": "gif"}
return [
@@ -99,28 +94,24 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
]
-class NhentaiSearchExtractor(NhentaiBase, Extractor):
- """Extractor for nhentai search results"""
- subcategory = "search"
- pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
- test = ("https://nhentai.net/search/?q=touhou", {
- "pattern": NhentaiGalleryExtractor.pattern,
- "count": 30,
- "range": "1-30",
- })
+class NhentaiExtractor(Extractor):
+ """Base class for nhentai extractors"""
+ category = "nhentai"
+ root = "https://nhentai.net"
def __init__(self, match):
Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
+ self.path, self.query = match.groups()
def items(self):
data = {"_extractor": NhentaiGalleryExtractor}
- for gallery_id in self._pagination(self.params):
+ for gallery_id in self._pagination():
url = "{}/g/{}/".format(self.root, gallery_id)
yield Message.Queue, url, data
- def _pagination(self, params):
- url = "{}/search/".format(self.root)
+ def _pagination(self):
+ url = self.root + self.path
+ params = text.parse_query(self.query)
params["page"] = text.parse_int(params.get("page"), 1)
while True:
@@ -131,29 +122,40 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
params["page"] += 1
-class NhentaiFavoriteExtractor(NhentaiBase, Extractor):
+class NhentaiTagExtractor(NhentaiExtractor):
+ """Extractor for nhentai tag searches"""
+ subcategory = "tag"
+ pattern = (r"(?:https?://)?nhentai\.net("
+ r"/(?:artist|category|character|group|language|parody|tag)"
+ r"/[^/?#]+(?:/popular[^/?#]*)?/?)(?:\?([^#]+))?")
+ test = (
+ ("https://nhentai.net/tag/sole-female/", {
+ "pattern": NhentaiGalleryExtractor.pattern,
+ "count": 30,
+ "range": "1-30",
+ }),
+ ("https://nhentai.net/artist/itou-life/"),
+ ("https://nhentai.net/group/itou-life/"),
+ ("https://nhentai.net/parody/touhou-project/"),
+ ("https://nhentai.net/character/patchouli-knowledge/popular"),
+ ("https://nhentai.net/category/doujinshi/popular-today"),
+ ("https://nhentai.net/language/english/popular-week"),
+ )
+
+
+class NhentaiSearchExtractor(NhentaiExtractor):
+ """Extractor for nhentai search results"""
+ subcategory = "search"
+ pattern = r"(?:https?://)?nhentai\.net(/search/?)\?([^#]+)"
+ test = ("https://nhentai.net/search/?q=touhou", {
+ "pattern": NhentaiGalleryExtractor.pattern,
+ "count": 30,
+ "range": "1-30",
+ })
+
+
+class NhentaiFavoriteExtractor(NhentaiExtractor):
"""Extractor for nhentai favorites"""
subcategory = "favorite"
- pattern = r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?"
+ pattern = r"(?:https?://)?nhentai\.net(/favorites/?)(?:\?([^#]+))?"
test = ("https://nhentai.net/favorites/",)
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.params = text.parse_query(match.group(1))
-
- def items(self):
- data = {"_extractor": NhentaiGalleryExtractor}
- for gallery_id in self._pagination(self.params):
- url = "{}/g/{}/".format(self.root, gallery_id)
- yield Message.Queue, url, data
-
- def _pagination(self, params):
- url = "{}/favorites/".format(self.root)
- params["page"] = text.parse_int(params.get("page"), 1)
-
- while True:
- page = self.request(url, params=params).text
- yield from text.extract_iter(page, 'href="/g/', '/')
- if 'class="next"' not in page:
- return
- params["page"] += 1
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 547465b..c7df089 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -32,22 +32,19 @@ class PatreonExtractor(Extractor):
if "session_id" not in self.session.cookies:
self.log.warning("no 'session_id' cookie set")
PatreonExtractor._warning = False
+ generators = self._build_file_generators(self.config("files"))
for post in self.posts():
if not post.get("current_user_can_view", True):
self.log.warning("Not allowed to view post %s", post["id"])
continue
+ yield Message.Directory, post
+
post["num"] = 0
hashes = set()
-
- yield Message.Directory, post
- for kind, url, name in itertools.chain(
- self._images(post),
- self._attachments(post),
- self._postfile(post),
- self._content(post),
- ):
+ for kind, url, name in itertools.chain.from_iterable(
+ g(post) for g in generators):
fhash = self._filehash(url)
if fhash not in hashes or not fhash:
hashes.add(fhash)
@@ -82,15 +79,14 @@ class PatreonExtractor(Extractor):
if url:
yield "attachment", url, attachment["name"]
- @staticmethod
- def _content(post):
+ def _content(self, post):
content = post.get("content")
if content:
for img in text.extract_iter(
content, '<img data-media-id="', '>'):
url = text.extract(img, 'src="', '"')[0]
if url:
- yield "content", url, url
+ yield "content", url, self._filename(url) or url
def posts(self):
"""Return all relevant post objects"""
@@ -155,7 +151,7 @@ class PatreonExtractor(Extractor):
included[file["type"]][file["id"]]
for file in files["data"]
]
- return []
+ return ()
@memcache(keyarg=1)
def _user(self, url):
@@ -212,6 +208,20 @@ class PatreonExtractor(Extractor):
"&json-api-version=1.0"
)
+ def _build_file_generators(self, filetypes):
+ if filetypes is None:
+ return (self._images, self._attachments,
+ self._postfile, self._content)
+ genmap = {
+ "images" : self._images,
+ "attachments": self._attachments,
+ "postfile" : self._postfile,
+ "content" : self._content,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+ return [genmap[ft] for ft in filetypes]
+
class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
@@ -305,8 +315,9 @@ class PatreonPostExtractor(PatreonExtractor):
"count": 4,
}),
# postfile + content
- ("https://www.patreon.com/posts/19987002", {
- "count": 4,
+ ("https://www.patreon.com/posts/56127163", {
+ "count": 3,
+ "keyword": {"filename": r"re:^(?!1).+$"},
}),
# tags (#1539)
("https://www.patreon.com/posts/free-post-12497641", {
diff --git a/gallery_dl/extractor/picarto.py b/gallery_dl/extractor/picarto.py
new file mode 100644
index 0000000..77a07b4
--- /dev/null
+++ b/gallery_dl/extractor/picarto.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://picarto.tv/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class PicartoGalleryExtractor(Extractor):
+ """Extractor for picarto galleries"""
+ category = "picarto"
+ subcategory = "gallery"
+ root = "https://picarto.tv"
+ directory_fmt = ("{category}", "{channel[name]}")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ pattern = r"(?:https?://)?picarto\.tv/([^/?#]+)/gallery"
+ test = ("https://picarto.tv/fnook/gallery/default/", {
+ "pattern": r"https://images\.picarto\.tv/gallery/\d/\d\d/\d+/artwork"
+ r"/[0-9a-f-]+/large-[0-9a-f]+\.(jpg|png|gif)",
+ "count": ">= 7",
+ "keyword": {"date": "type:datetime"},
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def items(self):
+ for post in self.posts():
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%d %H:%M:%S")
+ variations = post.pop("variations", ())
+ yield Message.Directory, post
+
+ image = post["default_image"]
+ if not image:
+ continue
+ url = "https://images.picarto.tv/gallery/" + image["name"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ for variation in variations:
+ post.update(variation)
+ image = post["default_image"]
+ url = "https://images.picarto.tv/gallery/" + image["name"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = "https://ptvintern.picarto.tv/api/channel-gallery"
+ params = {
+ "first": "30",
+ "page": 1,
+ "filter_params[album_id]": "",
+ "filter_params[channel_name]": self.username,
+ "filter_params[q]": "",
+ "filter_params[visibility]": "",
+ "order_by[field]": "published_at",
+ "order_by[order]": "DESC",
+ }
+
+ while True:
+ posts = self.request(url, params=params).json()
+ if not posts:
+ return
+ yield from posts
+ params["page"] += 1
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index e21a82c..8e47e2e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -558,6 +558,68 @@ class PixivPixivisionExtractor(PixivExtractor):
}
+class PixivSketchExtractor(Extractor):
+ """Extractor for user pages on sketch.pixiv.net"""
+ category = "pixiv"
+ subcategory = "sketch"
+ directory_fmt = ("{category}", "sketch", "{user[unique_name]}")
+ filename_fmt = "{post_id} {id}.{extension}"
+ archive_fmt = "S{user[id]}_{id}"
+ root = "https://sketch.pixiv.net"
+ cookiedomain = ".pixiv.net"
+ pattern = r"(?:https?://)?sketch\.pixiv\.net/@([^/?#]+)"
+ test = ("https://sketch.pixiv.net/@nicoby", {
+ "pattern": r"https://img\-sketch\.pixiv\.net/uploads/medium"
+ r"/file/\d+/\d+\.(jpg|png)",
+ "count": ">= 35",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.username = match.group(1)
+
+ def items(self):
+ headers = {"Referer": "{}/@{}".format(self.root, self.username)}
+
+ for post in self.posts():
+ media = post["media"]
+ post["post_id"] = post["id"]
+ post["date"] = text.parse_datetime(
+ post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ util.delete_items(post, ("id", "media", "_links"))
+
+ yield Message.Directory, post
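+            # img-sketch.pixiv.net appears to require a matching Referer
+            # header; "_http_headers" forwards it to the downloader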
+ post["_http_headers"] = headers
+
+ for photo in media:
+ original = photo["photo"]["original"]
+ post["id"] = photo["id"]
+ post["width"] = original["width"]
+ post["height"] = original["height"]
+
+ url = original["url"]
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ url = "{}/api/walls/@{}/posts/public.json".format(
+ self.root, self.username)
+ headers = {
+ "Accept": "application/vnd.sketch-v4+json",
+ "X-Requested-With": "{}/@{}".format(self.root, self.username),
+ "Referer": self.root + "/",
+ }
+
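+        # the API paginates via "_links.next" references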
+ while True:
+ data = self.request(url, headers=headers).json()
+ yield from data["data"]["items"]
+
+ next_url = data["_links"].get("next")
+ if not next_url:
+ return
+ url = self.root + next_url["href"]
+
+
class PixivAppAPI():
"""Minimal interface for the Pixiv App API for mobile devices
diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py
index 28e049b..a2a24e0 100644
--- a/gallery_dl/extractor/seisoparty.py
+++ b/gallery_dl/extractor/seisoparty.py
@@ -9,7 +9,8 @@
"""Extractors for https://seiso.party/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
import re
@@ -52,6 +53,25 @@ class SeisopartyExtractor(Extractor):
"files" : self._find_files(page),
}
 
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
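+    # cache login cookies for 28 days, keyed by username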
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/account/login"
+ data = {"username": username, "password": password}
+
+ response = self.request(url, method="POST", data=data)
+ if response.url.endswith("/account/login") and \
+ "Username or password is incorrect" in response.text:
+ raise exception.AuthenticationError()
+
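+        # session cookies are presumably set on the initial redirect
+        # response, hence response.history[0]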
+ return {c.name: c.value for c in response.history[0].cookies}
+
class SeisopartyUserExtractor(SeisopartyExtractor):
"""Extractor for all posts from a seiso.party user listing"""
@@ -136,3 +156,46 @@ class SeisopartyPostExtractor(SeisopartyExtractor):
url = "{}/post/{}/{}/{}".format(
self.root, self.service, self.user_id, self.post_id)
return (self._parse_post(self.request(url).text, self.post_id),)
+
+
+class SeisopartyFavoriteExtractor(SeisopartyExtractor):
+ """Extractor for seiso.party favorites"""
+ subcategory = "favorite"
+ pattern = r"(?:https?://)?seiso\.party/favorites/artists/?(?:\?([^#]+))?"
+ test = (
+ ("https://seiso.party/favorites/artists", {
+ "pattern": SeisopartyUserExtractor.pattern,
+ "url": "0c862434bc3bbbe84cbf41c3a6152473a8cde683",
+ "count": 3,
+ }),
+ ("https://seiso.party/favorites/artists?sort=id&sort_direction=asc", {
+ "url": "629a8b9c6d3a8a64f521908bdb3d7426ac03f8d3",
+ }),
+ )
+
+ def __init__(self, match):
+ SeisopartyExtractor.__init__(self, match)
+ self.query = match.group(1)
+
+ def items(self):
+ self._prepare_ddosguard_cookies()
+ self.login()
+
+ url = self.root + "/favorites/artists"
+ data = {"_extractor": SeisopartyUserExtractor}
+ params = text.parse_query(self.query)
+ params["page"] = text.parse_int(params.get("page"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+
+ cnt = 0
+ for card in text.extract_iter(
+ page, '<div class="artist-card', '</a>'):
+ path = text.extract(card, '<a href="', '"')[0]
+ yield Message.Queue, self.root + path, data
+ cnt += 1
+
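+            # a full page appears to list 25 artist cards;
+            # fewer than that means this was the last page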
+ if cnt < 25:
+ return
+ params["page"] += 1
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 4a3f6cd..568ee2e 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -41,6 +41,16 @@ class TwitterExtractor(Extractor):
self.cards = self.config("cards", False)
self._user_cache = {}
 
+ size = self.config("size")
+ if size is None:
+ self._size_image = "orig"
+ self._size_fallback = ("large", "medium", "small")
+ else:
+ if isinstance(size, str):
+ size = size.split(",")
+ self._size_image = size[0]
+ self._size_fallback = size[1:]
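+            # e.g. "size": "orig,large" requests "?name=orig" first and
+            # falls back to "?name=large" (see _image_fallback below)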
+
def items(self):
self.login()
metadata = self.metadata()
@@ -115,7 +125,7 @@ class TwitterExtractor(Extractor):
base, _, fmt = url.rpartition(".")
base += "?format=" + fmt + "&name="
files.append(text.nameext_from_url(url, {
- "url" : base + "orig",
+ "url" : base + self._size_image,
"width" : width,
"height" : height,
"_fallback": self._image_fallback(base),
@@ -123,11 +133,9 @@ class TwitterExtractor(Extractor):
else:
files.append({"url": media["media_url"]})
- @staticmethod
- def _image_fallback(base):
- yield base + "large"
- yield base + "medium"
- yield base + "small"
+ def _image_fallback(self, base):
+ for fmt in self._size_fallback:
+ yield base + fmt
def _extract_card(self, tweet, files):
card = tweet["card"]
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 9dd2d47..9724c4b 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -12,18 +12,67 @@ from .common import Extractor, Message
from .. import text
import re
 
+BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
 
-class VkPhotosExtractor(Extractor):
- """Extractor for photos from a vk user"""
+
+class VkExtractor(Extractor):
+ """Base class for vk extractors"""
category = "vk"
- subcategory = "photos"
directory_fmt = ("{category}", "{user[name]|user[id]}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://vk.com"
request_interval = 1.0
- pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:"
- r"(?:albums|photos|id)(-?\d+)|([^/?#]+))")
+
+ def items(self):
+ data = self.metadata()
+ yield Message.Directory, data
+ for photo in self.photos():
+ photo.update(data)
+ yield Message.Url, photo["url"], photo
+
+ def _pagination(self, photos_url, user_id):
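+        # image URLs carry an "/impf/" or "/impg/" path component;
+        # replacing it with "/" presumably yields the full-size file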
+ sub = re.compile(r"/imp[fg]/").sub
+ needle = 'data-id="{}_'.format(user_id)
+ cnt = 0
+
+ headers = {
+ "X-Requested-With": "XMLHttpRequest",
+ "Origin" : self.root,
+ "Referer" : photos_url,
+ }
+ params = {
+ "al" : "1",
+ "al_ad" : "0",
+ "offset": 0,
+ "part" : "1",
+ }
+
+ while True:
+ payload = self.request(
+ photos_url, method="POST", headers=headers, data=params
+ ).json()["payload"][1]
+
+ offset = payload[0]
+ html = payload[1]
+
+ for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
+ pid = photo[:photo.find('"')]
+ url = photo[photo.rindex("(")+1:]
+ url = sub("/", url.partition("?")[0])
+ yield text.nameext_from_url(url, {"url": url, "id": pid})
+
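+            # cnt holds the index of the last extracted photo; a short
+            # page or an unchanged offset apparently marks the final page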
+ if cnt <= 20 or offset == params["offset"]:
+ return
+ params["offset"] = offset
+
+
+class VkPhotosExtractor(VkExtractor):
+ """Extractor for photos from a vk user"""
+ subcategory = "photos"
+ pattern = (BASE_PATTERN + r"/(?:"
+ r"(?:albums|photos|id)(-?\d+)"
+ r"|(?!album-?\d+_)([^/?#]+))")
test = (
("https://vk.com/id398982326", {
"pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
@@ -58,10 +107,14 @@ class VkPhotosExtractor(Extractor):
)
def __init__(self, match):
- Extractor.__init__(self, match)
+ VkExtractor.__init__(self, match)
self.user_id, self.user_name = match.groups()
- def items(self):
+ def photos(self):
+ url = "{}/photos{}".format(self.root, self.user_id)
+ return self._pagination(url, self.user_id)
+
+ def metadata(self):
if self.user_id:
user_id = self.user_id
prefix = "public" if user_id[0] == "-" else "id"
@@ -70,40 +123,8 @@ class VkPhotosExtractor(Extractor):
else:
url = "{}/{}".format(self.root, self.user_name)
data = self._extract_profile(url)
- user_id = data["user"]["id"]
-
- photos_url = "{}/photos{}".format(self.root, user_id)
- headers = {
- "X-Requested-With": "XMLHttpRequest",
- "Origin" : self.root,
- "Referer" : photos_url,
- }
- params = {
- "al" : "1",
- "al_ad" : "0",
- "offset": 0,
- "part" : "1",
- }
-
- yield Message.Directory, data
- sub = re.compile(r"/imp[fg]/").sub
- needle = 'data-id="{}_'.format(user_id)
- cnt = 0
-
- while True:
- offset, html = self.request(
- photos_url, method="POST", headers=headers, data=params
- ).json()["payload"][1]
-
- for cnt, photo in enumerate(text.extract_iter(html, needle, ')')):
- data["id"] = photo[:photo.find('"')]
- url = photo[photo.rindex("(")+1:]
- url = sub("/", url.partition("?")[0])
- yield Message.Url, url, text.nameext_from_url(url, data)
-
- if cnt <= 40 or offset == params["offset"]:
- return
- params["offset"] = offset
+ self.user_id = data["user"]["id"]
+ return data
def _extract_profile(self, url):
extr = text.extract_from(self.request(url).text)
@@ -116,3 +137,32 @@ class VkPhotosExtractor(Extractor):
'<span class="current_text">', '</span'))),
"id" : extr('<a href="/albums', '"'),
}}
+
+
+class VkAlbumExtractor(VkExtractor):
+ """Extractor for a vk album"""
+ subcategory = "album"
+ directory_fmt = ("{category}", "{user[id]}", "{album[id]}")
+ pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$"
+ test = (
+ ("https://vk.com/album221469416_0", {
+ "count": 3,
+ }),
+ ("https://vk.com/album-165740836_281339889", {
+ "count": 12,
+ }),
+ )
+
+ def __init__(self, match):
+ VkExtractor.__init__(self, match)
+ self.user_id, self.album_id = match.groups()
+
+ def photos(self):
+ url = "{}/album{}_{}".format(self.root, self.user_id, self.album_id)
+ return self._pagination(url, self.user_id)
+
+ def metadata(self):
+ return {
+ "user": {"id": self.user_id},
+ "album": {"id": self.album_id},
+ }
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index a08cdc4..b3b94f7 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -20,36 +20,32 @@ class ComparePP(PostProcessor):
PostProcessor.__init__(self, job)
if options.get("shallow"):
self._compare = self._compare_size
+ self._equal_exc = self._equal_cnt = 0
- action = options.get("action")
- if action == "enumerate":
- job.register_hooks({"file": self.enumerate}, options)
- else:
- job.register_hooks({"file": self.compare}, options)
- action, _, smax = action.partition(":")
- self._skipmax = text.parse_int(smax)
- self._skipexc = self._skipcnt = 0
- if action == "abort":
- self._skipexc = exception.StopExtraction
- elif action == "terminate":
- self._skipexc = exception.TerminateExtraction
- elif action == "exit":
- self._skipexc = sys.exit
-
- def compare(self, pathfmt):
+ equal = options.get("equal")
+ if equal:
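+            # "equal" has the form "<action>[:<max>]", e.g. "abort:3"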
+ equal, _, emax = equal.partition(":")
+ self._equal_max = text.parse_int(emax)
+ if equal == "abort":
+ self._equal_exc = exception.StopExtraction
+ elif equal == "terminate":
+ self._equal_exc = exception.TerminateExtraction
+ elif equal == "exit":
+ self._equal_exc = sys.exit
+
+ job.register_hooks({"file": (
+ self.enumerate
+ if options.get("action") == "enumerate" else
+ self.replace
+ )}, options)
+
+ def replace(self, pathfmt):
try:
if self._compare(pathfmt.realpath, pathfmt.temppath):
- if self._skipexc:
- self._skipcnt += 1
- if self._skipcnt >= self._skipmax:
- util.remove_file(pathfmt.temppath)
- print()
- raise self._skipexc()
- pathfmt.delete = True
- else:
- self._skipcnt = 0
+ return self._equal(pathfmt)
except OSError:
pass
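+        # the files differ or could not be compared:
+        # reset the consecutive-duplicates counter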
+ self._equal_cnt = 0
def enumerate(self, pathfmt):
num = 1
@@ -58,9 +54,10 @@ class ComparePP(PostProcessor):
pathfmt.prefix = str(num) + "."
pathfmt.set_extension(pathfmt.extension, False)
num += 1
- pathfmt.delete = True
+ return self._equal(pathfmt)
except OSError:
pass
+ self._equal_cnt = 0
def _compare(self, f1, f2):
return self._compare_size(f1, f2) and self._compare_content(f1, f2)
@@ -81,5 +78,14 @@ class ComparePP(PostProcessor):
if not buf1:
return True
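+
+    # a duplicate was found: mark it for deletion and, if an "equal"
+    # action is configured, raise the corresponding exception after
+    # "_equal_max" consecutive duplicates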
+ def _equal(self, pathfmt):
+ if self._equal_exc:
+ self._equal_cnt += 1
+ if self._equal_cnt >= self._equal_max:
+ util.remove_file(pathfmt.temppath)
+ print()
+ raise self._equal_exc()
+ pathfmt.delete = True
+
__postprocessor__ = ComparePP
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index acc3b8d..ee01549 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.19.0"
+__version__ = "1.19.1"
diff --git a/test/test_cache.py b/test/test_cache.py
index ecf482c..9b3623a 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -20,6 +20,7 @@ from gallery_dl import config, util # noqa E402
dbpath = tempfile.mkstemp()[1]
config.set(("cache",), "file", dbpath)
from gallery_dl import cache # noqa E402
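+# make sure the cache database is initialized before any tests run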
+cache._init()
# def tearDownModule():