author    Unit 193 <unit193@unit193.net>  2024-03-25 02:57:50 -0400
committer Unit 193 <unit193@unit193.net>  2024-03-25 02:57:50 -0400
commit    039aecad63a074bdcc75bd4f30c84bdc4a1a244f (patch)
tree      39997d0361f67208c716fba6717895b42ee309d1
parent    77d11300e15ee4045b187a58ee6e039624e5d69c (diff)
parent    6e662211019a89caec44de8a57c675872b0b5498 (diff)
Update upstream source from tag 'upstream/1.26.9'
Update to upstream version '1.26.9' with Debian dir e04ff15132747292d335c9d456bd879aca333115
-rw-r--r--  CHANGELOG.md | 84
-rw-r--r--  PKG-INFO | 6
-rw-r--r--  README.rst | 4
-rw-r--r--  data/man/gallery-dl.1 | 2
-rw-r--r--  data/man/gallery-dl.conf.5 | 254
-rw-r--r--  gallery_dl.egg-info/PKG-INFO | 6
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt | 1
-rw-r--r--  gallery_dl/downloader/http.py | 2
-rw-r--r--  gallery_dl/exception.py | 2
-rw-r--r--  gallery_dl/extractor/__init__.py | 1
-rw-r--r--  gallery_dl/extractor/artstation.py | 97
-rw-r--r--  gallery_dl/extractor/bluesky.py | 49
-rw-r--r--  gallery_dl/extractor/bunkr.py | 1
-rw-r--r--  gallery_dl/extractor/common.py | 12
-rw-r--r--  gallery_dl/extractor/deviantart.py | 152
-rw-r--r--  gallery_dl/extractor/fapello.py | 3
-rw-r--r--  gallery_dl/extractor/flickr.py | 13
-rw-r--r--  gallery_dl/extractor/gelbooru.py | 66
-rw-r--r--  gallery_dl/extractor/gofile.py | 33
-rw-r--r--  gallery_dl/extractor/hiperdex.py | 12
-rw-r--r--  gallery_dl/extractor/idolcomplex.py | 15
-rw-r--r--  gallery_dl/extractor/imagefap.py | 15
-rw-r--r--  gallery_dl/extractor/imgur.py | 5
-rw-r--r--  gallery_dl/extractor/instagram.py | 9
-rw-r--r--  gallery_dl/extractor/kemonoparty.py | 20
-rw-r--r--  gallery_dl/extractor/lensdump.py | 2
-rw-r--r--  gallery_dl/extractor/mastodon.py | 6
-rw-r--r--  gallery_dl/extractor/naver.py | 28
-rw-r--r--  gallery_dl/extractor/nijie.py | 2
-rw-r--r--  gallery_dl/extractor/nitter.py | 5
-rw-r--r--  gallery_dl/extractor/paheal.py | 24
-rw-r--r--  gallery_dl/extractor/pixiv.py | 21
-rw-r--r--  gallery_dl/extractor/pornhub.py | 3
-rw-r--r--  gallery_dl/extractor/reddit.py | 8
-rw-r--r--  gallery_dl/extractor/redgifs.py | 11
-rw-r--r--  gallery_dl/extractor/skeb.py | 73
-rw-r--r--  gallery_dl/extractor/steamgriddb.py | 3
-rw-r--r--  gallery_dl/extractor/subscribestar.py | 2
-rw-r--r--  gallery_dl/extractor/test.py | 81
-rw-r--r--  gallery_dl/extractor/twitter.py | 70
-rw-r--r--  gallery_dl/extractor/vipergirls.py | 46
-rw-r--r--  gallery_dl/extractor/warosu.py | 16
-rw-r--r--  gallery_dl/extractor/weibo.py | 63
-rw-r--r--  gallery_dl/extractor/wikimedia.py | 5
-rw-r--r--  gallery_dl/extractor/xvideos.py | 32
-rw-r--r--  gallery_dl/extractor/zerochan.py | 58
-rw-r--r--  gallery_dl/formatter.py | 12
-rw-r--r--  gallery_dl/output.py | 3
-rw-r--r--  gallery_dl/text.py | 10
-rw-r--r--  gallery_dl/util.py | 2
-rw-r--r--  gallery_dl/version.py | 2
-rw-r--r--  test/test_cache.py | 2
-rw-r--r--  test/test_config.py | 2
-rw-r--r--  test/test_cookies.py | 6
-rw-r--r--  test/test_downloader.py | 3
-rw-r--r--  test/test_extractor.py | 8
-rw-r--r--  test/test_formatter.py | 32
-rw-r--r--  test/test_job.py | 2
-rw-r--r--  test/test_output.py | 2
-rw-r--r--  test/test_postprocessor.py | 3
-rw-r--r--  test/test_results.py | 32
-rw-r--r--  test/test_text.py | 4
-rw-r--r--  test/test_util.py | 2
63 files changed, 1147 insertions(+), 403 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f938ab9..8cdcf64 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,89 @@
# Changelog
+## 1.26.9 - 2024-03-23
+### Extractors
+#### Additions
+- [artstation] support video clips ([#2566](https://github.com/mikf/gallery-dl/issues/2566), [#3309](https://github.com/mikf/gallery-dl/issues/3309), [#3911](https://github.com/mikf/gallery-dl/issues/3911))
+- [artstation] support collections ([#146](https://github.com/mikf/gallery-dl/issues/146))
+- [deviantart] recognize `deviantart.com/stash/…` URLs
+- [idolcomplex] support new pool URLs
+- [lensdump] recognize direct image links ([#5293](https://github.com/mikf/gallery-dl/issues/5293))
+- [skeb] add extractor for followed users ([#5290](https://github.com/mikf/gallery-dl/issues/5290))
+- [twitter] add `quotes` extractor ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
+- [wikimedia] support `azurlane.koumakan.jp` ([#5256](https://github.com/mikf/gallery-dl/issues/5256))
+- [xvideos] support `/channels/` URLs ([#5244](https://github.com/mikf/gallery-dl/issues/5244))
+#### Fixes
+- [artstation] fix handling usernames with dashes in domain names ([#5224](https://github.com/mikf/gallery-dl/issues/5224))
+- [bluesky] fix not spawning child extractors for followed users ([#5246](https://github.com/mikf/gallery-dl/issues/5246))
+- [deviantart] handle CloudFront blocks ([#5363](https://github.com/mikf/gallery-dl/issues/5363))
+- [deviantart:avatar] fix `index` for URLs without `?` ([#5276](https://github.com/mikf/gallery-dl/issues/5276))
+- [deviantart:stash] fix `index` values ([#5335](https://github.com/mikf/gallery-dl/issues/5335))
+- [gofile] fix extraction
+- [hiperdex] update URL patterns & fix `manga` metadata ([#5340](https://github.com/mikf/gallery-dl/issues/5340))
+- [idolcomplex] fix metadata extraction
+- [imagefap] fix folder extraction ([#5333](https://github.com/mikf/gallery-dl/issues/5333))
+- [instagram] make accessing `like_count` non-fatal ([#5218](https://github.com/mikf/gallery-dl/issues/5218))
+- [mastodon] fix handling null `moved` account field ([#5321](https://github.com/mikf/gallery-dl/issues/5321))
+- [naver] fix EUC-KR encoding issue in old image URLs ([#5126](https://github.com/mikf/gallery-dl/issues/5126))
+- [nijie] increase default delay between requests ([#5221](https://github.com/mikf/gallery-dl/issues/5221))
+- [nitter] ignore invalid Tweets ([#5253](https://github.com/mikf/gallery-dl/issues/5253))
+- [pixiv:novel] fix text extraction ([#5285](https://github.com/mikf/gallery-dl/issues/5285), [#5309](https://github.com/mikf/gallery-dl/issues/5309))
+- [skeb] retry 429 responses containing a `request_key` cookie ([#5210](https://github.com/mikf/gallery-dl/issues/5210))
+- [warosu] fix crash for threads with deleted posts ([#5289](https://github.com/mikf/gallery-dl/issues/5289))
+- [weibo] fix retweets ([#2825](https://github.com/mikf/gallery-dl/issues/2825), [#3874](https://github.com/mikf/gallery-dl/issues/3874), [#5263](https://github.com/mikf/gallery-dl/issues/5263))
+- [weibo] fix `livephoto` filename extensions ([#5287](https://github.com/mikf/gallery-dl/issues/5287))
+- [xvideos] fix galleries with more than 500 images ([#5244](https://github.com/mikf/gallery-dl/issues/5244))
+#### Improvements
+- [bluesky] improve API error messages
+- [bluesky] handle posts with different `embed` structure
+- [deviantart:avatar] ignore default avatars ([#5276](https://github.com/mikf/gallery-dl/issues/5276))
+- [fapello] download full-sized images ([#5349](https://github.com/mikf/gallery-dl/issues/5349))
+- [gelbooru:favorite] automatically detect returned post order ([#5220](https://github.com/mikf/gallery-dl/issues/5220))
+- [imgur] fail downloads when redirected to `removed.png` ([#5308](https://github.com/mikf/gallery-dl/issues/5308))
+- [instagram] raise proper error for missing `reels_media` ([#5257](https://github.com/mikf/gallery-dl/issues/5257))
+- [instagram] change `posts are private` exception to a warning ([#5322](https://github.com/mikf/gallery-dl/issues/5322))
+- [reddit] improve preview fallback formats ([#5296](https://github.com/mikf/gallery-dl/issues/5296), [#5315](https://github.com/mikf/gallery-dl/issues/5315))
+- [steamgriddb] raise exception for deleted assets
+- [twitter] handle "account is temporarily locked" errors ([#5300](https://github.com/mikf/gallery-dl/issues/5300))
+- [weibo] rework pagination logic ([#4168](https://github.com/mikf/gallery-dl/issues/4168))
+- [zerochan] fetch more posts by using the API ([#3669](https://github.com/mikf/gallery-dl/issues/3669))
+#### Metadata
+- [bluesky] add `instance` metadata field ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
+- [gelbooru:favorite] add `date_favorited` metadata field
+- [imagefap] extract `folder` metadata ([#5270](https://github.com/mikf/gallery-dl/issues/5270))
+- [instagram] default `likes` to `0` ([#5323](https://github.com/mikf/gallery-dl/issues/5323))
+- [kemonoparty] add `revision_count` metadata field ([#5334](https://github.com/mikf/gallery-dl/issues/5334))
+- [naver] unescape post `title` and `description`
+- [pornhub:gif] extract `viewkey` and `timestamp` metadata ([#4463](https://github.com/mikf/gallery-dl/issues/4463))
+- [redgifs] make `date` available for directories ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
+- [subscribestar] fix `date` metadata
+- [twitter] add `birdwatch` metadata field ([#5317](https://github.com/mikf/gallery-dl/issues/5317))
+- [twitter] add `protected` metadata field ([#5327](https://github.com/mikf/gallery-dl/issues/5327))
+- [warosu] fix `board_name` metadata
+#### Options
+- [bluesky] add `reposts` option ([#4438](https://github.com/mikf/gallery-dl/issues/4438), [#5248](https://github.com/mikf/gallery-dl/issues/5248))
+- [deviantart] add `comments-avatars` option ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
+- [deviantart] extend `metadata` option ([#5175](https://github.com/mikf/gallery-dl/issues/5175))
+- [flickr] add `contexts` option ([#5324](https://github.com/mikf/gallery-dl/issues/5324))
+- [gelbooru:favorite] add `order-posts` option ([#5220](https://github.com/mikf/gallery-dl/issues/5220))
+- [kemonoparty] add `order-revisions` option ([#5334](https://github.com/mikf/gallery-dl/issues/5334))
+- [vipergirls] add `like` option ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
+- [vipergirls] add `domain` option ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
+### Downloaders
+- [http] add MIME type and signature for `.mov` files ([#5287](https://github.com/mikf/gallery-dl/issues/5287))
+### Docker
+- build images from source instead of PyPI package
+- build `linux/arm64` images ([#5227](https://github.com/mikf/gallery-dl/issues/5227))
+- build images on every push to master
+ - tag images as `YYYY.MM.DD`
+ - tag the most recent build from master as `dev`
+ - tag the most recent release build as `latest`
+- reduce image size ([#5097](https://github.com/mikf/gallery-dl/issues/5097))
+### Miscellaneous
+- [formatter] fix local DST datetime offsets for `:O`
+- build Linux executable on Ubuntu 22.04 LTS ([#4184](https://github.com/mikf/gallery-dl/issues/4184))
+- automatically create directories for logging files ([#5249](https://github.com/mikf/gallery-dl/issues/5249))
+
## 1.26.8 - 2024-02-17
### Extractors
#### Additions
diff --git a/PKG-INFO b/PKG-INFO
index b4f974c..0395c3e 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.8
+Version: 1.26.9
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 6f6aa02..9d017ab 100644
--- a/README.rst
+++ b/README.rst
@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 078ff4f..863d75d 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2024-02-17" "1.26.8" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2024-03-23" "1.26.9" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 4b349dd..7b57923 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2024-02-17" "1.26.8" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2024-03-23" "1.26.9" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1308,6 +1308,28 @@ Try to follow external URLs of embedded players.
Limit the number of posts/projects to download.
+.SS extractor.artstation.previews
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download video previews.
+
+
+.SS extractor.artstation.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download video clips.
+
+
.SS extractor.artstation.search.pro-first
.IP "Type:" 6
\f[I]bool\f[]
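For example, both ArtStation options above could be set together in a gallery-dl.conf excerpt (a minimal sketch; "videos" already defaults to true):

{
    "extractor": {
        "artstation": {
            "videos": true,
            "previews": true
        }
    }
}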
@@ -1448,6 +1470,17 @@ Sets the maximum depth of returned reply posts.
(See depth parameter of \f[I]app.bsky.feed.getPostThread\f[])
+.SS extractor.bluesky.reposts
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Process reposts.
+
+
.SS extractor.cyberdrop.domain
.IP "Type:" 6
\f[I]string\f[]
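To opt into reposts, the option above would appear in gallery-dl.conf as (sketch):

{
    "extractor": {
        "bluesky": {
            "reposts": true
        }
    }
}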
@@ -1605,6 +1638,19 @@ unwatch that user at the end of the current extractor run.
Extract \f[I]comments\f[] metadata.
+.SS extractor.deviantart.comments-avatars
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download the avatar of each commenting user.
+
+Note: Enabling this option also enables deviantart.comments.
+
+
.SS extractor.deviantart.extra
.IP "Type:" 6
\f[I]bool\f[]
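A minimal gallery-dl.conf excerpt enabling the option above (which, per the note, also turns on comment extraction):

{
    "extractor": {
        "deviantart": {
            "comments-avatars": true
        }
    }
}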
@@ -1775,14 +1821,46 @@ form of content filtering.
.SS extractor.deviantart.metadata
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]false\f[]
+.IP "Example:" 4
+.br
+* "stats,submission"
+.br
+* ["camera", "stats", "submission"]
+
.IP "Description:" 4
-Request extended metadata for deviation objects to additionally provide
-\f[I]description\f[], \f[I]tags\f[], \f[I]license\f[] and \f[I]is_watching\f[] fields.
+Extract additional metadata for deviation objects.
+
+Provides
+\f[I]description\f[], \f[I]tags\f[], \f[I]license\f[], and \f[I]is_watching\f[]
+fields when enabled.
+
+It is possible to request extended metadata by specifying a list of
+
+.br
+* \f[I]camera\f[] : EXIF information (if available)
+.br
+* \f[I]stats\f[] : deviation statistics
+.br
+* \f[I]submission\f[] : submission information
+.br
+* \f[I]collection\f[] : favourited folder information (requires a \f[I]refresh token\f[])
+.br
+* \f[I]gallery\f[] : gallery folder information (requires a \f[I]refresh token\f[])
+
+Set this option to \f[I]"all"\f[] to request all extended metadata categories.
+
+See \f[I]/deviation/metadata\f[]
+for official documentation.
.SS extractor.deviantart.original
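Sketches of the three value types this option accepts, as gallery-dl.conf fragments under "extractor" -> "deviantart":

"metadata": true
"metadata": "stats,submission"
"metadata": ["camera", "stats", "submission"]
"metadata": "all"

The last form requests every category, so "collection" and "gallery" will need a refresh token as noted above.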
@@ -2113,6 +2191,21 @@ The \f[I]access_token\f[] and \f[I]access_token_secret\f[] values you get
from \f[I]linking your Flickr account to gallery-dl\f[].
+.SS extractor.flickr.contexts
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+For each photo, return the albums and pools it belongs to
+as \f[I]set\f[] and \f[I]pool\f[] metadata.
+
+Note: This requires 1 additional API call per photo.
+See \f[I]flickr.photos.getAllContexts\f[] for details.
+
+
.SS extractor.flickr.exif
.IP "Type:" 6
\f[I]bool\f[]
@@ -2121,9 +2214,11 @@ from \f[I]linking your Flickr account to gallery-dl\f[].
\f[I]false\f[]
.IP "Description:" 4
-Fetch exif and camera metadata for each photo.
+For each photo, return its EXIF/TIFF/GPS tags
+as \f[I]exif\f[] and \f[I]camera\f[] metadata.
Note: This requires 1 additional API call per photo.
+See \f[I]flickr.photos.getExif\f[] for details.
.SS extractor.flickr.metadata
@@ -2150,7 +2245,7 @@ Extract additional metadata
It is possible to specify a custom list of metadata includes.
See \f[I]the extras parameter\f[]
-in \f[I]Flickr API docs\f[]
+in \f[I]Flickr's API docs\f[]
for possible field names.
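The two per-photo metadata options could be combined in a gallery-dl.conf excerpt (each enabled option adds one API call per photo, so expect slower runs):

{
    "extractor": {
        "flickr": {
            "contexts": true,
            "exif": true
        }
    }
}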
@@ -2271,6 +2366,24 @@ Values from the API Access Credentials section found at the bottom of your
page.
+.SS extractor.gelbooru.favorite.order-posts
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which favorited posts are returned.
+
+.br
+* \f[I]"asc"\f[]: Ascending favorite date order (oldest first)
+.br
+* \f[I]"desc"\f[]: Descending favorite date order (newest first)
+.br
+* \f[I]"reverse"\f[]: Same as \f[I]"asc"\f[]
+
+
.SS extractor.generic.enabled
.IP "Type:" 6
\f[I]bool\f[]
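For example, to receive favorites oldest-first (a sketch using nested subcategory config):

{
    "extractor": {
        "gelbooru": {
            "favorite": {
                "order-posts": "asc"
            }
        }
    }
}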
@@ -2668,6 +2781,26 @@ Set this to \f[I]"unique"\f[] to filter out duplicate revisions.
Note: This requires 1 additional HTTP request per post.
+.SS extractor.kemonoparty.order-revisions
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"desc"\f[]
+
+.IP "Description:" 4
+Controls the order in which
+\f[I]revisions\f[]
+are returned.
+
+.br
+* \f[I]"asc"\f[]: Ascending order (oldest first)
+.br
+* \f[I]"desc"\f[]: Descending order (newest first)
+.br
+* \f[I]"reverse"\f[]: Same as \f[I]"asc"\f[]
+
+
.SS extractor.khinsider.format
.IP "Type:" 6
\f[I]string\f[]
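A sketch pairing the new ordering option with the revisions option it acts on:

{
    "extractor": {
        "kemonoparty": {
            "revisions": true,
            "order-revisions": "asc"
        }
    }
}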
@@ -3742,7 +3875,7 @@ used to specify all dimensions. Valid values are:
.IP "Description:" 4
Only include assets that are in the specified file types. \f[I]all\f[] can be
-used to specifiy all file types. Valid values are:
+used to specify all file types. Valid values are:
.br
* Grids: \f[I]png\f[], \f[I]jpeg\f[], \f[I]jpg\f[], \f[I]webp\f[]
@@ -3794,7 +3927,7 @@ Include assets tagged with humor when downloading from a list of assets.
.IP "Description:" 4
Only include assets that are in the specified languages. \f[I]all\f[] can be
-used to specifiy all languages. Valid values are \f[I]ISO 639-1\f[]
+used to specify all languages. Valid values are \f[I]ISO 639-1\f[]
language codes.
@@ -4320,6 +4453,22 @@ Selects how to handle exceeding the API rate limit.
* \f[I]"wait"\f[]: Wait until rate limit reset
+.SS extractor.twitter.locked
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"abort"\f[]
+
+.IP "Description:" 4
+Selects how to handle "account is temporarily locked" errors.
+
+.br
+* \f[I]"abort"\f[]: Raise an error and stop extraction
+.br
+* \f[I]"wait"\f[]: Wait until the account is unlocked and retry
+
+
.SS extractor.twitter.replies
.IP "Type:" 6
\f[I]bool\f[]
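For example, to wait out a temporary account lock instead of aborting (sketch):

{
    "extractor": {
        "twitter": {
            "locked": "wait"
        }
    }
}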
@@ -4481,6 +4630,33 @@ Available formats are
\f[I]"raw"\f[], \f[I]"full"\f[], \f[I]"regular"\f[], \f[I]"small"\f[], and \f[I]"thumb"\f[].
+.SS extractor.vipergirls.domain
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"vipergirls.to"\f[]
+
+.IP "Description:" 4
+Specifies the domain used by \f[I]vipergirls\f[] extractors.
+
+For example \f[I]"viper.click"\f[] if the main domain is blocked or to bypass Cloudflare.
+
+
+.SS extractor.vipergirls.like
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Automatically like posts after downloading their images.
+
+Note: Requires \f[I]login\f[]
+or \f[I]cookies\f[]
+
+
.SS extractor.vsco.videos
.IP "Type:" 6
\f[I]bool\f[]
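Both vipergirls options above in one gallery-dl.conf excerpt (sketch; "like" additionally needs login or cookies as noted):

{
    "extractor": {
        "vipergirls": {
            "domain": "viper.click",
            "like": true
        }
    }
}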
@@ -4631,7 +4807,7 @@ Download \f[I]livephoto\f[] files.
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
.IP "Description:" 4
Fetch media from retweeted posts.
@@ -4780,6 +4956,24 @@ Extract additional metadata (date, md5, tags, ...)
Note: This requires 1-2 additional HTTP requests per post.
+.SS extractor.zerochan.pagination
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"api"\f[]
+
+.IP "Description:" 4
+Controls how to paginate over tag search results.
+
+.br
+* \f[I]"api"\f[]: Use the \f[I]JSON API\f[]
+(no \f[I]extension\f[] metadata)
+.br
+* \f[I]"html"\f[]: Parse HTML pages
+(limited to 100 pages * 24 posts)
+
+
.SS extractor.[booru].tags
.IP "Type:" 6
\f[I]bool\f[]
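To switch to HTML pagination, keeping extension metadata at the cost of the 100-page limit (sketch):

{
    "extractor": {
        "zerochan": {
            "pagination": "html"
        }
    }
}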
@@ -5664,13 +5858,16 @@ and \f[I]{_filename}\f[].
.SS exec.event
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"after"\f[]
.IP "Description:" 4
-The event for which \f[I]exec.command\f[] is run.
+The event(s) for which \f[I]exec.command\f[] is run.
See \f[I]metadata.event\f[] for a list of available events.
@@ -5767,15 +5964,24 @@ Note: \f[I]metadata.extension\f[] is ignored if this option is set.
.SS metadata.event
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"file"\f[]
+.IP "Example:" 4
+.br
+* "prepare,file,after"
+.br
+* ["prepare-after", "skip"]
+
.IP "Description:" 4
-The event for which metadata gets written to a file.
+The event(s) for which metadata gets written to a file.
-The available events are:
+Available events are:
\f[I]init\f[]
After post processor initialization
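With list values now accepted, one post processor can fire on several events. A gallery-dl.conf sketch using the example events from above:

{
    "postprocessors": [
        {
            "name": "metadata",
            "mode": "json",
            "event": ["prepare-after", "skip"]
        }
    ]
}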
@@ -6013,13 +6219,18 @@ post processor with \f[I]"event": "post"\f[] runs *before* it.
.SS mtime.event
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"file"\f[]
.IP "Description:" 4
-See \f[I]metadata.event\f[]
+The event(s) for which \f[I]mtime.key\f[] or \f[I]mtime.value\f[] get evaluated.
+
+See \f[I]metadata.event\f[] for a list of available events.
.SS mtime.key
@@ -6075,13 +6286,16 @@ akin to
.SS python.event
.IP "Type:" 6
-\f[I]string\f[]
+.br
+* \f[I]string\f[]
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
.IP "Default:" 9
\f[I]"file"\f[]
.IP "Description:" 4
-The event for which \f[I]python.function\f[] gets called.
+The event(s) for which \f[I]python.function\f[] gets called.
See \f[I]metadata.event\f[] for a list of available events.
@@ -6099,7 +6313,7 @@ See \f[I]metadata.event\f[] for a list of available events.
.IP "Description:" 4
The Python function to call.
-This function gets specified as \f[I]<module>:<function name>\f[]
+This function is specified as \f[I]<module>:<function name>\f[]
and gets called with the current metadata dict as argument.
\f[I]module\f[] is either an importable Python module name
@@ -6540,7 +6754,7 @@ section of your account's preferences
.br
* set \f[I]http://localhost:6414/\f[] as "redirect uri"
.br
-* solve the "I'm not a rebot" reCATCHA if needed
+* solve the "I'm not a robot" reCAPTCHA if needed
.br
* click "create app"
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index b4f974c..0395c3e 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.26.8
+Version: 1.26.9
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -112,9 +112,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index ba1f7d8..96a6469 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -198,7 +198,6 @@ gallery_dl/extractor/szurubooru.py
gallery_dl/extractor/tapas.py
gallery_dl/extractor/tcbscans.py
gallery_dl/extractor/telegraph.py
-gallery_dl/extractor/test.py
gallery_dl/extractor/tmohentai.py
gallery_dl/extractor/toyhouse.py
gallery_dl/extractor/tsumino.py
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index f1d2c4a..0ff5dd9 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -399,6 +399,7 @@ MIME_TYPES = {
"video/webm": "webm",
"video/ogg" : "ogg",
"video/mp4" : "mp4",
+ "video/quicktime": "mov",
"audio/wav" : "wav",
"audio/x-wav": "wav",
@@ -441,6 +442,7 @@ SIGNATURE_CHECKS = {
"psd" : lambda s: s[0:4] == b"8BPS",
"mp4" : lambda s: (s[4:8] == b"ftyp" and s[8:11] in (
b"mp4", b"avc", b"iso", b"M4V")),
+ "mov" : lambda s: s[4:12] == b"ftypqt ",
"webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
"ogg" : lambda s: s[0:4] == b"OggS",
"wav" : lambda s: (s[0:4] == b"RIFF" and
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index ee183fc..08dcfdc 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -57,7 +57,7 @@ class HttpError(ExtractionError):
def __init__(self, message, response=None):
ExtractionError.__init__(self, message)
self.response = response
- self.status = response.status_code if response else 0
+ self.status = 0 if response is None else response.status_code
class NotFoundError(ExtractionError):
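The reordering above avoids a truthiness pitfall: requests.Response defines __bool__ via its ok attribute, so an error response is falsy even though it exists. A minimal illustration (assumes network access to httpbin.org):

import requests

response = requests.get("https://httpbin.org/status/404")
print(bool(response))                                    # False: ok means status_code < 400
print(response.status_code if response else 0)           # 0   -> old form, hides the real status
print(0 if response is None else response.status_code)   # 404 -> new form, correct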
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index a665249..591e6a8 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -194,7 +194,6 @@ modules = [
"directlink",
"recursive",
"oauth",
- "test",
"ytdl",
"generic",
]
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index b58b3d3..49fde7b 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -29,11 +29,13 @@ class ArtstationExtractor(Extractor):
self.user = match.group(1) or match.group(2)
def items(self):
- data = self.metadata()
-
- projects = self.projects()
+ videos = self.config("videos", True)
+ previews = self.config("previews", False)
external = self.config("external", False)
max_posts = self.config("max-posts")
+
+ data = self.metadata()
+ projects = self.projects()
if max_posts:
projects = itertools.islice(projects, max_posts)
@@ -45,13 +47,29 @@ class ArtstationExtractor(Extractor):
asset["num"] = num
yield Message.Directory, asset
- if adict["has_embedded_player"] and external:
+ if adict["has_embedded_player"]:
player = adict["player_embedded"]
url = (text.extr(player, 'src="', '"') or
text.extr(player, "src='", "'"))
- if url and not url.startswith(self.root):
- asset["extension"] = None
- yield Message.Url, "ytdl:" + url, asset
+ if url.startswith(self.root):
+ # video clip hosted on artstation
+ if videos:
+ page = self.request(url).text
+ url = text.extr(page, ' src="', '"')
+ text.nameext_from_url(url, asset)
+ yield Message.Url, url, asset
+ elif url:
+ # external URL
+ if external:
+ asset["extension"] = "mp4"
+ yield Message.Url, "ytdl:" + url, asset
+ else:
+ self.log.debug(player)
+ self.log.warning(
+ "Failed to extract embedded player URL (%s)",
+ adict.get("id"))
+
+ if not previews:
continue
if adict["has_image"]:
@@ -59,10 +77,11 @@ class ArtstationExtractor(Extractor):
text.nameext_from_url(url, asset)
url = self._no_cache(url)
- lhs, _, rhs = url.partition("/large/")
- if rhs:
- url = lhs + "/4k/" + rhs
- asset["_fallback"] = self._image_fallback(lhs, rhs)
+ if "/video_clips/" not in url:
+ lhs, _, rhs = url.partition("/large/")
+ if rhs:
+ url = lhs + "/4k/" + rhs
+ asset["_fallback"] = self._image_fallback(lhs, rhs)
yield Message.Url, url, asset
@@ -175,7 +194,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
subcategory = "user"
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?"
- r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
+ r"|((?!www)[\w-]+)\.artstation\.com(?:/projects)?)/?$")
example = "https://www.artstation.com/USER"
def projects(self):
@@ -192,7 +211,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
archive_fmt = "a_{album[id]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)"
- r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
+ r"|((?!www)[\w-]+)\.artstation\.com)/albums/(\d+)")
example = "https://www.artstation.com/USER/albums/12345"
def __init__(self, match):
@@ -226,7 +245,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
directory_fmt = ("{category}", "{userinfo[username]}", "Likes")
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
- r"/(?!artwork|projects|search)([^/?#]+)/likes/?")
+ r"/(?!artwork|projects|search)([^/?#]+)/likes")
example = "https://www.artstation.com/USER/likes"
def projects(self):
@@ -234,6 +253,54 @@ class ArtstationLikesExtractor(ArtstationExtractor):
return self._pagination(url)
+class ArtstationCollectionExtractor(ArtstationExtractor):
+ """Extractor for an artstation collection"""
+ subcategory = "collection"
+ directory_fmt = ("{category}", "{user}",
+ "{collection[id]} {collection[name]}")
+ archive_fmt = "c_{collection[id]}_{asset[id]}"
+ pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
+ r"/(?!artwork|projects|search)([^/?#]+)/collections/(\d+)")
+ example = "https://www.artstation.com/USER/collections/12345"
+
+ def __init__(self, match):
+ ArtstationExtractor.__init__(self, match)
+ self.collection_id = match.group(2)
+
+ def metadata(self):
+ url = "{}/collections/{}.json".format(
+ self.root, self.collection_id)
+ params = {"username": self.user}
+ collection = self.request(
+ url, params=params, notfound="collection").json()
+ return {"collection": collection, "user": self.user}
+
+ def projects(self):
+ url = "{}/collections/{}/projects.json".format(
+ self.root, self.collection_id)
+ params = {"collection_id": self.collection_id}
+ return self._pagination(url, params)
+
+
+class ArtstationCollectionsExtractor(ArtstationExtractor):
+ """Extractor for an artstation user's collections"""
+ subcategory = "collections"
+ pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
+ r"/(?!artwork|projects|search)([^/?#]+)/collections/?$")
+ example = "https://www.artstation.com/USER/collections"
+
+ def items(self):
+ url = self.root + "/collections.json"
+ params = {"username": self.user}
+
+ for collection in self.request(
+ url, params=params, notfound="collections").json():
+ url = "{}/{}/collections/{}".format(
+ self.root, self.user, collection["id"])
+ collection["_extractor"] = ArtstationCollectionExtractor
+ yield Message.Queue, url, collection
+
+
class ArtstationChallengeExtractor(ArtstationExtractor):
"""Extractor for submissions of artstation challenges"""
subcategory = "challenge"
@@ -355,7 +422,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
"""Extractor for images from a single artstation project"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:"
- r"(?:\w+\.)?artstation\.com/(?:artwork|projects|search)"
+ r"(?:[\w-]+\.)?artstation\.com/(?:artwork|projects|search)"
r"|artstn\.co/p)/(\w+)")
example = "https://www.artstation.com/artwork/abcde"
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index 8de0d7b..84c3187 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -39,12 +39,19 @@ class BlueskyExtractor(Extractor):
self._metadata_facets = ("facets" in meta)
self.api = BlueskyAPI(self)
- self._user = None
+ self._user = self._user_did = None
+ self.instance = self.root.partition("://")[2]
def items(self):
for post in self.posts():
if "post" in post:
post = post["post"]
+
+ pid = post["uri"].rpartition("/")[2]
+ if self._user_did and post["author"]["did"] != self._user_did:
+ self.log.debug("Skipping %s (repost)", pid)
+ continue
+
post.update(post["record"])
del post["record"]
@@ -75,7 +82,8 @@ class BlueskyExtractor(Extractor):
if self._metadata_user:
post["user"] = self._user or post["author"]
- post["post_id"] = post["uri"].rpartition("/")[2]
+ post["instance"] = self.instance
+ post["post_id"] = pid
post["count"] = len(images)
post["date"] = text.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
@@ -101,10 +109,14 @@ class BlueskyExtractor(Extractor):
post["width"] = post["height"] = 0
image = file["image"]
- post["filename"] = link = image["ref"]["$link"]
+ try:
+ cid = image["ref"]["$link"]
+ except KeyError:
+ cid = image["cid"]
+ post["filename"] = cid
post["extension"] = image["mimeType"].rpartition("/")[2]
- yield Message.Url, base + link, post
+ yield Message.Url, base + cid, post
def posts(self):
return ()
@@ -230,6 +242,7 @@ class BlueskyFollowingExtractor(BlueskyExtractor):
def items(self):
for user in self.api.get_follows(self.user):
url = "https://bsky.app/profile/" + user["did"]
+ user["_extractor"] = BlueskyUserExtractor
yield Message.Queue, url, user
@@ -314,7 +327,7 @@ class BlueskyAPI():
endpoint = "app.bsky.feed.getFeed"
params = {
"feed" : "at://{}/app.bsky.feed.generator/{}".format(
- self._did_from_actor(actor), feed),
+ self._did_from_actor(actor, False), feed),
"limit": "100",
}
return self._pagination(endpoint, params)
@@ -331,7 +344,7 @@ class BlueskyAPI():
endpoint = "app.bsky.feed.getListFeed"
params = {
"list" : "at://{}/app.bsky.graph.list/{}".format(
- self._did_from_actor(actor), list),
+ self._did_from_actor(actor, False), list),
"limit": "100",
}
return self._pagination(endpoint, params)
@@ -378,14 +391,17 @@ class BlueskyAPI():
}
return self._pagination(endpoint, params, "posts")
- def _did_from_actor(self, actor):
+ def _did_from_actor(self, actor, user_did=True):
if actor.startswith("did:"):
did = actor
else:
did = self.resolve_handle(actor)
- if self.extractor._metadata_user:
- self.extractor._user = self.get_profile(did)
+ extr = self.extractor
+ if user_did and not extr.config("reposts", False):
+ extr._user_did = did
+ if extr._metadata_user:
+ extr._user = self.get_profile(did)
return did
@@ -434,13 +450,20 @@ class BlueskyAPI():
if response.status_code < 400:
return response.json()
if response.status_code == 429:
- self.extractor.wait(seconds=60)
+ until = response.headers.get("RateLimit-Reset")
+ self.extractor.wait(until=until)
continue
+ try:
+ data = response.json()
+ msg = "API request failed ('{}: {}')".format(
+ data["error"], data["message"])
+ except Exception:
+ msg = "API request failed ({} {})".format(
+ response.status_code, response.reason)
+
self.extractor.log.debug("Server response: %s", response.text)
- raise exception.StopExtraction(
- "API request failed (%s %s)",
- response.status_code, response.reason)
+ raise exception.StopExtraction(msg)
def _pagination(self, endpoint, params, key="feed"):
while True:
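Instead of a fixed 60-second sleep, the 429 branch above now derives the wait time from the response. A standalone sketch of the same idea, assuming RateLimit-Reset carries a Unix timestamp as in the AT Protocol's rate-limit headers:

import time

def wait_for_rate_limit(response, fallback=60.0):
    """Sleep until the epoch time in RateLimit-Reset, or for a fallback period."""
    until = response.headers.get("RateLimit-Reset")
    seconds = float(until) - time.time() if until else fallback
    if seconds > 0:
        time.sleep(seconds)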
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 1a0e47d..a093347 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -54,7 +54,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"album_id" : self.album_id,
"album_name" : text.unescape(info[0]),
"album_size" : size[1:-1],
- "description": text.unescape(info[2]) if len(info) > 2 else "",
"count" : len(urls),
}
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index cf0f8c9..d14e13a 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -203,9 +203,15 @@ class Extractor():
self.log.debug("%s (%s/%s)", msg, tries, retries+1)
if tries > retries:
break
- self.sleep(
- max(tries, self._interval()) if self._interval else tries,
- "retry")
+
+ if self._interval:
+ seconds = self._interval()
+ if seconds < tries:
+ seconds = tries
+ else:
+ seconds = tries
+
+ self.sleep(seconds, "retry")
tries += 1
raise exception.HttpError(msg, response)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 0cf4f88..ca8acaa 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -18,12 +18,12 @@ import binascii
import time
import re
-
BASE_PATTERN = (
r"(?:https?://)?(?:"
r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
)
+DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"
class DeviantartExtractor(Extractor):
@@ -47,8 +47,9 @@ class DeviantartExtractor(Extractor):
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
self.original = self.config("original", True)
- self.comments = self.config("comments", False)
self.intermediary = self.config("intermediary", True)
+ self.comments_avatars = self.config("comments-avatars", False)
+ self.comments = self.comments_avatars or self.config("comments", False)
self.api = DeviantartOAuthAPI(self)
self.group = False
@@ -83,6 +84,16 @@ class DeviantartExtractor(Extractor):
else:
self.commit_journal = None
+ def request(self, url, **kwargs):
+ if "fatal" not in kwargs:
+ kwargs["fatal"] = False
+ while True:
+ response = Extractor.request(self, url, **kwargs)
+ if response.status_code != 403 or \
+ b"Request blocked." not in response.content:
+ return response
+ self.wait(seconds=300, reason="CloudFront block")
+
def skip(self, num):
self.offset += num
return num
@@ -100,9 +111,9 @@ class DeviantartExtractor(Extractor):
if self.user:
group = self.config("group", True)
if group:
- profile = self.api.user_profile(self.user)
- if profile:
- self.user = profile["user"]["username"]
+ user = _user_details(self, self.user)
+ if user:
+ self.user = user["username"]
self.group = False
elif group == "skip":
self.log.info("Skipping group '%s'", self.user)
@@ -172,6 +183,20 @@ class DeviantartExtractor(Extractor):
deviation["is_original"] = True
yield self.commit_journal(deviation, journal)
+ if self.comments_avatars:
+ for comment in deviation["comments"]:
+ user = comment["user"]
+ name = user["username"].lower()
+ if user["usericon"] == DEFAULT_AVATAR:
+ self.log.debug(
+ "Skipping avatar of '%s' (default)", name)
+ continue
+ _user_details.update(name, user)
+
+ url = "{}/{}/avatar/".format(self.root, name)
+ comment["_extractor"] = DeviantartAvatarExtractor
+ yield Message.Queue, url, comment
+
if not self.extra:
continue
@@ -198,7 +223,9 @@ class DeviantartExtractor(Extractor):
"""Adjust the contents of a Deviation-object"""
if "index" not in deviation:
try:
- if deviation["url"].startswith("https://sta.sh"):
+ if deviation["url"].startswith((
+ "https://www.deviantart.com/stash/", "https://sta.sh",
+ )):
filename = deviation["content"]["src"].split("/")[5]
deviation["index_base36"] = filename.partition("-")[0][1:]
deviation["index"] = id_from_base36(
@@ -445,18 +472,12 @@ class DeviantartExtractor(Extractor):
def _limited_request(self, url, **kwargs):
"""Limits HTTP requests to one every 2 seconds"""
- kwargs["fatal"] = None
diff = time.time() - DeviantartExtractor._last_request
if diff < 2.0:
self.sleep(2.0 - diff, "request")
-
- while True:
- response = self.request(url, **kwargs)
- if response.status_code != 403 or \
- b"Request blocked." not in response.content:
- DeviantartExtractor._last_request = time.time()
- return response
- self.wait(seconds=180)
+ response = self.request(url, **kwargs)
+ DeviantartExtractor._last_request = time.time()
+ return response
def _fetch_premium(self, deviation):
try:
@@ -569,13 +590,18 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
def deviations(self):
name = self.user.lower()
- profile = self.api.user_profile(name)
- if not profile:
+ user = _user_details(self, name)
+ if not user:
return ()
- user = profile["user"]
icon = user["usericon"]
- index = icon.rpartition("?")[2]
+ if icon == DEFAULT_AVATAR:
+ self.log.debug("Skipping avatar of '%s' (default)", name)
+ return ()
+
+ _, sep, index = icon.rpartition("?")
+ if not sep:
+ index = "0"
formats = self.config("formats")
if not formats:
@@ -658,7 +684,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
- pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
+ pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)"
+ r"/([a-z0-9]+)")
example = "https://sta.sh/abcde"
skip = Extractor.skip
@@ -679,7 +706,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
if uuid:
deviation = self.api.deviation(uuid)
deviation["index"] = text.parse_int(text.extr(
- page, 'gmi-deviationid="', '"'))
+ page, '\\"deviationId\\":', ','))
yield deviation
return
@@ -1086,9 +1113,8 @@ class DeviantartOAuthAPI():
if not isinstance(self.mature, str):
self.mature = "true" if self.mature else "false"
- self.folders = extractor.config("folders", False)
- self.metadata = extractor.extra or extractor.config("metadata", False)
self.strategy = extractor.config("pagination")
+ self.folders = extractor.config("folders", False)
self.public = extractor.config("public", True)
client_id = extractor.config("client-id")
@@ -1106,6 +1132,42 @@ class DeviantartOAuthAPI():
token = None
self.refresh_token_key = token
+ metadata = extractor.config("metadata", False)
+ if not metadata:
+ metadata = bool(extractor.extra)
+ if metadata:
+ self.metadata = True
+
+ if isinstance(metadata, str):
+ if metadata == "all":
+ metadata = ("submission", "camera", "stats",
+ "collection", "gallery")
+ else:
+ metadata = metadata.replace(" ", "").split(",")
+ elif not isinstance(metadata, (list, tuple)):
+ metadata = ()
+
+ self._metadata_params = {"mature_content": self.mature}
+ self._metadata_public = None
+ if metadata:
+ # extended metadata
+ self.limit = 10
+ for param in metadata:
+ self._metadata_params["ext_" + param] = "1"
+ if "ext_collection" in self._metadata_params or \
+ "ext_gallery" in self._metadata_params:
+ if token:
+ self._metadata_public = False
+ else:
+ self.log.error("'collection' and 'gallery' metadata "
+ "require a refresh token")
+ else:
+ # base metadata
+ self.limit = 50
+ else:
+ self.metadata = False
+ self.limit = None
+
self.log.debug(
"Using %s API credentials (client-id %s)",
"default" if self.client_id == self.CLIENT_ID else "custom",
@@ -1115,14 +1177,14 @@ class DeviantartOAuthAPI():
def browse_deviantsyouwatch(self, offset=0):
"""Yield deviations from users you watch"""
endpoint = "/browse/deviantsyouwatch"
- params = {"limit": "50", "offset": offset,
+ params = {"limit": 50, "offset": offset,
"mature_content": self.mature}
return self._pagination(endpoint, params, public=False)
def browse_posts_deviantsyouwatch(self, offset=0):
"""Yield posts from users you watch"""
endpoint = "/browse/posts/deviantsyouwatch"
- params = {"limit": "50", "offset": offset,
+ params = {"limit": 50, "offset": offset,
"mature_content": self.mature}
return self._pagination(endpoint, params, public=False, unpack=True)
@@ -1131,7 +1193,7 @@ class DeviantartOAuthAPI():
endpoint = "/browse/newest"
params = {
"q" : query,
- "limit" : 50 if self.metadata else 120,
+ "limit" : 120,
"offset" : offset,
"mature_content": self.mature,
}
@@ -1142,7 +1204,7 @@ class DeviantartOAuthAPI():
endpoint = "/browse/popular"
params = {
"q" : query,
- "limit" : 50 if self.metadata else 120,
+ "limit" : 120,
"timerange" : timerange,
"offset" : offset,
"mature_content": self.mature,
@@ -1249,8 +1311,11 @@ class DeviantartOAuthAPI():
"deviationids[{}]={}".format(num, deviation["deviationid"])
for num, deviation in enumerate(deviations)
)
- params = {"mature_content": self.mature}
- return self._call(endpoint, params=params)["metadata"]
+ return self._call(
+ endpoint,
+ params=self._metadata_params,
+ public=self._metadata_public,
+ )["metadata"]
def gallery(self, username, folder_id, offset=0, extend=True, public=None):
"""Yield all Deviation-objects contained in a gallery folder"""
@@ -1357,9 +1422,14 @@ class DeviantartOAuthAPI():
self.authenticate(None if public else self.refresh_token_key)
kwargs["headers"] = self.headers
response = self.extractor.request(url, **kwargs)
- data = response.json()
- status = response.status_code
+ try:
+ data = response.json()
+ except ValueError:
+ self.log.error("Unable to parse API response")
+ data = {}
+
+ status = response.status_code
if 200 <= status < 400:
if self.delay > self.delay_min:
self.delay -= 1
@@ -1412,6 +1482,9 @@ class DeviantartOAuthAPI():
if public is None:
public = self.public
+ if self.limit and params["limit"] > self.limit:
+ params["limit"] = (params["limit"] // self.limit) * self.limit
+
while True:
data = self._call(endpoint, params=params, public=public)
try:
@@ -1483,6 +1556,15 @@ class DeviantartOAuthAPI():
def _metadata(self, deviations):
"""Add extended metadata to each deviation object"""
+ if len(deviations) <= self.limit:
+ self._metadata_batch(deviations)
+ else:
+ n = self.limit
+ for index in range(0, len(deviations), n):
+ self._metadata_batch(deviations[index:index+n])
+
+ def _metadata_batch(self, deviations):
+ """Fetch extended metadata for a single batch of deviations"""
for deviation, metadata in zip(
deviations, self.deviation_metadata(deviations)):
deviation.update(metadata)
@@ -1667,6 +1749,14 @@ class DeviantartEclipseAPI():
return token
+@memcache(keyarg=1)
+def _user_details(extr, name):
+ try:
+ return extr.api.user_profile(name)["user"]
+ except Exception:
+ return None
+
+
@cache(maxage=36500*86400, keyarg=0)
def _refresh_token_cache(token):
if token and token[0] == "#":
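The _metadata/_metadata_batch split above keeps each /deviation/metadata call within self.limit deviations (10 with extended metadata, 50 otherwise). The batching idiom in isolation:

def batched(items, n):
    """Yield successive slices of at most n items, preserving order."""
    for index in range(0, len(items), n):
        yield items[index:index + n]

# 23 deviations with extended metadata -> batches of 10, 10, 3
print([len(batch) for batch in batched(list(range(23)), 10)])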
diff --git a/gallery_dl/extractor/fapello.py b/gallery_dl/extractor/fapello.py
index aff8e61..838ae7b 100644
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -42,7 +42,8 @@ class FapelloPostExtractor(Extractor):
"type" : "video" if 'type="video' in page else "photo",
"thumbnail": text.extr(page, 'poster="', '"'),
}
- url = text.extr(page, 'src="', '"')
+ url = text.extr(page, 'src="', '"').replace(
+ ".md", "").replace(".th", "")
yield Message.Directory, data
yield Message.Url, url, text.nameext_from_url(url, data)
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index f7dc3cc..c94a110 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -77,6 +77,8 @@ class FlickrImageExtractor(FlickrExtractor):
photo = self.api.photos_getInfo(self.item_id)
if self.api.exif:
photo.update(self.api.photos_getExif(self.item_id))
+ if self.api.contexts:
+ photo.update(self.api.photos_getAllContexts(self.item_id))
if photo["media"] == "video" and self.api.videos:
self.api._extract_video(photo)
@@ -268,6 +270,8 @@ class FlickrAPI(oauth.OAuth1API):
self.exif = extractor.config("exif", False)
self.videos = extractor.config("videos", True)
+ self.contexts = extractor.config("contexts", False)
+
self.maxsize = extractor.config("size-max")
if isinstance(self.maxsize, str):
for fmt, fmtname, fmtwidth in self.FORMATS:
@@ -311,6 +315,13 @@ class FlickrAPI(oauth.OAuth1API):
params = {"user_id": user_id}
return self._pagination("people.getPhotos", params)
+ def photos_getAllContexts(self, photo_id):
+ """Returns all visible sets and pools the photo belongs to."""
+ params = {"photo_id": photo_id}
+ data = self._call("photos.getAllContexts", params)
+ del data["stat"]
+ return data
+
def photos_getExif(self, photo_id):
"""Retrieves a list of EXIF/TIFF/GPS tags for a given photo."""
params = {"photo_id": photo_id}
@@ -444,6 +455,8 @@ class FlickrAPI(oauth.OAuth1API):
if self.exif:
photo.update(self.photos_getExif(photo["id"]))
+ if self.contexts:
+ photo.update(self.photos_getAllContexts(photo["id"]))
photo["id"] = text.parse_int(photo["id"])
if "owner" in photo:
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 83f1392..2459a61 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -32,6 +32,9 @@ class GelbooruBase():
url = self.root + "/index.php?page=dapi&q=index&json=1"
data = self.request(url, params=params).json()
+ if not key:
+ return data
+
try:
posts = data[key]
except KeyError:
@@ -167,13 +170,61 @@ class GelbooruFavoriteExtractor(GelbooruBase,
params = {
"s" : "favorite",
"id" : self.favorite_id,
- "limit": "1",
+ "limit": "2",
}
+ data = self._api_request(params, None, True)
- count = self._api_request(params, "@attributes", True)[0]["count"]
- if count <= self.offset:
- return
+ count = data["@attributes"]["count"]
+ self.log.debug("API reports %s favorite entries", count)
+
+ favs = data["favorite"]
+ try:
+ order = 1 if favs[0]["id"] < favs[1]["id"] else -1
+ except LookupError as exc:
+ self.log.debug(
+ "Error when determining API favorite order (%s: %s)",
+ exc.__class__.__name__, exc)
+ order = -1
+ else:
+ self.log.debug("API yields favorites in %sscending order",
+ "a" if order > 0 else "de")
+
+ order_favs = self.config("order-posts")
+ if order_favs and order_favs[0] in ("r", "a"):
+ self.log.debug("Returning them in reverse")
+ order = -order
+
+ if order < 0:
+ return self._pagination(params, count)
+ return self._pagination_reverse(params, count)
+
+ def _pagination(self, params, count):
+ if self.offset:
+ pnum, skip = divmod(self.offset, self.per_page)
+ else:
+ pnum = skip = 0
+
+ params["pid"] = pnum
+ params["limit"] = self.per_page
+
+ while True:
+ favs = self._api_request(params, "favorite")
+
+ if not favs:
+ return
+
+ if skip:
+ favs = favs[skip:]
+ skip = 0
+
+ for fav in favs:
+ for post in self._api_request({"id": fav["favorite"]}):
+ post["date_favorited"] = text.parse_timestamp(fav["added"])
+ yield post
+
+ params["pid"] += 1
+ def _pagination_reverse(self, params, count):
pnum, last = divmod(count-1, self.per_page)
if self.offset > last:
# page number change
@@ -182,12 +233,11 @@ class GelbooruFavoriteExtractor(GelbooruBase,
pnum -= diff + 1
skip = self.offset
- # paginate over them in reverse
params["pid"] = pnum
params["limit"] = self.per_page
while True:
- favs = self._api_request(params, "favorite", True)
+ favs = self._api_request(params, "favorite")
favs.reverse()
if skip:
@@ -195,7 +245,9 @@ class GelbooruFavoriteExtractor(GelbooruBase,
skip = 0
for fav in favs:
- yield from self._api_request({"id": fav["favorite"]})
+ for post in self._api_request({"id": fav["favorite"]}):
+ post["date_favorited"] = text.parse_timestamp(fav["added"])
+ yield post
params["pid"] -= 1
if params["pid"] < 0:
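The favorite extractor above now probes the API with "limit": "2" and compares the first two IDs to learn which order the server returns. That decision, distilled (defaults to newest-first when fewer than two entries exist):

def detect_order(favs):
    """Return 1 for ascending, -1 for descending favorite order."""
    try:
        return 1 if favs[0]["id"] < favs[1]["id"] else -1
    except LookupError:
        return -1  # empty or single-entry result

print(detect_order([{"id": 3}, {"id": 7}]))  # 1  (ascending)
print(detect_order([{"id": 7}]))             # -1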
diff --git a/gallery_dl/extractor/gofile.py b/gallery_dl/extractor/gofile.py
index 289f91c..f0eb4e9 100644
--- a/gallery_dl/extractor/gofile.py
+++ b/gallery_dl/extractor/gofile.py
@@ -41,9 +41,13 @@ class GofileFolderExtractor(Extractor):
folder = self._get_content(self.content_id, password)
yield Message.Directory, folder
+ try:
+ contents = folder.pop("children")
+ except KeyError:
+ raise exception.AuthorizationError("Password required")
+
num = 0
- contents = folder.pop("contents")
- for content_id in folder["childs"]:
+ for content_id in folder["childrenIds"]:
content = contents[content_id]
content["folder"] = folder
@@ -67,31 +71,32 @@ class GofileFolderExtractor(Extractor):
@memcache()
def _create_account(self):
self.log.debug("Creating temporary account")
- return self._api_request("createAccount")["token"]
+ return self._api_request("accounts", method="POST")["token"]
@cache(maxage=86400)
def _get_website_token(self):
self.log.debug("Fetching website token")
page = self.request(self.root + "/dist/js/alljs.js").text
- return text.extr(page, 'fetchData.wt = "', '"')
+ return text.extr(page, 'wt: "', '"')
def _get_content(self, content_id, password=None):
+ headers = {"Authorization": "Bearer " + self.api_token}
+ params = {"wt": self.website_token}
if password is not None:
- password = hashlib.sha256(password.encode()).hexdigest()
- return self._api_request("getContent", {
- "contentId" : content_id,
- "token" : self.api_token,
- "wt" : self.website_token,
- "password" : password,
- })
-
- def _api_request(self, endpoint, params=None):
+ params["password"] = hashlib.sha256(password.encode()).hexdigest()
+ return self._api_request("contents/" + content_id, params, headers)
+
+ def _api_request(self, endpoint, params=None, headers=None, method="GET"):
response = self.request(
- "https://api.gofile.io/" + endpoint, params=params).json()
+ "https://api.gofile.io/" + endpoint,
+ method=method, params=params, headers=headers,
+ ).json()
if response["status"] != "ok":
if response["status"] == "error-notFound":
raise exception.NotFoundError("content")
+ if response["status"] == "error-passwordRequired":
+ raise exception.AuthorizationError("Password required")
raise exception.StopExtraction(
"%s failed (Status: %s)", endpoint, response["status"])
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 20491b5..aadce6c 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -25,7 +25,7 @@ class HiperdexBase():
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
if not page:
- url = "{}/manga/{}/".format(self.root, manga)
+ url = "{}/mangas/{}/".format(self.root, manga)
page = self.request(url).text
extr = text.extract_from(page)
@@ -33,7 +33,7 @@ class HiperdexBase():
"url" : text.unescape(extr(
'property="og:url" content="', '"')),
"manga" : text.unescape(extr(
- '"headline": "', '"')),
+ ' property="name" title="', '"')),
"score" : text.parse_float(extr(
'id="averagerate">', '<')),
"author" : text.remove_html(extr(
@@ -68,8 +68,8 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
- pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
- example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
+ pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
+ example = "https://hiperdex.com/mangas/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@@ -90,8 +90,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for manga from hiperdex.com"""
chapterclass = HiperdexChapterExtractor
- pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
- example = "https://hiperdex.com/manga/MANGA/"
+ pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
+ example = "https://hiperdex.com/mangas/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index c249a3e..dfd9a31 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -101,9 +101,8 @@ class IdolcomplexExtractor(SankakuExtractor):
page = self.request(url, retries=10).text
extr = text.extract_from(page)
- pid_alnum = extr('/posts/', '"')
- vavg = extr('itemprop="ratingValue">', "<")
- vcnt = extr('itemprop="reviewCount">', "<")
+ vavg = extr('id="rating"', "</ul>")
+ vcnt = extr('>Votes</strong>:', "<")
pid = extr(">Post ID:", "<")
created = extr(' title="', '"')
@@ -120,10 +119,10 @@ class IdolcomplexExtractor(SankakuExtractor):
rating = extr(">Rating:", "<br")
data = {
- "id" : text.parse_int(pid),
- "id_alnum" : pid_alnum,
+ "id" : pid.strip(),
"md5" : file_url.rpartition("/")[2].partition(".")[0],
- "vote_average": text.parse_float(vavg),
+ "vote_average": (1.0 * vavg.count('class="star-full"') +
+ 0.5 * vavg.count('class="star-half"')),
"vote_count" : text.parse_int(vcnt),
"created_at" : created,
"date" : text.parse_datetime(
@@ -222,8 +221,8 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
- pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
- example = "https://idol.sankakucomplex.com/pools/show/12345"
+ pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
+ example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
per_page = 24
def __init__(self, match):
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index 3bdcfdf..85446c0 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -161,11 +161,12 @@ class ImagefapFolderExtractor(ImagefapExtractor):
self.user = user or profile
def items(self):
- for gallery_id, name in self.galleries(self.folder_id):
+ for gallery_id, name, folder in self.galleries(self.folder_id):
url = "{}/gallery/{}".format(self.root, gallery_id)
data = {
"gallery_id": gallery_id,
"title" : text.unescape(name),
+ "folder" : text.unescape(folder),
"_extractor": ImagefapGalleryExtractor,
}
yield Message.Queue, url, data
@@ -173,6 +174,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
def galleries(self, folder_id):
"""Yield gallery IDs and titles of a folder"""
if folder_id == "-1":
+ folder_name = "Uncategorized"
if self._id:
url = "{}/usergallery.php?userid={}&folderid=-1".format(
self.root, self.user)
@@ -180,23 +182,28 @@ class ImagefapFolderExtractor(ImagefapExtractor):
url = "{}/profile/{}/galleries?folderid=-1".format(
self.root, self.user)
else:
+ folder_name = None
url = "{}/organizer/{}/".format(self.root, folder_id)
params = {"page": 0}
+ extr = text.extract_from(self.request(url, params=params).text)
+ if not folder_name:
+ folder_name = extr("class='blk_galleries'><b>", "</b>")
+
while True:
- extr = text.extract_from(self.request(url, params=params).text)
cnt = 0
while True:
- gid = extr('<a href="/gallery/', '"')
+ gid = extr(' id="gid-', '"')
if not gid:
break
- yield gid, extr("<b>", "<")
+ yield gid, extr("<b>", "<"), folder_name
cnt += 1
if cnt < 20:
break
params["page"] += 1
+ extr = text.extract_from(self.request(url, params=params).text)
class ImagefapUserExtractor(ImagefapExtractor):
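
A small sketch of the new marker-based gallery extraction, done with plain string slicing the way text.extract_from does; the html fragment is invented:

    html = '<td id="gid-123"><a href="/gallery/123"><b>My Gallery<'
    start = html.index(' id="gid-') + len(' id="gid-')
    gid = html[start:html.index('"', start)]          # gallery id from id="gid-…"
    title_start = html.index("<b>", start) + 3
    title = html[title_start:html.index("<", title_start)]
    print(gid, title)  # 123 My Gallery
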
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 8884d3e..86b1edd 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -39,10 +39,15 @@ class ImgurExtractor(Extractor):
image["url"] = url = "https://i.imgur.com/{}.{}".format(
image["id"], image["ext"])
image["date"] = text.parse_datetime(image["created_at"])
+ image["_http_validate"] = self._validate
text.nameext_from_url(url, image)
return url
+ def _validate(self, response):
+ return (not response.history or
+ not response.url.endswith("/removed.png"))
+
def _items_queue(self, items):
album_ex = ImgurAlbumExtractor
image_ex = ImgurImageExtractor
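
A hedged sketch of what the new validator checks: imgur redirects deleted images to a removed.png placeholder, so a redirect chain ending there marks the file as gone (the image URL below is hypothetical):

    import requests

    def validate(response):
        # Reject downloads that were redirected to the "removed" placeholder
        return (not response.history or
                not response.url.endswith("/removed.png"))

    response = requests.get("https://i.imgur.com/aaaaaaa.jpg")
    if not validate(response):
        print("skipping removed image")
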
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 6eae7db..9c2b1de 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -165,7 +165,7 @@ class InstagramExtractor(Extractor):
data = {
"post_id" : post["pk"],
"post_shortcode": post["code"],
- "likes": post["like_count"],
+ "likes": post.get("like_count", 0),
"pinned": post.get("timeline_pinned_user_ids", ()),
"date": text.parse_timestamp(post.get("taken_at")),
}
@@ -689,7 +689,10 @@ class InstagramRestAPI():
def reels_media(self, reel_ids):
endpoint = "/v1/feed/reels_media/"
params = {"reel_ids": reel_ids}
- return self._call(endpoint, params=params)["reels_media"]
+ try:
+ return self._call(endpoint, params=params)["reels_media"]
+ except KeyError:
+ raise exception.AuthorizationError("Login required")
def tags_media(self, tag):
for section in self.tags_sections(tag):
@@ -733,7 +736,7 @@ class InstagramRestAPI():
not user["followed_by_viewer"]:
name = user["username"]
s = "" if name.endswith("s") else "s"
- raise exception.StopExtraction("%s'%s posts are private", name, s)
+ self.extractor.log.warning("%s'%s posts are private", name, s)
self.extractor._assign_user(user)
return user["id"]
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index fd5a73a..9c77b7a 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -15,7 +15,7 @@ import itertools
import json
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(su|party)"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
HASH_PATTERN = r"/[0-9a-f]{2}/[0-9a-f]{2}/([0-9a-f]{64})"
@@ -41,9 +41,12 @@ class KemonopartyExtractor(Extractor):
self.revisions = self.config("revisions")
if self.revisions:
self.revisions_unique = (self.revisions == "unique")
+ order = self.config("order-revisions")
+ self.revisions_reverse = order[0] in ("r", "a") if order else False
+
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
- r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
+ r'src="(?:https?://(?:kemono|coomer)\.(?:su|party))?(/inline/[^"]+'
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
self._json_dumps = json.JSONEncoder(
ensure_ascii=False, check_circular=False,
@@ -232,6 +235,7 @@ class KemonopartyExtractor(Extractor):
except exception.HttpError:
post["revision_hash"] = self._revision_hash(post)
post["revision_index"] = 1
+ post["revision_count"] = 1
return (post,)
revs.insert(0, post)
@@ -247,22 +251,30 @@ class KemonopartyExtractor(Extractor):
uniq.append(rev)
revs = uniq
- idx = len(revs)
+ cnt = idx = len(revs)
for rev in revs:
rev["revision_index"] = idx
+ rev["revision_count"] = cnt
idx -= 1
+ if self.revisions_reverse:
+ revs.reverse()
+
return revs
def _revisions_all(self, url):
revs = self.request(url + "/revisions").json()
- idx = len(revs)
+ cnt = idx = len(revs)
for rev in revs:
rev["revision_hash"] = self._revision_hash(rev)
rev["revision_index"] = idx
+ rev["revision_count"] = cnt
idx -= 1
+ if self.revisions_reverse:
+ revs.reverse()
+
return revs
def _revision_hash(self, revision):
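
A minimal sketch of the revision numbering above, assuming newest-first input and an "order-revisions" value whose first letter ("r"everse / "a"sc) flips the final order:

    revs = [{"id": 3}, {"id": 2}, {"id": 1}]   # newest first, as returned
    order = "reverse"                          # stand-in for config("order-revisions")
    cnt = idx = len(revs)
    for rev in revs:
        rev["revision_index"] = idx            # counts down from the total
        rev["revision_count"] = cnt
        idx -= 1
    if order and order[0] in ("r", "a"):
        revs.reverse()                         # oldest first on request
    print([(r["id"], r["revision_index"]) for r in revs])
    # [(1, 1), (2, 2), (3, 3)]
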
diff --git a/gallery_dl/extractor/lensdump.py b/gallery_dl/extractor/lensdump.py
index d4ccf33..12e8860 100644
--- a/gallery_dl/extractor/lensdump.py
+++ b/gallery_dl/extractor/lensdump.py
@@ -104,7 +104,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
directory_fmt = ("{category}",)
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/i/(\w+)"
+ pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
example = "https://lensdump.com/i/ID"
def __init__(self, match):
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 68b4196..030d7d1 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -70,7 +70,11 @@ class MastodonExtractor(BaseExtractor):
def _check_moved(self, account):
self._check_moved = None
- if "moved" in account:
+ # Certain fediverse software (such as Iceshrimp and Sharkey) sets
+ # an account's "moved" field to null instead of omitting it
+ # entirely. To handle this, check whether the "moved" value is
+ # truthy instead of merely whether the key exists.
+ if account.get("moved"):
self.log.warning("Account '%s' moved to '%s'",
account["acct"], account["moved"]["acct"])
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index 55faf9e..d3150e6 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -26,7 +26,8 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
"{post[date]:%Y-%m-%d} {post[title]}")
archive_fmt = "{blog[id]}_{post[num]}_{num}"
pattern = (r"(?:https?://)?blog\.naver\.com/"
- r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)")
+ r"(?:PostView\.n(?:aver|hn)\?blogId=(\w+)&logNo=(\d+)|"
+ r"(\w+)/(\d+)/?$)")
example = "https://blog.naver.com/BLOGID/12345"
def __init__(self, match):
@@ -46,8 +47,10 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
extr = text.extract_from(page)
data = {
"post": {
- "title" : extr('"og:title" content="', '"'),
- "description": extr('"og:description" content="', '"'),
+ "title" : text.unescape(extr(
+ '"og:title" content="', '"')),
+ "description": text.unescape(extr(
+ '"og:description" content="', '"')).replace("&nbsp;", " "),
"num" : text.parse_int(self.post_id),
},
"blog": {
@@ -62,10 +65,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
return data
def images(self, page):
- return [
- (url.replace("://post", "://blog", 1).partition("?")[0], None)
- for url in text.extract_iter(page, 'data-lazy-src="', '"')
- ]
+ results = []
+ for url in text.extract_iter(page, 'data-lazy-src="', '"'):
+ url = url.replace("://post", "://blog", 1).partition("?")[0]
+ if "\ufffd" in text.unquote(url):
+ url = text.unquote(url, encoding="EUC-KR")
+ results.append((url, None))
+ return results
class NaverBlogExtractor(NaverBase, Extractor):
@@ -73,7 +79,8 @@ class NaverBlogExtractor(NaverBase, Extractor):
subcategory = "blog"
categorytransfer = True
pattern = (r"(?:https?://)?blog\.naver\.com/"
- r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
+ r"(?:PostList\.n(?:aver|hn)\?(?:[^&#]+&)*blogId=([^&#]+)|"
+ r"(\w+)/?$)")
example = "https://blog.naver.com/BLOGID"
def __init__(self, match):
@@ -81,12 +88,11 @@ class NaverBlogExtractor(NaverBase, Extractor):
self.blog_id = match.group(1) or match.group(2)
def items(self):
-
# fetch first post number
url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id)
- post_num = text.extract(
+ post_num = text.extr(
self.request(url).text, 'gnFirstLogNo = "', '"',
- )[0]
+ )
# setup params for API calls
url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root)
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 9614513..c50c013 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -19,7 +19,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
directory_fmt = ("{category}", "{user_id}")
filename_fmt = "{image_id}_p{num}.{extension}"
archive_fmt = "{image_id}_{num}"
- request_interval = (1.0, 2.0)
+ request_interval = (2.0, 4.0)
def __init__(self, match):
BaseExtractor.__init__(self, match)
diff --git a/gallery_dl/extractor/nitter.py b/gallery_dl/extractor/nitter.py
index d36f509..2bce597 100644
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -219,7 +219,10 @@ class NitterExtractor(BaseExtractor):
self.user_obj = self._user_from_html(tweets_html[0])
for html, quote in map(self._extract_quote, tweets_html[1:]):
- yield self._tweet_from_html(html)
+ tweet = self._tweet_from_html(html)
+ if not tweet["date"]:
+ continue
+ yield tweet
if quoted and quote:
yield self._tweet_from_quote(quote)
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index 5226724..b21e1eb 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -26,13 +26,13 @@ class PahealExtractor(Extractor):
data = self.get_metadata()
for post in self.get_posts():
- url = post["file_url"]
- for key in ("id", "width", "height"):
- post[key] = text.parse_int(post[key])
+ post["id"] = text.parse_int(post["id"])
post["tags"] = text.unquote(post["tags"])
+ post["width"] = text.parse_int(post["width"])
+ post["height"] = text.parse_int(post["height"])
post.update(data)
yield Message.Directory, post
- yield Message.Url, url, post
+ yield Message.Url, post["file_url"], post
def get_metadata(self):
"""Return general metadata"""
@@ -114,17 +114,19 @@ class PahealTagExtractor(PahealExtractor):
tags, data, date = data.split("\n")
dimensions, size, ext = data.split(" // ")
- tags = text.unescape(tags)
width, _, height = dimensions.partition("x")
height, _, duration = height.partition(", ")
return {
- "id": pid, "md5": md5, "file_url": url,
- "width": width, "height": height,
- "duration": text.parse_float(duration[:-1]),
- "tags": tags,
- "size": text.parse_bytes(size[:-1]),
- "date": text.parse_datetime(date, "%B %d, %Y; %H:%M"),
+ "id" : pid,
+ "md5" : md5,
+ "file_url" : url,
+ "width" : width,
+ "height" : height,
+ "duration" : text.parse_float(duration[:-1]),
+ "tags" : text.unescape(tags),
+ "size" : text.parse_bytes(size[:-1]),
+ "date" : text.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : "{} - {}".format(pid, tags),
"extension": ext,
}
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index b9821f2..862a7db 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -650,7 +650,7 @@ class PixivNovelExtractor(PixivExtractor):
yield Message.Directory, novel
try:
- content = self.api.novel_text(novel["id"])["novel_text"]
+ content = self.api.novel_webview(novel["id"])["text"]
except Exception:
self.log.warning("Unable to download novel %s", novel["id"])
continue
@@ -663,7 +663,7 @@ class PixivNovelExtractor(PixivExtractor):
illusts = {}
for marker in text.extract_iter(content, "[", "]"):
- if marker.startswith("[jumpuri:If you would like to "):
+ if marker.startswith("uploadedimage:"):
desktop = True
elif marker.startswith("pixivimage:"):
illusts[marker[11:].partition("-")[0]] = None
@@ -918,6 +918,15 @@ class PixivAppAPI():
params = {"novel_id": novel_id}
return self._call("/v1/novel/text", params)
+ def novel_webview(self, novel_id):
+ params = {"id": novel_id, "viewer_version": "20221031_ai"}
+ return self._call(
+ "/webview/v2/novel", params, self._novel_webview_parse)
+
+ def _novel_webview_parse(self, response):
+ return util.json_loads(text.extr(
+ response.text, "novel: ", ",\n"))
+
def search_illust(self, word, sort=None, target=None, duration=None,
date_start=None, date_end=None):
params = {"word": word, "search_target": target,
@@ -962,13 +971,17 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
- def _call(self, endpoint, params=None):
+ def _call(self, endpoint, params=None, parse=None):
url = "https://app-api.pixiv.net" + endpoint
while True:
self.login()
response = self.extractor.request(url, params=params, fatal=False)
- data = response.json()
+
+ if parse:
+ data = parse(response)
+ else:
+ data = response.json()
if "error" not in data:
return data
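
A minimal sketch of the _novel_webview_parse step, with a made-up miniature of the webview HTML it slices the JSON out of:

    import json

    # The webview page embeds the novel data as a JSON object after
    # "novel: ", terminated by ",\n" (page layout assumed).
    html = 'window.pixiv = {\n  novel: {"id": 123, "text": "..."},\n  mode: "dark"\n};'
    payload = html.partition("novel: ")[2].partition(",\n")[0]
    novel = json.loads(payload)
    print(novel["id"])  # 123
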
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 7ff40a3..c7283fc 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -143,6 +143,9 @@ class PornhubGifExtractor(PornhubExtractor):
"url" : extr('"contentUrl": "', '"'),
"date" : text.parse_datetime(
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
+ "viewkey" : extr('From this video: '
+ '<a href="/view_video.php?viewkey=', '"'),
+ "timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
"user" : text.remove_html(extr("Created by:", "</div>")),
}
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 2ef0f9f..e099c7e 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -191,6 +191,8 @@ class RedditExtractor(Extractor):
try:
if "reddit_video_preview" in post["preview"]:
video = post["preview"]["reddit_video_preview"]
+ if "fallback_url" in video:
+ yield video["fallback_url"]
if "dash_url" in video:
yield "ytdl:" + video["dash_url"]
if "hls_url" in video:
@@ -200,6 +202,12 @@ class RedditExtractor(Extractor):
try:
for image in post["preview"]["images"]:
+ variants = image.get("variants")
+ if variants:
+ if "gif" in variants:
+ yield variants["gif"]["source"]["url"]
+ if "mp4" in variants:
+ yield variants["mp4"]["source"]["url"]
yield image["source"]["url"]
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 6185acb..327bcd1 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -52,23 +52,22 @@ class RedgifsExtractor(Extractor):
gif.update(metadata)
gif["count"] = cnt
+ gif["date"] = text.parse_timestamp(gif.get("createDate"))
yield Message.Directory, gif
for num, gif in enumerate(gifs, enum):
- url = self._process(gif)
+ gif["_fallback"] = formats = self._formats(gif)
+ url = next(formats, None)
+
if not url:
self.log.warning(
"Skipping '%s' (format not available)", gif["id"])
continue
+
gif["num"] = num
gif["count"] = cnt
yield Message.Url, url, gif
- def _process(self, gif):
- gif["_fallback"] = formats = self._formats(gif)
- gif["date"] = text.parse_timestamp(gif.get("createDate"))
- return next(formats, None)
-
def _formats(self, gif):
urls = gif["urls"]
for fmt in self.formats:
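
A sketch of the primary/fallback pattern above: the generator's first hit becomes the download URL, and the remaining formats stay available as fallbacks if that download fails (the preferred order is an assumption):

    def formats(urls, preferred=("hd", "sd", "gif")):
        for fmt in preferred:
            if fmt in urls:
                yield urls[fmt]

    urls = {"sd": "https://example.org/clip-sd.mp4",
            "gif": "https://example.org/clip.gif"}
    fallback = formats(urls)
    url = next(fallback, None)   # primary choice; None if nothing matched
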
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 0b29ed0..38a2d16 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -7,7 +7,7 @@
"""Extractors for https://skeb.jp/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, exception
import itertools
@@ -26,6 +26,19 @@ class SkebExtractor(Extractor):
def _init(self):
self.thumbnails = self.config("thumbnails", False)
self.article = self.config("article", False)
+ self.headers = {"Accept": "application/json, text/plain, */*"}
+
+ if "Authorization" not in self.session.headers:
+ self.headers["Authorization"] = "Bearer null"
+
+ def request(self, url, **kwargs):
+ while True:
+ try:
+ return Extractor.request(self, url, **kwargs)
+ except exception.HttpError as exc:
+ if exc.status == 429 and "request_key" in exc.response.cookies:
+ continue
+ raise
def items(self):
metadata = self.metadata()
@@ -42,6 +55,12 @@ class SkebExtractor(Extractor):
url = file["file_url"]
yield Message.Url, url, text.nameext_from_url(url, post)
+ def _items_users(self):
+ base = self.root + "/@"
+ for user in self.users():
+ user["_extractor"] = SkebUserExtractor
+ yield Message.Queue, base + user["screen_name"], user
+
def posts(self):
"""Return post number"""
@@ -49,11 +68,11 @@ class SkebExtractor(Extractor):
"""Return additional metadata"""
def _pagination(self, url, params):
- headers = {"Authorization": "Bearer null"}
params["offset"] = 0
while True:
- posts = self.request(url, params=params, headers=headers).json()
+ posts = self.request(
+ url, params=params, headers=self.headers).json()
for post in posts:
parts = post["path"].split("/")
@@ -70,11 +89,24 @@ class SkebExtractor(Extractor):
return
params["offset"] += 30
+ def _pagination_users(self, endpoint, params):
+ url = "{}/api{}".format(self.root, endpoint)
+ params["offset"] = 0
+ params["limit"] = 90
+
+ while True:
+ data = self.request(
+ url, params=params, headers=self.headers).json()
+ yield from data
+
+ if len(data) < params["limit"]:
+ return
+ params["offset"] += params["limit"]
+
def _get_post_data(self, user_name, post_num):
url = "{}/api/users/{}/works/{}".format(
self.root, user_name, post_num)
- headers = {"Authorization": "Bearer null"}
- resp = self.request(url, headers=headers).json()
+ resp = self.request(url, headers=self.headers).json()
creator = resp["creator"]
post = {
"post_id" : resp["id"],
@@ -244,22 +276,23 @@ class SkebFollowingExtractor(SkebExtractor):
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
example = "https://skeb.jp/@USER/following_creators"
- def items(self):
- for user in self.users():
- url = "{}/@{}".format(self.root, user["screen_name"])
- user["_extractor"] = SkebUserExtractor
- yield Message.Queue, url, user
+ items = SkebExtractor._items_users
def users(self):
- url = "{}/api/users/{}/following_creators".format(
- self.root, self.user_name)
- params = {"sort": "date", "offset": 0, "limit": 90}
- headers = {"Authorization": "Bearer null"}
+ endpoint = "/users/{}/following_creators".format(self.user_name)
+ params = {"sort": "date"}
+ return self._pagination_users(endpoint, params)
- while True:
- data = self.request(url, params=params, headers=headers).json()
- yield from data
- if len(data) < params["limit"]:
- return
- params["offset"] += params["limit"]
+class SkebFollowingUsersExtractor(SkebExtractor):
+ """Extractor for your followed users"""
+ subcategory = "following-users"
+ pattern = r"(?:https?://)?skeb\.jp/following_users()"
+ example = "https://skeb.jp/following_users"
+
+ items = SkebExtractor._items_users
+
+ def users(self):
+ endpoint = "/following_users"
+ params = {}
+ return self._pagination_users(endpoint, params)
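
A sketch of the 429 handling above, with a requests Session standing in for the extractor's session: Skeb's rate limiter answers with a "request_key" cookie, and since the session stores response cookies, simply retrying the same request then goes through:

    import requests

    session = requests.Session()

    def fetch(url, **kwargs):
        while True:
            response = session.get(url, **kwargs)
            if response.status_code == 429 and "request_key" in response.cookies:
                continue        # cookie now set on the session; retry
            return response
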
diff --git a/gallery_dl/extractor/steamgriddb.py b/gallery_dl/extractor/steamgriddb.py
index 9d46fd6..8582824 100644
--- a/gallery_dl/extractor/steamgriddb.py
+++ b/gallery_dl/extractor/steamgriddb.py
@@ -163,6 +163,9 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
def assets(self):
endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
asset = self._call(endpoint)["asset"]
+ if asset is None:
+ raise exception.NotFoundError("asset ({}:{})".format(
+ self.asset_type, self.asset_id))
return (asset,)
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 31fb891..d4adfed 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -175,7 +175,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"author_id" : text.parse_int(extr('data-user-id="', '"')),
"author_nick": text.unescape(extr('alt="', '"')),
"date" : self._parse_datetime(extr(
- 'class="section-subtitle">', '<')),
+ '<span class="star_link-types">', '<')),
"content" : (extr(
'<div class="post-content', '<div class="post-uploads')
.partition(">")[2]),
diff --git a/gallery_dl/extractor/test.py b/gallery_dl/extractor/test.py
deleted file mode 100644
index e3f9f74..0000000
--- a/gallery_dl/extractor/test.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2016-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Utility extractor to execute tests of other extractors"""
-
-from .common import Extractor, Message
-from .. import extractor, exception
-
-
-class TestExtractor(Extractor):
- """Extractor to select and run the test URLs of other extractors
-
- The general form is 'test:<categories>:<subcategories>:<indices>', where
- <categories> and <subcategories> are comma-separated (sub)category names
- and <indices> is a comma-separated list of array indices.
- To select all possible values for a field use the star '*' character or
- leave the field empty.
-
- Examples:
- - test:pixiv
- run all pixiv tests
-
- - test:pixiv:user,favorite:0
- run the first test of the PixivUser- and PixivFavoriteExtractor
-
- - test:
- run all tests
- """
- category = "test"
- pattern = r"t(?:est)?:([^:]*)(?::([^:]*)(?::(\*|[\d,]*))?)?$"
- example = "test:CATEGORY"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- categories, subcategories, indices = match.groups()
- self.categories = self._split(categories)
- self.subcategories = self._split(subcategories)
- self.indices = self._split(indices) or self
-
- def items(self):
- extractors = extractor.extractors()
-
- if self.categories:
- extractors = [
- extr for extr in extractors
- if extr.category in self.categories
- ]
-
- if self.subcategories:
- extractors = [
- extr for extr in extractors
- if extr.subcategory in self.subcategories
- ]
-
- tests = [
- test
- for extr in extractors
- for index, test in enumerate(extr._get_tests())
- if str(index) in self.indices
- ]
-
- if not tests:
- raise exception.NotFoundError("test")
-
- for test in tests:
- yield Message.Queue, test[0], {}
-
- @staticmethod
- def __contains__(_):
- return True
-
- @staticmethod
- def _split(value):
- if value and value != "*":
- return value.split(",")
- return None
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index ad5bfc6..a5bd984 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -340,6 +340,8 @@ class TwitterExtractor(Extractor):
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
+ if "birdwatch_pivot" in tweet:
+ tdata["birdwatch"] = tweet["birdwatch_pivot"]["subtitle"]["text"]
if "in_reply_to_screen_name" in legacy:
tdata["reply_to"] = legacy["in_reply_to_screen_name"]
if "quoted_by" in legacy:
@@ -380,6 +382,7 @@ class TwitterExtractor(Extractor):
"date" : text.parse_datetime(
uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
"verified" : uget("verified", False),
+ "protected" : uget("protected", False),
"profile_banner" : uget("profile_banner_url", ""),
"profile_image" : uget(
"profile_image_url_https", "").replace("_normal.", "."),
@@ -731,9 +734,9 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
- """Extractor for images from individual tweets"""
+ """Extractor for individual tweets"""
subcategory = "tweet"
- pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)/?$"
example = "https://twitter.com/USER/status/12345"
def __init__(self, match):
@@ -810,6 +813,18 @@ class TwitterTweetExtractor(TwitterExtractor):
return itertools.chain(buffer, tweets)
+class TwitterQuotesExtractor(TwitterExtractor):
+ """Extractor for quotes of a Tweet"""
+ subcategory = "quotes"
+ pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
+ example = "https://twitter.com/USER/status/12345/quotes"
+
+ def items(self):
+ url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user)
+ data = {"_extractor": TwitterSearchExtractor}
+ yield Message.Queue, url, data
+
+
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
@@ -882,6 +897,7 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
+ self.log = extractor.log
self.root = "https://twitter.com/i/api"
self._nsfw_warning = True
@@ -1244,7 +1260,7 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
endpoint = "/1.1/guest/activate.json"
- self.extractor.log.info("Requesting guest token")
+ self.log.info("Requesting guest token")
return str(self._call(
endpoint, None, "POST", False, "https://api.twitter.com",
)["guest_token"])
@@ -1274,17 +1290,35 @@ class TwitterAPI():
if response.status_code < 400:
data = response.json()
- if not data.get("errors") or not any(
- (e.get("message") or "").lower().startswith("timeout")
- for e in data["errors"]):
- return data # success or non-timeout errors
- msg = data["errors"][0].get("message") or "Unspecified"
- self.extractor.log.debug("Internal Twitter error: '%s'", msg)
+ errors = data.get("errors")
+ if not errors:
+ return data
- if self.headers["x-twitter-auth-type"]:
- self.extractor.log.debug("Retrying API request")
- continue # retry
+ retry = False
+ for error in errors:
+ msg = error.get("message") or "Unspecified"
+ self.log.debug("API error: '%s'", msg)
+
+ if "this account is temporarily locked" in msg:
+ msg = "Account temporarily locked"
+ if self.extractor.config("locked") != "wait":
+ raise exception.AuthorizationError(msg)
+ self.log.warning("%s. Press ENTER to retry.", msg)
+ try:
+ input()
+ except (EOFError, OSError):
+ pass
+ retry = True
+
+ elif msg.lower().startswith("timeout"):
+ retry = True
+
+ if not retry:
+ return data
+ elif self.headers["x-twitter-auth-type"]:
+ self.log.debug("Retrying API request")
+ continue
# fall through to "Login Required"
response.status_code = 404
@@ -1374,7 +1408,7 @@ class TwitterAPI():
try:
tweet = tweets[tweet_id]
except KeyError:
- self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
+ self.log.debug("Skipping %s (deleted)", tweet_id)
continue
if "retweeted_status_id_str" in tweet:
@@ -1606,8 +1640,10 @@ class TwitterAPI():
variables["cursor"] = cursor
def _pagination_users(self, endpoint, variables, path=None):
- params = {"variables": None,
- "features" : self._json_dumps(self.features_pagination)}
+ params = {
+ "variables": None,
+ "features" : self._json_dumps(self.features_pagination),
+ }
while True:
cursor = entry = None
@@ -1651,9 +1687,9 @@ class TwitterAPI():
if text.startswith("Age-restricted"):
if self._nsfw_warning:
self._nsfw_warning = False
- self.extractor.log.warning('"%s"', text)
+ self.log.warning('"%s"', text)
- self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
+ self.log.debug("Skipping %s ('%s')", tweet_id, text)
@cache(maxage=365*86400, keyarg=1)
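
A standalone sketch of the new error classification, with RuntimeError and print standing in for gallery-dl's exception and logging machinery: every error message is inspected, a temporary account lock waits for the user (or aborts, depending on the "locked" option), and timeouts are retried:

    def classify(errors, locked="wait", log=print):
        retry = False
        for error in errors:
            msg = error.get("message") or "Unspecified"
            if "this account is temporarily locked" in msg:
                if locked != "wait":
                    raise RuntimeError("Account temporarily locked")
                log("Account temporarily locked. Press ENTER to retry.")
                try:
                    input()
                except (EOFError, OSError):
                    pass
                retry = True
            elif msg.lower().startswith("timeout"):
                retry = True
        return retry
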
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index 5374f1c..6dfb23c 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -26,17 +26,39 @@ class VipergirlsExtractor(Extractor):
cookies_domain = ".vipergirls.to"
cookies_names = ("vg_userid", "vg_password")
+ def _init(self):
+ domain = self.config("domain")
+ if domain:
+ self.root = text.ensure_http_scheme(domain)
+
def items(self):
self.login()
+ posts = self.posts()
+
+ like = self.config("like")
+ if like:
+ user_hash = posts[0].get("hash")
+ if len(user_hash) < 16:
+ self.log.warning("Login required to like posts")
+ like = False
- for post in self.posts():
+ posts = posts.iter("post")
+ if self.page:
+ util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
+
+ for post in posts:
data = post.attrib
data["thread_id"] = self.thread_id
yield Message.Directory, data
+
+ image = None
for image in post:
yield Message.Queue, image.attrib["main_url"], data
+ if image is not None and like:
+ self.like(post, user_hash)
+
def login(self):
if self.cookies_check(self.cookies_names):
return
@@ -64,6 +86,17 @@ class VipergirlsExtractor(Extractor):
return {cookie.name: cookie.value
for cookie in response.cookies}
+ def like(self, post, user_hash):
+ url = self.root + "/post_thanks.php"
+ params = {
+ "do" : "post_thanks_add",
+ "p" : post.get("id"),
+ "securitytoken": user_hash,
+ }
+
+ with self.request(url, params=params, allow_redirects=False):
+ pass
+
class VipergirlsThreadExtractor(VipergirlsExtractor):
"""Extractor for vipergirls threads"""
@@ -77,12 +110,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
def posts(self):
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
- root = ElementTree.fromstring(self.request(url).text)
- posts = root.iter("post")
-
- if self.page:
- util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
- return posts
+ return ElementTree.fromstring(self.request(url).text)
class VipergirlsPostExtractor(VipergirlsExtractor):
@@ -95,8 +123,8 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
def __init__(self, match):
VipergirlsExtractor.__init__(self, match)
self.thread_id, self.post_id = match.groups()
+ self.page = 0
def posts(self):
url = "{}/vr.php?p={}".format(self.root, self.post_id)
- root = ElementTree.fromstring(self.request(url).text)
- return root.iter("post")
+ return ElementTree.fromstring(self.request(url).text)
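
A sketch of the new like() request, with requests standing in for the extractor's session; parameter names are taken from the diff above, and redirects are skipped since only the side effect matters:

    import requests

    def like(root, post_id, user_hash):
        params = {
            "do"           : "post_thanks_add",
            "p"            : post_id,
            "securitytoken": user_hash,
        }
        requests.get(root + "/post_thanks.php", params=params,
                     allow_redirects=False)
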
diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py
index 3bb635d..e91f45f 100644
--- a/gallery_dl/extractor/warosu.py
+++ b/gallery_dl/extractor/warosu.py
@@ -50,7 +50,7 @@ class WarosuThreadExtractor(Extractor):
title = text.unescape(text.extr(page, "class=filetitle>", "<"))
return {
"board" : self.board,
- "board_name": boardname.rpartition(" - ")[2],
+ "board_name": boardname.split(" - ")[1],
"thread" : self.thread,
"title" : title,
}
@@ -64,8 +64,7 @@ class WarosuThreadExtractor(Extractor):
def parse(self, post):
"""Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
- if "<span> File:" in post:
- self._extract_image(post, data)
+ if "<span> File:" in post and self._extract_image(post, data):
part = data["image"].rpartition("/")[2]
data["tim"], _, data["extension"] = part.partition(".")
data["ext"] = "." + data["extension"]
@@ -91,6 +90,11 @@ class WarosuThreadExtractor(Extractor):
"", "<").rstrip().rpartition(".")[0])
extr("<br>", "")
- data["image"] = url = extr("<a href=", ">")
- if url[0] == "/":
- data["image"] = self.root + url
+ url = extr("<a href=", ">")
+ if url:
+ if url[0] == "/":
+ data["image"] = self.root + url
+ else:
+ data["image"] = url
+ return True
+ return False
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 5b45148..83b1642 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -30,9 +30,9 @@ class WeiboExtractor(Extractor):
self._prefix, self.user = match.groups()
def _init(self):
- self.retweets = self.config("retweets", True)
- self.videos = self.config("videos", True)
self.livephoto = self.config("livephoto", True)
+ self.retweets = self.config("retweets", False)
+ self.videos = self.config("videos", True)
self.gifs = self.config("gifs", True)
self.gifs_video = (self.gifs == "video")
@@ -59,15 +59,25 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
- files = []
- if self.retweets and "retweeted_status" in status:
+ if "ori_mid" in status and not self.retweets:
+ self.log.debug("Skipping %s (快转 retweet)", status["id"])
+ continue
+
+ if "retweeted_status" in status:
+ if not self.retweets:
+ self.log.debug("Skipping %s (retweet)", status["id"])
+ continue
+
+ # videos of the original post are in status
+ # images of the original post are in status["retweeted_status"]
+ files = []
+ self._extract_status(status, files)
+ self._extract_status(status["retweeted_status"], files)
+
if original_retweets:
status = status["retweeted_status"]
- self._extract_status(status, files)
- else:
- self._extract_status(status, files)
- self._extract_status(status["retweeted_status"], files)
else:
+ files = []
self._extract_status(status, files)
status["date"] = text.parse_datetime(
@@ -118,7 +128,7 @@ class WeiboExtractor(Extractor):
append(pic["largest"].copy())
file = {"url": pic["video"]}
- file["filehame"], _, file["extension"] = \
+ file["filename"], _, file["extension"] = \
pic["video"].rpartition("%2F")[2].rpartition(".")
append(file)
@@ -176,23 +186,34 @@ class WeiboExtractor(Extractor):
data = data["data"]
statuses = data["list"]
- if not statuses:
- return
yield from statuses
- if "next_cursor" in data: # videos, newvideo
- if data["next_cursor"] == -1:
+ # videos, newvideo
+ cursor = data.get("next_cursor")
+ if cursor:
+ if cursor == -1:
return
- params["cursor"] = data["next_cursor"]
- elif "page" in params: # home, article
- params["page"] += 1
- elif data["since_id"]: # album
+ params["cursor"] = cursor
+ continue
+
+ # album
+ since_id = data.get("since_id")
+ if since_id:
params["sinceid"] = data["since_id"]
- else: # feed, last album page
- try:
- params["since_id"] = statuses[-1]["id"] - 1
- except KeyError:
+ continue
+
+ # home, article
+ if "page" in params:
+ if not statuses:
return
+ params["page"] += 1
+ continue
+
+ # feed, last album page
+ try:
+ params["since_id"] = statuses[-1]["id"] - 1
+ except LookupError:
+ return
def _sina_visitor_system(self, response):
self.log.info("Sina Visitor System")
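
A standalone sketch of the reworked pagination dispatch, returning False where the original returns from its generator: prefer an explicit cursor, then an album since_id, then a page counter, and finally derive since_id from the last status:

    def advance(params, data, statuses):
        cursor = data.get("next_cursor")          # videos, newvideo
        if cursor:
            if cursor == -1:
                return False
            params["cursor"] = cursor
            return True
        since_id = data.get("since_id")           # album
        if since_id:
            params["sinceid"] = since_id
            return True
        if "page" in params:                      # home, article
            if not statuses:
                return False
            params["page"] += 1
            return True
        try:                                      # feed, last album page
            params["since_id"] = statuses[-1]["id"] - 1
            return True
        except LookupError:
            return False
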
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index c93f33f..ac00682 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -162,6 +162,11 @@ BASE_PATTERN = WikimediaExtractor.update({
"pattern": r"(?:www\.)?pidgi\.net",
"api-path": "/wiki/api.php",
},
+ "azurlanewiki": {
+ "root": "https://azurlane.koumakan.jp",
+ "pattern": r"azurlane\.koumakan\.jp",
+ "api-path": "/w/api.php",
+ },
})
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index 46e574e..da9d6b0 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -11,6 +11,9 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, util
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?xvideos\.com"
+ r"/(?:profiles|(?:amateur-|model-)?channels)")
+
class XvideosBase():
"""Base class for xvideos extractors"""
@@ -25,9 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
"{gallery[id]} {gallery[title]}")
filename_fmt = "{category}_{gallery[id]}_{num:>03}.{extension}"
archive_fmt = "{gallery[id]}_{num}"
- pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
- r"/(?:profiles|amateur-channels|model-channels)"
- r"/([^/?#]+)/photos/(\d+)")
+ pattern = BASE_PATTERN + r"/([^/?#]+)/photos/(\d+)"
example = "https://www.xvideos.com/profiles/USER/photos/12345"
def __init__(self, match):
@@ -58,22 +59,35 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
},
}
- @staticmethod
- def images(page):
- """Return a list of all image urls for this gallery"""
- return [
+ def images(self, page):
+ results = [
(url, None)
for url in text.extract_iter(
page, '<a class="embed-responsive-item" href="', '"')
]
+ if not results:
+ return
+
+ while len(results) % 500 == 0:
+ path = text.rextract(page, ' href="', '"', page.find(">Next</"))[0]
+ if not path:
+ break
+ page = self.request(self.root + path).text
+ results.extend(
+ (url, None)
+ for url in text.extract_iter(
+ page, '<a class="embed-responsive-item" href="', '"')
+ )
+
+ return results
+
class XvideosUserExtractor(XvideosBase, Extractor):
"""Extractor for user profiles on xvideos.com"""
subcategory = "user"
categorytransfer = True
- pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
- r"/profiles/([^/?#]+)/?(?:#.*)?$")
+ pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:#.*)?$"
example = "https://www.xvideos.com/profiles/USER"
def __init__(self, match):
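
A sketch of the new gallery pagination, with canned pages standing in for HTTP requests; the 500-per-page limit is inferred from the modulo check above:

    def extract_urls(page):
        return [(url, None) for url in page["thumbs"]]

    def images(pages):
        results = extract_urls(pages[0])
        num = 1
        # a full page holds 500 thumbnails; follow "Next" while full
        while results and len(results) % 500 == 0 and num < len(pages):
            results.extend(extract_urls(pages[num]))
            num += 1
        return results

    pages = [{"thumbs": ["u%d" % i for i in range(500)]},
             {"thumbs": ["u500", "u501"]}]
    print(len(images(pages)))  # 502
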
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index 6ee96e6..fc61dff 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -10,7 +10,7 @@
from .booru import BooruExtractor
from ..cache import cache
-from .. import text, exception
+from .. import text, util, exception
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -21,8 +21,11 @@ class ZerochanExtractor(BooruExtractor):
root = "https://www.zerochan.net"
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
+ page_start = 1
+ per_page = 250
cookies_domain = ".zerochan.net"
cookies_names = ("z_id", "z_hash")
+ request_interval = (0.5, 1.5)
def login(self):
self._logged_in = True
@@ -86,7 +89,7 @@ class ZerochanExtractor(BooruExtractor):
return data
- def _parse_entry_json(self, entry_id):
+ def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
item = self.request(url).json()
@@ -117,14 +120,22 @@ class ZerochanTagExtractor(ZerochanExtractor):
ZerochanExtractor.__init__(self, match)
self.search_tag, self.query = match.groups()
+ def _init(self):
+ if self.config("pagination") == "html":
+ self.posts = self.posts_html
+ self.per_page = 24
+ else:
+ self.posts = self.posts_api
+ self.session.headers["User-Agent"] = util.USERAGENT
+
def metadata(self):
return {"search_tags": text.unquote(
self.search_tag.replace("+", " "))}
- def posts(self):
+ def posts_html(self):
url = self.root + "/" + self.search_tag
params = text.parse_query(self.query)
- params["p"] = text.parse_int(params.get("p"), 1)
+ params["p"] = text.parse_int(params.get("p"), self.page_start)
metadata = self.config("metadata")
while True:
@@ -140,7 +151,7 @@ class ZerochanTagExtractor(ZerochanExtractor):
if metadata:
entry_id = extr('href="/', '"')
post = self._parse_entry_html(entry_id)
- post.update(self._parse_entry_json(entry_id))
+ post.update(self._parse_entry_api(entry_id))
yield post
else:
yield {
@@ -157,6 +168,41 @@ class ZerochanTagExtractor(ZerochanExtractor):
break
params["p"] += 1
+ def posts_api(self):
+ url = self.root + "/" + self.search_tag
+ metadata = self.config("metadata")
+ params = {
+ "json": "1",
+ "l" : self.per_page,
+ "p" : self.page_start,
+ }
+
+ static = "https://static.zerochan.net/.full."
+
+ while True:
+ data = self.request(url, params=params).json()
+ try:
+ posts = data["items"]
+ except ValueError:
+ return
+
+ if metadata:
+ for post in posts:
+ post_id = post["id"]
+ post.update(self._parse_entry_html(post_id))
+ post.update(self._parse_entry_api(post_id))
+ else:
+ for post in posts:
+ base = static + str(post["id"])
+ post["file_url"] = base + ".jpg"
+ post["_fallback"] = (base + ".png",)
+
+ yield from posts
+
+ if not data.get("next"):
+ return
+ params["p"] += 1
+
class ZerochanImageExtractor(ZerochanExtractor):
subcategory = "image"
@@ -170,5 +216,5 @@ class ZerochanImageExtractor(ZerochanExtractor):
def posts(self):
post = self._parse_entry_html(self.image_id)
if self.config("metadata"):
- post.update(self._parse_entry_json(self.image_id))
+ post.update(self._parse_entry_api(self.image_id))
return (post,)
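
A minimal sketch of posts_api above, using requests directly; the parameter names follow the diff, and paging continues while the response reports a "next" page:

    import requests

    def posts(tag, per_page=250):
        url = "https://www.zerochan.net/" + tag
        params = {"json": "1", "l": per_page, "p": 1}
        while True:
            data = requests.get(url, params=params).json()
            yield from data.get("items", ())
            if not data.get("next"):
                return
            params["p"] += 1
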
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index 6098fc6..b83cf21 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -375,18 +375,18 @@ def _parse_offset(format_spec, default):
fmt = _build_format_func(format_spec, default)
if not offset or offset == "local":
- is_dst = time.daylight and time.localtime().tm_isdst > 0
- offset = -(time.altzone if is_dst else time.timezone)
+ def off(dt):
+ local = time.localtime(util.datetime_to_timestamp(dt))
+ return fmt(dt + datetime.timedelta(0, local.tm_gmtoff))
else:
hours, _, minutes = offset.partition(":")
offset = 3600 * int(hours)
if minutes:
offset += 60 * (int(minutes) if offset > 0 else -int(minutes))
+ offset = datetime.timedelta(0, offset)
- offset = datetime.timedelta(seconds=offset)
-
- def off(obj):
- return fmt(obj + offset)
+ def off(obj):
+ return fmt(obj + offset)
return off
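
A standalone sketch of the new "local" offset handling, assuming util.datetime_to_timestamp is equivalent to a UTC replace() plus timestamp(): the offset is computed per timestamp via tm_gmtoff, so dates on both sides of a DST switch get the correct offset:

    import time
    import datetime

    def to_local(dt):
        ts = dt.replace(tzinfo=datetime.timezone.utc).timestamp()
        return dt + datetime.timedelta(seconds=time.localtime(ts).tm_gmtoff)

    print(to_local(datetime.datetime(2010, 1, 1)))   # winter offset
    print(to_local(datetime.datetime(2010, 6, 1)))   # summer offset (DST zones)
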
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index c0971f0..2bcc222 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -224,6 +224,9 @@ def setup_logging_handler(key, fmt=LOG_FORMAT, lvl=LOG_LEVEL, mode="w"):
try:
path = util.expand_path(path)
handler = logging.FileHandler(path, mode, encoding)
+ except FileNotFoundError:
+ os.makedirs(os.path.dirname(path))
+ handler = logging.FileHandler(path, mode, encoding)
except (OSError, ValueError) as exc:
logging.getLogger("gallery-dl").warning(
"%s: %s", key, exc)
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 1fb1851..b7b5211 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -59,8 +59,14 @@ def ensure_http_scheme(url, scheme="https://"):
def root_from_url(url, scheme="https://"):
"""Extract scheme and domain from a URL"""
if not url.startswith(("https://", "http://")):
- return scheme + url[:url.index("/")]
- return url[:url.index("/", 8)]
+ try:
+ return scheme + url[:url.index("/")]
+ except ValueError:
+ return scheme + url
+ try:
+ return url[:url.index("/", 8)]
+ except ValueError:
+ return url
def filename_from_url(url):
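
A usage sketch for the hardened root_from_url, matching the new test cases further below: URLs without a path component no longer raise ValueError from str.index():

    from gallery_dl import text

    print(text.root_from_url("https://example.org"))        # https://example.org
    print(text.root_from_url("example.org"))                # https://example.org
    print(text.root_from_url("https://example.org/path"))   # https://example.org
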
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 751c398..bc9418f 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -563,7 +563,7 @@ WINDOWS = (os.name == "nt")
SENTINEL = object()
USERAGENT = "gallery-dl/" + version.__version__
EXECUTABLE = getattr(sys, "frozen", False)
-SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
+SPECIAL_EXTRACTORS = {"oauth", "recursive", "generic"}
GLOBALS = {
"contains" : contains,
"parse_int": text.parse_int,
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index e89ab9c..d438ba4 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.26.8"
+__version__ = "1.26.9"
diff --git a/test/test_cache.py b/test/test_cache.py
index 9b3623a..9951ef2 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -222,5 +222,5 @@ class TestCache(unittest.TestCase):
self.assertEqual(db.cache[2][0], 6)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/test/test_config.py b/test/test_config.py
index 859faf5..06780be 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -214,5 +214,5 @@ class TestConfigFiles(unittest.TestCase):
raise unittest.SkipTest(path + " not available")
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/test/test_cookies.py b/test/test_cookies.py
index a6ad05f..208645d 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -63,7 +63,7 @@ class TestCookiejar(unittest.TestCase):
def _test_warning(self, filename, exc):
config.set((), "cookies", filename)
- log = logging.getLogger("test")
+ log = logging.getLogger("generic")
with mock.patch.object(log, "warning") as mock_warning:
cookies = _get_extractor("test").cookies
@@ -173,7 +173,7 @@ class TestCookieUtils(unittest.TestCase):
self.assertFalse(extr.cookies_domain, "empty")
now = int(time.time())
- log = logging.getLogger("test")
+ log = logging.getLogger("generic")
extr.cookies.set("a", "1", expires=now-100)
with mock.patch.object(log, "warning") as mw:
@@ -212,7 +212,7 @@ URLS = {
"idolcomplex": "https://idol.sankakucomplex.com/post/show/1",
"nijie" : "https://nijie.info/view.php?id=1",
"horne" : "https://horne.red/view.php?id=1",
- "test" : "test:",
+ "test" : "generic:https://example.org/",
}
diff --git a/test/test_downloader.py b/test/test_downloader.py
index f10465e..8027af5 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -33,7 +33,7 @@ class MockDownloaderModule(Mock):
class FakeJob():
def __init__(self):
- self.extractor = extractor.find("test:")
+ self.extractor = extractor.find("generic:https://example.org/")
self.extractor.initialize()
self.pathfmt = path.PathFormat(self.extractor)
self.out = output.NullOutput()
@@ -304,6 +304,7 @@ SAMPLES = {
("mp4" , b"????ftypavc1"),
("mp4" , b"????ftypiso3"),
("mp4" , b"????ftypM4V"),
+ ("mov" , b"????ftypqt "),
("webm", b"\x1A\x45\xDF\xA3"),
("ogg" , b"OggS"),
("wav" , b"RIFF????WAVE"),
diff --git a/test/test_extractor.py b/test/test_extractor.py
index 75a0b87..6af1226 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -45,7 +45,7 @@ class TestExtractorModule(unittest.TestCase):
"https://example.org/file.jpg",
"tumblr:foobar",
"oauth:flickr",
- "test:pixiv:",
+ "generic:https://example.org/",
"recursive:https://example.org/document.html",
)
@@ -208,7 +208,7 @@ class TestExtractorModule(unittest.TestCase):
class TestExtractorWait(unittest.TestCase):
def test_wait_seconds(self):
- extr = extractor.find("test:")
+ extr = extractor.find("generic:https://example.org/")
seconds = 5
until = time.time() + seconds
@@ -222,7 +222,7 @@ class TestExtractorWait(unittest.TestCase):
self._assert_isotime(calls[0][1][1], until)
def test_wait_until(self):
- extr = extractor.find("test:")
+ extr = extractor.find("generic:https://example.org/")
until = time.time() + 5
with patch("time.sleep") as sleep, patch.object(extr, "log") as log:
@@ -237,7 +237,7 @@ class TestExtractorWait(unittest.TestCase):
self._assert_isotime(calls[0][1][1], until)
def test_wait_until_datetime(self):
- extr = extractor.find("test:")
+ extr = extractor.find("generic:https://example.org/")
until = datetime.utcnow() + timedelta(seconds=5)
until_local = datetime.now() + timedelta(seconds=5)
diff --git a/test/test_formatter.py b/test/test_formatter.py
index dbdccba..89cb1aa 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -31,8 +31,9 @@ class TestFormatter(unittest.TestCase):
"h": "<p>foo </p> &amp; bar <p> </p>",
"u": "&#x27;&lt; / &gt;&#x27;",
"t": 1262304000,
- "dt": datetime.datetime(2010, 1, 1),
"ds": "2010-01-01T01:00:00+0100",
+ "dt": datetime.datetime(2010, 1, 1),
+ "dt_dst": datetime.datetime(2010, 6, 1),
"name": "Name",
"title1": "Title",
"title2": "",
@@ -236,19 +237,20 @@ class TestFormatter(unittest.TestCase):
self._run_test("{ds:D%Y-%m-%dT%H:%M:%S%z/O1}", "2010-01-01 01:00:00")
self._run_test("{t!d:O2}", "2010-01-01 02:00:00")
- orig_daylight = time.daylight
- orig_timezone = time.timezone
- orig_altzone = time.altzone
- try:
- time.daylight = False
- time.timezone = -3600
- self._run_test("{dt:O}", "2010-01-01 01:00:00")
- time.timezone = 7200
- self._run_test("{dt:Olocal}", "2009-12-31 22:00:00")
- finally:
- time.daylight = orig_daylight
- time.timezone = orig_timezone
- time.altzone = orig_altzone
+ def test_offset_local(self):
+ ts = self.kwdict["dt"].replace(
+ tzinfo=datetime.timezone.utc).timestamp()
+ offset = time.localtime(ts).tm_gmtoff
+ dt = self.kwdict["dt"] + datetime.timedelta(seconds=offset)
+ self._run_test("{dt:O}", str(dt))
+ self._run_test("{dt:Olocal}", str(dt))
+
+ ts = self.kwdict["dt_dst"].replace(
+ tzinfo=datetime.timezone.utc).timestamp()
+ offset = time.localtime(ts).tm_gmtoff
+ dt = self.kwdict["dt_dst"] + datetime.timedelta(seconds=offset)
+ self._run_test("{dt_dst:O}", str(dt))
+ self._run_test("{dt_dst:Olocal}", str(dt))
def test_sort(self):
self._run_test("{l:S}" , "['a', 'b', 'c']")
@@ -455,5 +457,5 @@ def noarg():
self.assertEqual(output, result, format_string)
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/test/test_job.py b/test/test_job.py
index a6e093f..141b1b2 100644
--- a/test/test_job.py
+++ b/test/test_job.py
@@ -413,5 +413,5 @@ class TestExtractorAlt(Extractor):
subcategory = "test_subcategory"
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/test/test_output.py b/test/test_output.py
index 84433f0..e81f768 100644
--- a/test/test_output.py
+++ b/test/test_output.py
@@ -152,5 +152,5 @@ class TestShortenEAW(unittest.TestCase):
self.assertEqual(f(s, 19, "") , "幻-想-郷###幻-想-郷")
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index fb1d739..0ee7cdb 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -30,7 +30,8 @@ class MockPostprocessorModule(Mock):
class FakeJob():
- def __init__(self, extr=extractor.find("test:")):
+ def __init__(self, extr=extractor.find("generic:https://example.org/")):
+ extr.directory_fmt = ("{category}",)
self.extractor = extr
self.pathfmt = path.PathFormat(extr)
self.out = output.NullOutput()
diff --git a/test/test_results.py b/test/test_results.py
index bceb271..0594618 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -214,44 +214,46 @@ class TestExtractorResults(unittest.TestCase):
for kwdict in tjob.kwdict_list:
self._test_kwdict(kwdict, metadata)
- def _test_kwdict(self, kwdict, tests):
+ def _test_kwdict(self, kwdict, tests, parent=None):
for key, test in tests.items():
if key.startswith("?"):
key = key[1:]
if key not in kwdict:
continue
- self.assertIn(key, kwdict, msg=key)
+ path = "{}.{}".format(parent, key) if parent else key
+ self.assertIn(key, kwdict, msg=path)
value = kwdict[key]
if isinstance(test, dict):
- self._test_kwdict(value, test)
+ self._test_kwdict(value, test, path)
elif isinstance(test, type):
- self.assertIsInstance(value, test, msg=key)
+ self.assertIsInstance(value, test, msg=path)
elif isinstance(test, range):
- self.assertRange(value, test, msg=key)
+ self.assertRange(value, test, msg=path)
elif isinstance(test, list):
subtest = False
for idx, item in enumerate(test):
if isinstance(item, dict):
subtest = True
- self._test_kwdict(value[idx], item)
+ subpath = "{}[{}]".format(path, idx)
+ self._test_kwdict(value[idx], item, subpath)
if not subtest:
- self.assertEqual(test, value, msg=key)
+ self.assertEqual(test, value, msg=path)
elif isinstance(test, str):
if test.startswith("re:"):
- self.assertRegex(value, test[3:], msg=key)
+ self.assertRegex(value, test[3:], msg=path)
elif test.startswith("dt:"):
- self.assertIsInstance(value, datetime.datetime, msg=key)
- self.assertEqual(test[3:], str(value), msg=key)
+ self.assertIsInstance(value, datetime.datetime, msg=path)
+ self.assertEqual(test[3:], str(value), msg=path)
elif test.startswith("type:"):
- self.assertEqual(test[5:], type(value).__name__, msg=key)
+ self.assertEqual(test[5:], type(value).__name__, msg=path)
elif test.startswith("len:"):
- self.assertIsInstance(value, (list, tuple), msg=key)
- self.assertEqual(int(test[4:]), len(value), msg=key)
+ self.assertIsInstance(value, (list, tuple), msg=path)
+ self.assertEqual(int(test[4:]), len(value), msg=path)
else:
- self.assertEqual(test, value, msg=key)
+ self.assertEqual(test, value, msg=path)
else:
- self.assertEqual(test, value, msg=key)
+ self.assertEqual(test, value, msg=path)
class ResultJob(job.DownloadJob):
diff --git a/test/test_text.py b/test/test_text.py
index 2c0be3b..c99729c 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -121,12 +121,14 @@ class TestText(unittest.TestCase):
def test_root_from_url(self, f=text.root_from_url):
result = "https://example.org"
+ self.assertEqual(f("https://example.org") , result)
self.assertEqual(f("https://example.org/") , result)
self.assertEqual(f("https://example.org/path"), result)
self.assertEqual(f("example.org/") , result)
self.assertEqual(f("example.org/path/") , result)
result = "http://example.org"
+ self.assertEqual(f("http://example.org") , result)
self.assertEqual(f("http://example.org/") , result)
self.assertEqual(f("http://example.org/path/"), result)
self.assertEqual(f("example.org/", "http://") , result)
@@ -457,5 +459,5 @@ class TestText(unittest.TestCase):
self.assertEqual(f("1970.01.01"), "1970.01.01")
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()
diff --git a/test/test_util.py b/test/test_util.py
index 780f475..83b44b7 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -774,5 +774,5 @@ class TestExtractorAlt(TestExtractor):
subcategory = "test_subcategory"
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main()