author    Unit 193 <unit193@unit193.net>  2021-06-05 20:55:43 -0400
committer Unit 193 <unit193@unit193.net>  2021-06-05 20:55:43 -0400
commit    f26e7753b7a30fbe6a44cf5c72957a9096316923 (patch)
tree      5906c00be309b8a429df75f1183c02188d90973a
parent    3a066ea27a496139eaad532d7e53e0649ee1d848 (diff)
parent    8a644b7a06c504263a478d3681eed10b4161b5be (diff)
download  gallery-dl-f26e7753b7a30fbe6a44cf5c72957a9096316923.tar.bz2
          gallery-dl-f26e7753b7a30fbe6a44cf5c72957a9096316923.tar.xz
          gallery-dl-f26e7753b7a30fbe6a44cf5c72957a9096316923.tar.zst
Update upstream source from tag 'upstream/1.17.5'
Update to upstream version '1.17.5' with Debian dir b048d0b0c877962aaf3bbb88b1a2b8e2c1255371
-rw-r--r--  CHANGELOG.md                             |  49
-rw-r--r--  PKG-INFO                                 |   8
-rw-r--r--  README.rst                               |   6
-rw-r--r--  data/completion/_gallery-dl              |   2
-rw-r--r--  data/man/gallery-dl.1                    |   4
-rw-r--r--  data/man/gallery-dl.conf.5               | 157
-rw-r--r--  docs/gallery-dl.conf                     |   9
-rw-r--r--  gallery_dl.egg-info/PKG-INFO             |   8
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt          |   1
-rw-r--r--  gallery_dl/__init__.py                   |   4
-rw-r--r--  gallery_dl/cache.py                      |  37
-rw-r--r--  gallery_dl/config.py                     |   4
-rw-r--r--  gallery_dl/downloader/http.py            |  17
-rw-r--r--  gallery_dl/exception.py                  |   8
-rw-r--r--  gallery_dl/extractor/35photo.py          |   7
-rw-r--r--  gallery_dl/extractor/500px.py            |   8
-rw-r--r--  gallery_dl/extractor/aryion.py           |  12
-rw-r--r--  gallery_dl/extractor/danbooru.py         |   1
-rw-r--r--  gallery_dl/extractor/deviantart.py       |  10
-rw-r--r--  gallery_dl/extractor/exhentai.py         |   2
-rw-r--r--  gallery_dl/extractor/foolfuuka.py        |   2
-rw-r--r--  gallery_dl/extractor/gelbooru.py         |  22
-rw-r--r--  gallery_dl/extractor/imagebam.py         | 119
-rw-r--r--  gallery_dl/extractor/imgur.py            |   6
-rw-r--r--  gallery_dl/extractor/inkbunny.py         |  29
-rw-r--r--  gallery_dl/extractor/instagram.py        |   4
-rw-r--r--  gallery_dl/extractor/kemonoparty.py      |  42
-rw-r--r--  gallery_dl/extractor/manganelo.py        |  63
-rw-r--r--  gallery_dl/extractor/mangapark.py        |   6
-rw-r--r--  gallery_dl/extractor/nozomi.py           |   4
-rw-r--r--  gallery_dl/extractor/patreon.py          |  18
-rw-r--r--  gallery_dl/extractor/pillowfort.py       | 201
-rw-r--r--  gallery_dl/extractor/pixiv.py            |  37
-rw-r--r--  gallery_dl/extractor/reactor.py          |   4
-rw-r--r--  gallery_dl/extractor/readcomiconline.py  |  26
-rw-r--r--  gallery_dl/extractor/sankaku.py          |   7
-rw-r--r--  gallery_dl/extractor/twitter.py          |  20
-rw-r--r--  gallery_dl/extractor/unsplash.py         |   5
-rw-r--r--  gallery_dl/extractor/weasyl.py           |   6
-rw-r--r--  gallery_dl/extractor/weibo.py            |  24
-rw-r--r--  gallery_dl/extractor/wikiart.py          |   8
-rw-r--r--  gallery_dl/job.py                        |  87
-rw-r--r--  gallery_dl/option.py                     |   2
-rw-r--r--  gallery_dl/output.py                     |  12
-rw-r--r--  gallery_dl/postprocessor/ugoira.py       |  87
-rw-r--r--  gallery_dl/text.py                       |   2
-rw-r--r--  gallery_dl/util.py                       |  13
-rw-r--r--  gallery_dl/version.py                    |   2
-rw-r--r--  test/test_job.py                         | 338
-rw-r--r--  test/test_results.py                     |   2
-rw-r--r--  test/test_text.py                        |   4
-rw-r--r--  test/test_util.py                        |   9
52 files changed, 1213 insertions, 352 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 59691b7..dcc1299 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,54 @@
# Changelog
+## 1.17.5 - 2021-05-30
+### Additions
+- [kemonoparty] add `metadata` option ([#1548](https://github.com/mikf/gallery-dl/issues/1548))
+- [kemonoparty] add `type` metadata field ([#1556](https://github.com/mikf/gallery-dl/issues/1556))
+- [mangapark] recognize v2.mangapark URLs ([#1578](https://github.com/mikf/gallery-dl/issues/1578))
+- [patreon] extract user-defined `tags` ([#1539](https://github.com/mikf/gallery-dl/issues/1539), [#1540](https://github.com/mikf/gallery-dl/issues/1540))
+- [pillowfort] implement login with username & password ([#846](https://github.com/mikf/gallery-dl/issues/846))
+- [pillowfort] add `inline` and `external` options ([#846](https://github.com/mikf/gallery-dl/issues/846))
+- [pixiv] implement `max-posts` option ([#1558](https://github.com/mikf/gallery-dl/issues/1558))
+- [pixiv] add `metadata` option ([#1551](https://github.com/mikf/gallery-dl/issues/1551))
+- [twitter] add `text-tweets` option ([#570](https://github.com/mikf/gallery-dl/issues/570))
+- [weibo] extend `retweets` option ([#1542](https://github.com/mikf/gallery-dl/issues/1542))
+- [postprocessor:ugoira] support using the `image2` demuxer ([#1550](https://github.com/mikf/gallery-dl/issues/1550))
+- [postprocessor:ugoira] add `repeat-last-frame` option ([#1550](https://github.com/mikf/gallery-dl/issues/1550))
+- support `XDG_CONFIG_HOME` ([#1545](https://github.com/mikf/gallery-dl/issues/1545))
+- implement `parent-skip` and `"skip": "terminate"` options ([#1399](https://github.com/mikf/gallery-dl/issues/1399))
+### Changes
+- [twitter] resolve `t.co` URLs in `content` ([#1532](https://github.com/mikf/gallery-dl/issues/1532))
+### Fixes
+- [500px] update query hashes ([#1573](https://github.com/mikf/gallery-dl/issues/1573))
+- [aryion] find text posts in `recursive=false` mode ([#1568](https://github.com/mikf/gallery-dl/issues/1568))
+- [imagebam] fix extraction of NSFW images ([#1534](https://github.com/mikf/gallery-dl/issues/1534))
+- [imgur] update URL patterns ([#1561](https://github.com/mikf/gallery-dl/issues/1561))
+- [manganelo] update domain to `manganato.com`
+- [reactor] skip deleted/empty posts
+- [twitter] add missing retweet media entities ([#1555](https://github.com/mikf/gallery-dl/issues/1555))
+- fix ISO 639-1 code for Japanese (`jp` -> `ja`)
+
+## 1.17.4 - 2021-05-07
+### Additions
+- [gelbooru] add extractor for `/redirect.php` URLs ([#1530](https://github.com/mikf/gallery-dl/issues/1530))
+- [inkbunny] add `favorite` extractor ([#1521](https://github.com/mikf/gallery-dl/issues/1521))
+- add `output.skip` option
+- add an optional argument to `--clear-cache` to select which cache entries to remove ([#1230](https://github.com/mikf/gallery-dl/issues/1230))
+### Changes
+- [pixiv] update `translated-tags` option ([#1507](https://github.com/mikf/gallery-dl/issues/1507))
+ - rename to `tags`
+ - accept `"japanese"`, `"translated"`, and `"original"` as values
+### Fixes
+- [500px] update query hashes
+- [kemonoparty] fix download URLs ([#1514](https://github.com/mikf/gallery-dl/issues/1514))
+- [imagebam] fix extraction
+- [instagram] update query hashes
+- [nozomi] update default archive-fmt for `tag` and `search` extractors ([#1529](https://github.com/mikf/gallery-dl/issues/1529))
+- [pixiv] remove duplicate translated tags ([#1507](https://github.com/mikf/gallery-dl/issues/1507))
+- [readcomiconline] change domain to `readcomiconline.li` ([#1517](https://github.com/mikf/gallery-dl/issues/1517))
+- [sankaku] update invalid-token detection ([#1515](https://github.com/mikf/gallery-dl/issues/1515))
+- fix crash when using `--no-download` with `--ugoira-conv` ([#1507](https://github.com/mikf/gallery-dl/issues/1507))
+
## 1.17.3 - 2021-04-25
### Additions
- [danbooru] add option for extended metadata extraction ([#1458](https://github.com/mikf/gallery-dl/issues/1458))
diff --git a/PKG-INFO b/PKG-INFO
index 3df2fe0..14d8ed3 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.17.3
+Version: 1.17.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -190,6 +190,7 @@ Description: ==========
Linux, macOS, etc.:
* ``/etc/gallery-dl.conf``
+ * ``${XDG_CONFIG_HOME}/gallery-dl/config.json``
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
@@ -220,6 +221,7 @@ Description: ==========
``inkbunny``,
``instagram``,
``mangoxo``,
+ ``pillowfort``,
``pinterest``,
``sankaku``,
``subscribestar``,
diff --git a/README.rst b/README.rst
index d659faf..66e71e7 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -179,6 +179,7 @@ Windows:
Linux, macOS, etc.:
* ``/etc/gallery-dl.conf``
+ * ``${XDG_CONFIG_HOME}/gallery-dl/config.json``
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
@@ -209,6 +210,7 @@ and optional for
``inkbunny``,
``instagram``,
``mangoxo``,
+``pillowfort``,
``pinterest``,
``sankaku``,
``subscribestar``,
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 76afd8a..436260b 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -11,7 +11,7 @@ _arguments -C -S \
{-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'<file>':_files \
--cookies'[File to load additional cookies from]':'<file>':_files \
--proxy'[Use the specified proxy]':'<url>' \
---clear-cache'[Delete all cached login sessions, cookies, etc.]' \
+--clear-cache'[Delete all cached login sessions, cookies, etc.]':'<module>' \
{-q,--quiet}'[Activate quiet mode]' \
{-v,--verbose}'[Print various debugging information]' \
{-g,--get-urls}'[Print URLs instead of downloading]' \
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 6a22a07..719b8b4 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-04-25" "1.17.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-05-30" "1.17.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -35,7 +35,7 @@ File to load additional cookies from
.B "\-\-proxy" \f[I]URL\f[]
Use the specified proxy
.TP
-.B "\-\-clear\-cache"
+.B "\-\-clear\-cache" \f[I]MODULE\f[]
Delete all cached login sessions, cookies, etc.
.TP
.B "\-q, \-\-quiet"
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 0190b7f..f35f218 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-04-25" "1.17.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-05-30" "1.17.5" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -166,6 +166,17 @@ for any spawned child extractors.
Overwrite any metadata provided by a child extractor with its parent's.
+.SS extractor.*.parent-skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Share the number of skipped downloads between parent and child extractors.
+
+
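A minimal gallery-dl.conf sketch (illustrative) of enabling this option, so that files skipped by a child extractor also count toward a parent's "abort:N"/"terminate:N" threshold:

    {
        "extractor": {
            "parent-skip": true
        }
    }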
.SS extractor.*.path-restrict
.IP "Type:" 6
\f[I]string\f[] or \f[I]object\f[]
@@ -267,12 +278,18 @@ exists or its ID is in a \f[I]download archive\f[].
* \f[I]false\f[]: Overwrite already existing files
.br
-* \f[I]"abort"\f[]: Abort the current extractor run
+* \f[I]"abort"\f[]: Stop the current extractor run
.br
-* \f[I]"abort:N"\f[]: Skip downloads and abort extractor run
+* \f[I]"abort:N"\f[]: Skip downloads and stop the current extractor run
after \f[I]N\f[] consecutive skips
.br
+* \f[I]"terminate"\f[]: Stop the current extractor run, including parent extractors
+.br
+* \f[I]"terminate:N"\f[]: Skip downloads and stop the current extractor run,
+including parent extractors, after \f[I]N\f[] consecutive skips
+
+.br
* \f[I]"exit"\f[]: Exit the program altogether
.br
* \f[I]"exit:N"\f[]: Skip downloads and exit the program
@@ -357,6 +374,8 @@ and optional for
.br
* \f[I]mangoxo\f[]
.br
+* \f[I]pillowfort\f[]
+.br
* \f[I]pinterest\f[]
.br
* \f[I]sankaku\f[]
@@ -1286,7 +1305,8 @@ A (comma-separated) list of subcategories to include
when processing a user profile.
Possible values are
-\f[I]"posts"\f[], \f[I]reels\f[], \f[I]"stories"\f[], \f[I]"highlights"\f[], \f[I]"channel"\f[].
+\f[I]"posts"\f[], \f[I]"reels"\f[], \f[I]"channel"\f[], \f[I]"tagged"\f[],
+\f[I]"stories"\f[], \f[I]"highlights"\f[].
You can use \f[I]"all"\f[] instead of listing all values separately.
@@ -1302,6 +1322,17 @@ You can use \f[I]"all"\f[] instead of listing all values separately.
Download video files.
+.SS extractor.kemonoparty.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract \f[I]username\f[] metadata
+
+
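A sketch enabling the option and using the resulting username field in a directory format string (the format itself is illustrative, not prescribed; {user} is the service user ID):

    {
        "extractor": {
            "kemonoparty": {
                "metadata": true,
                "directory": ["{category}", "{username} ({user})"]
            }
        }
    }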
.SS extractor.khinsider.format
.IP "Type:" 6
\f[I]string\f[]
@@ -1434,6 +1465,28 @@ port than the default.
Download subalbums.
+.SS extractor.pillowfort.external
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Follow links to external sites, e.g. Twitter.
+
+
+.SS extractor.pillowfort.inline
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract inline images.
+
+
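A sketch combining both new options with the existing reblogs option (documented below); the values are illustrative:

    {
        "extractor": {
            "pillowfort": {
                "inline": true,
                "external": true,
                "reblogs": false
            }
        }
    }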
.SS extractor.pillowfort.reblogs
.IP "Type:" 6
\f[I]bool\f[]
@@ -1478,7 +1531,7 @@ Download from video pins.
Download user avatars.
-.SS extractor.pixiv.work.related
+.SS extractor.pixiv.user.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -1486,10 +1539,10 @@ Download user avatars.
\f[I]false\f[]
.IP "Description:" 4
-Also download related artworks.
+Fetch extended \f[I]user\f[] metadata.
-.SS extractor.pixiv.translated-tags
+.SS extractor.pixiv.work.related
.IP "Type:" 6
\f[I]bool\f[]
@@ -1497,7 +1550,25 @@ Also download related artworks.
\f[I]false\f[]
.IP "Description:" 4
-Provide translated ´tags`.
+Also download related artworks.
+
+
+.SS extractor.pixiv.tags
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"japanese"\f[]
+
+.IP "Description:" 4
+Controls the \f[I]tags\f[] metadata field.
+
+.br
+* "japanese": List of Japanese tags
+.br
+* "translated": List of translated tags
+.br
+* "original": Unmodified list with both Japanese and translated tags
.SS extractor.pixiv.ugoira
@@ -1517,6 +1588,18 @@ Use an ugoira post processor to convert them
to watchable videos. (Example__)
+.SS extractor.pixiv.max-posts
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]0\f[]
+
+.IP "Description:" 4
+When downloading galleries, this sets the maximum number of posts to get.
+A value of \f[I]0\f[] means no limit.
+
+
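A sketch limiting a pixiv gallery download to its 100 newest posts (the number is illustrative):

    {
        "extractor": {
            "pixiv": {
                "max-posts": 100
            }
        }
    }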
.SS extractor.plurk.comments
.IP "Type:" 6
\f[I]bool\f[]
@@ -1848,6 +1931,21 @@ If this value is \f[I]"original"\f[], metadata for these files
will be taken from the original Tweets, not the Retweets.
+.SS extractor.twitter.text-tweets
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Also emit metadata for text-only Tweets without media content.
+
+This only has an effect with a \f[I]metadata\f[] (or \f[I]exec\f[]) post processor
+with \f[I]"event": "post"\f[]
+and appropriate \f[I]filename\f[].
+
+
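A sketch of such a setup; "name", "event", and "filename" are the metadata post processor options named above, while the "{tweet_id}.json" format string is only an illustrative assumption:

    {
        "extractor": {
            "twitter": {
                "text-tweets": true,
                "postprocessors": [{
                    "name": "metadata",
                    "event": "post",
                    "filename": "{tweet_id}.json"
                }]
            }
        }
    }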
.SS extractor.twitter.twitpic
.IP "Type:" 6
\f[I]bool\f[]
@@ -1965,7 +2063,10 @@ to use your account's browsing settings and filters.
\f[I]true\f[]
.IP "Description:" 4
-Extract media from retweeted posts.
+Fetch media from retweeted posts.
+
+If this value is \f[I]"original"\f[], metadata for these files
+will be taken from the original posts, not the retweeted posts.
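Analogous to the twitter option of the same name, a minimal sketch:

    {
        "extractor": {
            "weibo": {
                "retweets": "original"
            }
        }
    }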
.SS extractor.weibo.videos
@@ -2287,6 +2388,17 @@ Controls whether the output strings should be shortened to fit
on one console line.
+.SS output.skip
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Show skipped file downloads.
+
+
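Since the default is true, this option is mainly useful for silencing the per-file skip messages, e.g.:

    {
        "output": {
            "skip": false
        }
    }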
.SS output.progress
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -2558,7 +2670,7 @@ The event for which metadata gets written to a file.
The available events are:
\f[I]init\f[]
-After post procesor initialization
+After post processor initialization
and before the first file download
\f[I]finalize\f[]
On extractor shutdown, e.g. after all files were downloaded
@@ -2631,6 +2743,19 @@ Filename extension for the resulting video files.
Additional FFmpeg command-line arguments.
+.SS ugoira.ffmpeg-demuxer
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]image2\f[]
+
+.IP "Description:" 4
+FFmpeg demuxer to read input files with. Possible values are
+"\f[I]image2\f[]" and
+"\f[I]concat\f[]".
+
+
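A sketch selecting the concat demuxer explicitly inside an ugoira post processor definition (the "extension" key is the ugoira option documented above):

    {
        "extractor": {
            "pixiv": {
                "postprocessors": [{
                    "name": "ugoira",
                    "extension": "webm",
                    "ffmpeg-demuxer": "concat"
                }]
            }
        }
    }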
.SS ugoira.ffmpeg-location
.IP "Type:" 6
\f[I]Path\f[]
@@ -2714,6 +2839,18 @@ to the list of FFmpeg command-line arguments
to reduce an odd width/height by 1 pixel and make them even.
+.SS ugoira.repeat-last-frame
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Allow repeating the last frame when necessary
+to prevent it from only being displayed for a very short amount of time.
+
+
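And a sketch disabling the repeat behavior, for cases where an exact frame count matters (illustrative):

    {
        "extractor": {
            "pixiv": {
                "postprocessors": [{
                    "name": "ugoira",
                    "repeat-last-frame": false
                }]
            }
        }
    }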
.SS zip.extension
.IP "Type:" 6
\f[I]string\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 4eaf1b8..7497cd6 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -59,6 +59,8 @@
},
"deviantart":
{
+ "client-id": null,
+ "client-secret": null,
"extra": false,
"flat": true,
"folders": false,
@@ -174,6 +176,8 @@
},
"pillowfort":
{
+ "external": false,
+ "inline": true,
"reblogs": false
},
"pinterest":
@@ -183,8 +187,9 @@
},
"pixiv":
{
+ "refresh-token": null,
"avatar": false,
- "translated-tags": false,
+ "tags": "japanese",
"ugoira": true
},
"reactor":
@@ -254,6 +259,7 @@
"quoted": true,
"replies": true,
"retweets": true,
+ "text-tweets": false,
"twitpic": false,
"users": "timeline",
"videos": true
@@ -320,6 +326,7 @@
"mode": "auto",
"progress": true,
"shorten": true,
+ "skip": true,
"log": "[{name}][{levelname}] {message}",
"logfile": null,
"unsupportedfile": null
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index e192d75..7fe851f 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.17.3
+Version: 1.17.5
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -190,6 +190,7 @@ Description: ==========
Linux, macOS, etc.:
* ``/etc/gallery-dl.conf``
+ * ``${XDG_CONFIG_HOME}/gallery-dl/config.json``
* ``${HOME}/.config/gallery-dl/config.json``
* ``${HOME}/.gallery-dl.conf``
@@ -220,6 +221,7 @@ Description: ==========
``inkbunny``,
``instagram``,
``mangoxo``,
+ ``pillowfort``,
``pinterest``,
``sankaku``,
``subscribestar``,
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 3cc2071..9655896 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -182,6 +182,7 @@ test/test_config.py
test/test_cookies.py
test/test_downloader.py
test/test_extractor.py
+test/test_job.py
test/test_oauth.py
test/test_postprocessor.py
test/test_results.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 5bf229a..8154afc 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -186,7 +186,7 @@ def main():
elif args.clear_cache:
from . import cache
log = logging.getLogger("cache")
- cnt = cache.clear()
+ cnt = cache.clear(args.clear_cache)
if cnt is None:
log.error("Database file not available")
@@ -249,6 +249,8 @@ def main():
retval |= jobtype(url.value).run()
else:
retval |= jobtype(url).run()
+ except exception.TerminateExtraction:
+ pass
except exception.NoExtractorError:
log.error("No suitable extractor found for '%s'", url)
retval |= 64
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index a874f63..5ab68bf 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -168,24 +168,33 @@ def cache(maxage=3600, keyarg=None):
return wrap
-def clear():
- """Delete all database entries"""
+def clear(module="all"):
+ """Delete database entries for 'module'"""
db = DatabaseCacheDecorator.db
+ if not db:
+ return None
- if db:
- rowcount = 0
- cursor = db.cursor()
- try:
+ rowcount = 0
+ cursor = db.cursor()
+ module = module.lower()
+
+ try:
+ if module == "all":
cursor.execute("DELETE FROM data")
- except sqlite3.OperationalError:
- pass # database is not initialized, can't be modified, etc.
else:
- rowcount = cursor.rowcount
- db.commit()
+ cursor.execute(
+ "DELETE FROM data "
+ "WHERE key LIKE 'gallery_dl.extractor.' || ? || '.%'",
+ (module,)
+ )
+ except sqlite3.OperationalError:
+ pass # database is not initialized, can't be modified, etc.
+ else:
+ rowcount = cursor.rowcount
+ db.commit()
+ if rowcount:
cursor.execute("VACUUM")
- return rowcount
-
- return None
+ return rowcount
def _path():
diff --git a/gallery_dl/config.py b/gallery_dl/config.py
index e0a5459..953b1b1 100644
--- a/gallery_dl/config.py
+++ b/gallery_dl/config.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,6 +31,8 @@ if util.WINDOWS:
else:
_default_configs = [
"/etc/gallery-dl.conf",
+ "${XDG_CONFIG_HOME}/gallery-dl/config.json"
+ if os.environ.get("XDG_CONFIG_HOME") else
"${HOME}/.config/gallery-dl/config.json",
"${HOME}/.gallery-dl.conf",
]
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index bc42d7c..76ec46f 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -80,6 +80,10 @@ class HttpDownloader(DownloaderBase):
tries = 0
msg = ""
+ kwdict = pathfmt.kwdict
+ adjust_extension = kwdict.get(
+ "_http_adjust_extension", self.adjust_extension)
+
if self.part:
pathfmt.part_enable(self.partdir)
@@ -105,7 +109,7 @@ class HttpDownloader(DownloaderBase):
if self.headers:
headers.update(self.headers)
# file-specific headers
- extra = pathfmt.kwdict.get("_http_headers")
+ extra = kwdict.get("_http_headers")
if extra:
headers.update(extra)
@@ -139,7 +143,7 @@ class HttpDownloader(DownloaderBase):
return False
# check for invalid responses
- validate = pathfmt.kwdict.get("_http_validate")
+ validate = kwdict.get("_http_validate")
if validate and not validate(response):
self.log.warning("Invalid response")
return False
@@ -168,7 +172,7 @@ class HttpDownloader(DownloaderBase):
content = response.iter_content(self.chunk_size)
# check filename extension against file header
- if self.adjust_extension and not offset and \
+ if adjust_extension and not offset and \
pathfmt.extension in FILE_SIGNATURES:
try:
file_header = next(
@@ -198,7 +202,7 @@ class HttpDownloader(DownloaderBase):
if file_header:
fp.write(file_header)
elif offset:
- if self.adjust_extension and \
+ if adjust_extension and \
pathfmt.extension in FILE_SIGNATURES:
self._adjust_extension(pathfmt, fp.read(16))
fp.seek(offset)
@@ -222,10 +226,9 @@ class HttpDownloader(DownloaderBase):
self.downloading = False
if self.mtime:
- pathfmt.kwdict.setdefault(
- "_mtime", response.headers.get("Last-Modified"))
+ kwdict.setdefault("_mtime", response.headers.get("Last-Modified"))
else:
- pathfmt.kwdict["_mtime"] = None
+ kwdict["_mtime"] = None
return True
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index f553d41..0433dc9 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -23,6 +23,7 @@ Exception
+-- FilterError
+-- NoExtractorError
+-- StopExtraction
+ +-- TerminateExtraction
"""
@@ -109,3 +110,8 @@ class StopExtraction(GalleryDLException):
GalleryDLException.__init__(self)
self.message = message % args if args else message
self.code = 1 if message else 0
+
+
+class TerminateExtraction(GalleryDLException):
+ """Terminate data extraction"""
+ code = 0
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index edb9d46..27634de 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -104,7 +104,8 @@ class _35photoUserExtractor(_35photoExtractor):
r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
test = (
("https://35photo.pro/liya", {
- "pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
+ "pattern": r"https://([a-z][0-9]\.)?35photo\.pro"
+ r"/photos_(main|series)/.*\.jpg",
"count": 9,
}),
("https://35photo.pro/suhoveev", {
@@ -214,7 +215,7 @@ class _35photoImageExtractor(_35photoExtractor):
test = ("https://35photo.pro/photo_753340/", {
"count": 1,
"keyword": {
- "url" : r"re:https://m\d+.35photo.pro/photos_main/.*.jpg",
+ "url" : r"re:https://35photo\.pro/photos_main/.*\.jpg",
"id" : 753340,
"title" : "Winter walk",
"description": str,
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 0583eb9..c2c5a66 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -146,7 +146,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
}),
# unavailable photos (#1335)
("https://500px.com/p/Light_Expression_Photography/galleries/street", {
- "count": ">= 7",
+ "count": 0,
}),
("https://500px.com/fashvamp/galleries/lera"),
)
@@ -172,7 +172,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
}
gallery = self._request_graphql(
"GalleriesDetailQueryRendererQuery", variables,
- "fb8bb66d31b58903e2f01ebe66bbe7937b982753be3211855b7bce4e286c1a49",
+ "eda3c77ca4efe4b3347ec9c08befe3bd2c58099ebfb1f680d829fcd26d34f12d",
)["gallery"]
self._photos = gallery["photos"]
@@ -200,8 +200,8 @@ class _500pxGalleryExtractor(_500pxExtractor):
variables["cursor"] = photos["pageInfo"]["endCursor"]
photos = self._request_graphql(
"GalleriesDetailPaginationContainerQuery", variables,
- "457c66d976f56863c81795f03e98cb54"
- "3c7c6cdae7abeab8fe9e8e8a67479fa9",
+ "466cf6661a07e7fdca465edb39118efb"
+ "80fb157c6d3f620c7f518cdae0832c78",
)["galleryByOwnerIdAndSlugOrToken"]["photos"]
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index ded2ae3..0d0ad70 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -12,7 +12,6 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
-
BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4"
@@ -33,6 +32,8 @@ class AryionExtractor(Extractor):
self._needle = "class='gallery-item' id='"
def login(self):
+ if self._check_cookies(self.cookienames):
+ return
username, password = self._get_auth_info()
if username:
self._update_cookies(self._login_impl(username, password))
@@ -73,8 +74,7 @@ class AryionExtractor(Extractor):
def _pagination(self, url):
while True:
page = self.request(url).text
- yield from text.extract_iter(
- page, self._needle, "'")
+ yield from text.extract_iter(page, self._needle, "'")
pos = page.find("Next &gt;&gt;")
if pos < 0:
@@ -173,7 +173,7 @@ class AryionGalleryExtractor(AryionExtractor):
def skip(self, num):
if self.recursive:
- num = 0
+ return 0
self.offset += num
return num
@@ -182,7 +182,7 @@ class AryionGalleryExtractor(AryionExtractor):
url = "{}/g4/gallery/{}".format(self.root, self.user)
return self._pagination(url)
else:
- self._needle = "class='thumb' href='/g4/view/"
+ self._needle = "thumb' href='/g4/view/"
url = "{}/g4/latest.php?name={}".format(self.root, self.user)
return util.advance(self._pagination(url), self.offset)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 1f86ea5..3b96a4e 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -61,6 +61,7 @@ class DanbooruExtractor(Extractor):
"{}/posts/{}.json?only=pixiv_ugoira_frame_data".format(
self.root, post["id"])
).json()["pixiv_ugoira_frame_data"]["data"]
+ post["_http_adjust_extension"] = False
else:
url = post["large_file_url"]
post["extension"] = "webm"
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 47f589a..9a461a4 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -930,10 +930,12 @@ class DeviantartOAuthAPI():
self.folders = extractor.config("folders", False)
self.metadata = extractor.extra or extractor.config("metadata", False)
- self.client_id = extractor.config(
- "client-id", self.CLIENT_ID)
- self.client_secret = extractor.config(
- "client-secret", self.CLIENT_SECRET)
+ self.client_id = extractor.config("client-id")
+ if self.client_id:
+ self.client_secret = extractor.config("client-secret")
+ else:
+ self.client_id = self.CLIENT_ID
+ self.client_secret = self.CLIENT_SECRET
token = extractor.config("refresh-token")
if token is None or token == "cache":
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 910da7d..64a6cb7 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -128,7 +128,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"gid": 1200119,
"height": int,
"image_token": "re:[0-9a-f]{10}",
- "lang": "jp",
+ "lang": "ja",
"language": "Japanese",
"parent": "",
"rating": r"re:\d\.\d+",
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 0bcec2b..5962b9e 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -135,7 +135,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
("https://thebarchive.com/b/thread/739772332/", {
- "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
+ "url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd",
}),
)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 863cead..df45d0d 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -8,8 +8,10 @@
"""Extractors for https://gelbooru.com/"""
+from .common import Extractor, Message
from . import gelbooru_v02
from .. import text, exception
+import binascii
class GelbooruBase():
@@ -131,3 +133,23 @@ class GelbooruPostExtractor(GelbooruBase,
}
}),
)
+
+
+class GelbooruRedirectExtractor(GelbooruBase, Extractor):
+ subcategory = "redirect"
+ pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com"
+ r"/redirect\.php\?s=([^&#]+)")
+ test = (("https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgu"
+ "cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng=="), {
+ "pattern": r"https://gelbooru.com/index.php"
+ r"\?page=post&s=view&id=1830046"
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.redirect_url = text.ensure_http_scheme(
+ binascii.a2b_base64(match.group(1)).decode())
+
+ def items(self):
+ data = {"_extractor": GelbooruPostExtractor}
+ yield Message.Queue, self.redirect_url, data
diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py
index 76b2c38..9370840 100644
--- a/gallery_dl/extractor/imagebam.py
+++ b/gallery_dl/extractor/imagebam.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2019 Mike Fährmann
+# Copyright 2014-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from http://www.imagebam.com/"""
+"""Extractors for https://www.imagebam.com/"""
from .common import Extractor, Message
from .. import text, exception
@@ -15,34 +15,44 @@ from .. import text, exception
class ImagebamExtractor(Extractor):
"""Base class for imagebam extractors"""
category = "imagebam"
- root = "http://www.imagebam.com"
+ root = "https://www.imagebam.com"
+ cookies = None
- def get_image_data(self, page_url, data):
- """Fill 'data' and return image URL"""
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.key = match.group(1)
+ if self.cookies:
+ self.session.cookies = self.cookies
+
+ def get_image_data(self, data):
+ page_url = "{}/image/{}".format(self.root, data["image_key"])
page = self.request(page_url).text
- image_url = text.extract(page, 'property="og:image" content="', '"')[0]
- data["extension"] = image_url.rpartition(".")[2]
- data["image_key"] = page_url.rpartition("/")[2]
- data["image_id"] = data["image_key"][6:]
- return image_url
+ image_url, pos = text.extract(page, '<img src="https://images', '"')
+
+ if not image_url:
+ # cache cookies
+ ImagebamExtractor.cookies = self.session.cookies
+ # repeat request to get past "Continue to your image" pages
+ page = self.request(page_url).text
+ image_url, pos = text.extract(
+ page, '<img src="https://images', '"')
- def request_page(self, url):
- """Retrive the main part of a gallery page"""
- page = self.request(text.urljoin(self.root, url)).text
- return text.extract(page, "<fieldset>", "</fieldset>")[0]
+ filename = text.unescape(text.extract(page, 'alt="', '"', pos)[0])
+ data["url"] = "https://images" + image_url
+ data["filename"], _, data["extension"] = filename.rpartition(".")
class ImagebamGalleryExtractor(ImagebamExtractor):
"""Extractor for image galleries from imagebam.com"""
subcategory = "gallery"
- directory_fmt = ("{category}", "{title} - {gallery_key}")
- filename_fmt = "{num:>03}-{image_key}.{extension}"
+ directory_fmt = ("{category}", "{title} {gallery_key}")
+ filename_fmt = "{num:>03} {filename}.{extension}"
archive_fmt = "{gallery_key}_{image_key}"
pattern = r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([0-9a-z]+)"
test = (
- ("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
+ ("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
"url": "76d976788ae2757ac81694736b07b72356f5c4c8",
- "keyword": "9e25b8827474ac93c54855e798d60aa3cbecbd7a",
+ "keyword": "b048478b1bbba3072a7fa9fcc40630b3efad1f6c",
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
}),
("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
@@ -51,78 +61,67 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
"url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
}),
("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
- "exception": exception.NotFoundError,
+ "exception": exception.HttpError,
}),
)
- def __init__(self, match):
- ImagebamExtractor.__init__(self, match)
- self.gallery_key = match.group(1)
-
def items(self):
- url = "{}/gallery/{}".format(self.root, self.gallery_key)
- page = self.request_page(url)
- if not page or ">Error<" in page:
- raise exception.NotFoundError("gallery")
+ url = "{}/gallery/{}".format(self.root, self.key)
+ page = self.request(url).text
data = self.get_metadata(page)
- imgs = self.get_image_pages(page)
- data["count"] = len(imgs)
- data["gallery_key"] = self.gallery_key
+ keys = self.get_image_keys(page)
+ keys.reverse()
+ data["count"] = len(keys)
+ data["gallery_key"] = self.key
- yield Message.Version, 1
yield Message.Directory, data
- for data["num"], page_url in enumerate(imgs, 1):
- image_url = self.get_image_data(page_url, data)
- yield Message.Url, image_url, data
+ for data["num"], data["image_key"] in enumerate(keys, 1):
+ self.get_image_data(data)
+ yield Message.Url, data["url"], data
@staticmethod
def get_metadata(page):
"""Return gallery metadata"""
- return text.extract_all(page, (
- ("title" , "'> ", " <span "),
- (None , "'>", "</span>"),
- ("description", ":#FCFCFC;'>", "</div>"),
- ))[0]
-
- def get_image_pages(self, page):
- """Return a list of all image pages"""
- pages = []
+ title = text.extract(page, 'id="gallery-name">', '<')[0]
+ return {"title": text.unescape(title.strip())}
+
+ def get_image_keys(self, page):
+ """Return a list of all image keys"""
+ keys = []
while True:
- pages.extend(text.extract_iter(page, "\n<a href='", "'"))
- pos = page.find('"pagination_current"')
+ keys.extend(text.extract_iter(
+ page, '<a href="https://www.imagebam.com/image/', '"'))
+ pos = page.find('rel="next" aria-label="Next')
if pos > 0:
- url = text.extract(page, "<a href='", "'", pos)[0]
+ url = text.rextract(page, 'href="', '"', pos)[0]
if url:
- page = self.request_page(url)
+ page = self.request(url).text
continue
- return pages
+ return keys
class ImagebamImageExtractor(ImagebamExtractor):
"""Extractor for single images from imagebam.com"""
subcategory = "image"
- filename_fmt = "{image_key}.{extension}"
archive_fmt = "{image_key}"
pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
r"/(?:image/|(?:[0-9a-f]{2}/){3})([0-9a-f]+)")
test = (
- ("http://www.imagebam.com/image/94d56c502511890", {
+ ("https://www.imagebam.com/image/94d56c502511890", {
"url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
- "keyword": "4263d4840007524129792b8587a562b5d20c2687",
+ "keyword": "2a4380d4b57554ff793898c2d6ec60987c86d1a1",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png"),
+ # NSFW (#1534)
+ ("https://www.imagebam.com/image/0850951366904951", {
+ "url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
+ }),
)
- def __init__(self, match):
- ImagebamExtractor.__init__(self, match)
- self.image_key = match.group(1)
-
def items(self):
- page_url = "{}/image/{}".format(self.root, self.image_key)
- data = {}
- image_url = self.get_image_data(page_url, data)
- yield Message.Version, 1
+ data = {"image_key": self.key}
+ self.get_image_data(data)
yield Message.Directory, data
- yield Message.Url, image_url, data
+ yield Message.Url, data["url"], data
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 7009c7a..f925c9e 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -57,7 +57,8 @@ class ImgurImageExtractor(ImgurExtractor):
subcategory = "image"
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/(?!gallery|search)(\w{7}|\w{5})[sbtmlh]?\.?"
+ pattern = (BASE_PATTERN + r"/(?!gallery|search)"
+ r"(?:r/\w+/)?(\w{7}|\w{5})[sbtmlh]?")
test = (
("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
@@ -110,6 +111,7 @@ class ImgurImageExtractor(ImgurExtractor):
("https://imgur.com/zzzzzzz", { # not found
"exception": exception.HttpError,
}),
+ ("https://m.imgur.com/r/Celebs/iHJ7tsM"),
("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile
("https://imgur.com/zxaY6"), # 5 character key
@@ -289,7 +291,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddits's imgur links"""
subcategory = "subreddit"
- pattern = BASE_PATTERN + r"/r/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$"
test = ("https://imgur.com/r/pics", {
"range": "1-100",
"count": 100,
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index 9b5331a..2f7935b 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -64,7 +64,7 @@ class InkbunnyExtractor(Extractor):
class InkbunnyUserExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user profiles"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?#]+)"
+ pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
test = (
("https://inkbunny.net/soina", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
@@ -138,6 +138,33 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
return self.api.search(params)
+class InkbunnyFavoriteExtractor(InkbunnyExtractor):
+ """Extractor for inkbunny user favorites"""
+ subcategory = "favorite"
+ pattern = BASE_PATTERN + r"/userfavorites_process\.php\?favs_user_id=(\d+)"
+ test = (
+ ("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
+ "pattern": r"https://[\w.]+\.metapix\.net/files/full"
+ r"/\d+/\d+_\w+_.+",
+ "range": "20-50",
+ }),
+ )
+
+ def __init__(self, match):
+ InkbunnyExtractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def posts(self):
+ orderby = self.config("orderby", "fav_datetime")
+ params = {
+ "favs_user_id": self.user_id,
+ "orderby" : orderby,
+ }
+ if orderby and orderby.startswith("unread_"):
+ params["unread_submissions"] = "yes"
+ return self.api.search(params)
+
+
class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index a027be1..e3db789 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -408,7 +408,7 @@ class InstagramPostsExtractor(InstagramExtractor):
url = "{}/{}/".format(self.root, self.item)
user = self._extract_profile_page(url)
- query_hash = "42d2750e44dbac713ff30130659cd891"
+ query_hash = "32b14723a678bd4628d70c1f877b94c9"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
return self._pagination_graphql(query_hash, variables, edge)
@@ -613,7 +613,7 @@ class InstagramPostExtractor(InstagramExtractor):
)
def posts(self):
- query_hash = "cf28bf5eb45d62d4dc8e77cdb99d750d"
+ query_hash = "d4e8ae69cb68f66329dcebe82fb69f6d"
variables = {
"shortcode" : self.item,
"child_comment_count" : 3,
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 377e00b..1b5e5e9 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -26,24 +26,41 @@ class KemonopartyExtractor(Extractor):
def items(self):
find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+ if self.config("metadata"):
+ username = text.unescape(text.extract(
+ self.request(self.user_url).text, "<title>", " | Kemono<")[0])
+ else:
+ username = None
+
for post in self.posts():
files = []
- if post["file"]:
- files.append(post["file"])
- if post["attachments"]:
- files.extend(post["attachments"])
+ append = files.append
+ file = post["file"]
+
+ if file:
+ file["type"] = "file"
+ append(file)
+ for attachment in post["attachments"]:
+ attachment["type"] = "attachment"
+ append(attachment)
for path in find_inline(post["content"] or ""):
- files.append({"path": path, "name": path})
+ append({"path": path, "name": path, "type": "inline"})
post["date"] = text.parse_datetime(
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ if username:
+ post["username"] = username
yield Message.Directory, post
for post["num"], file in enumerate(files, 1):
+ post["type"] = file["type"]
url = file["path"]
if url[0] == "/":
- url = self.root + url
+ url = "https://data.kemono.party" + url
+ elif url.startswith("https://kemono.party/"):
+ url = "https://data.kemono.party" + url[20:]
+
text.nameext_from_url(file["name"], post)
yield Message.Url, url, post
@@ -64,6 +81,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
KemonopartyExtractor.__init__(self, match)
service, user_id = match.groups()
self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
+ self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
url = self.api_url
@@ -84,7 +102,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
pattern = BASE_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
- "pattern": r"https://kemono\.party/files/fanbox"
+ "pattern": r"https://data\.kemono\.party/files/fanbox"
r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
@@ -101,16 +119,21 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"shared_file": False,
"subcategory": "post",
"title": "c96取り置き",
+ "type": "file",
"user": "6993449",
},
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
- "pattern": r"https://kemono\.party/inline/fanbox"
+ "pattern": r"https://data\.kemono\.party/inline/fanbox"
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
}),
+ # kemono.party -> data.kemono.party
+ ("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
+ "pattern": r"https://data\.kemono\.party/(file|attachment)s"
+ r"/gumroad/trylsc/IURjT/",
+ }),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
- ("https://kemono.party/gumroad/user/trylsc/post/IURjT"),
)
def __init__(self, match):
@@ -118,6 +141,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
service, user_id, post_id = match.groups()
self.api_url = "{}/api/{}/user/{}/post/{}".format(
self.root, service, user_id, post_id)
+ self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
def posts(self):
posts = self.request(self.api_url).json()
diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py
index f8e1473..833d18e 100644
--- a/gallery_dl/extractor/manganelo.py
+++ b/gallery_dl/extractor/manganelo.py
@@ -4,35 +4,23 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://manganelo.com/"""
+"""Extractors for https://manganato.com/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text
import re
+BASE_PATTERN = \
+ r"(?:https?://)?((?:(?:read)?manganato|(?:www\.)?manganelo)\.com)"
-class ManganeloBase():
- """Base class for manganelo extractors"""
- category = "manganelo"
- root = "https://manganelo.com"
-
- @staticmethod
- def parse_page(page, data):
- """Parse metadata on 'page' and add it to 'data'"""
- text.extract_all(page, (
- ("manga" , '<h1>', '</h1>'),
- ('author' , '</i>Author(s) :</td>', '</tr>'),
- ), values=data)
- data["author"] = text.remove_html(data["author"])
- return data
-
-class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
+class ManganeloChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from manganelo.com"""
- pattern = (r"(?:https?://)?(?:www\.)?manganelo\.com"
- r"(/chapter/\w+/chapter_[^/?#]+)")
+ category = "manganelo"
+ root = "https://readmanganato.com"
+ pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
test = (
- ("https://manganelo.com/chapter/gq921227/chapter_23", {
+ ("https://readmanganato.com/manga-gn983696/chapter-23", {
"pattern": r"https://s\d+\.\w+\.com/mangakakalot/g\d+/gq921227/"
r"vol3_chapter_23_24_yen/\d+\.jpg",
"keyword": "3748087cf41abc97f991530e6fd53b291490d6d0",
@@ -43,11 +31,12 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
"content": "fbec629c71f66b246bfa0604204407c0d1c8ae38",
"count": 39,
}),
+ ("https://manganelo.com/chapter/gq921227/chapter_23"),
)
def __init__(self, match):
- self.path = match.group(1)
- ChapterExtractor.__init__(self, match, self.root + self.path)
+ domain, path = match.groups()
+ ChapterExtractor.__init__(self, match, "https://" + domain + path)
self.session.headers['Referer'] = self.root
def metadata(self, page):
@@ -85,21 +74,29 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
]
-class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
+class ManganeloMangaExtractor(MangaExtractor):
"""Extractor for manga from manganelo.com"""
+ category = "manganelo"
+ root = "https://readmanganato.com"
chapterclass = ManganeloChapterExtractor
- pattern = (r"(?:https?://)?(?:www\.)?manganelo\.com"
- r"(/(?:manga/|read_)\w+)")
+ pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
test = (
- ("https://manganelo.com/manga/ol921234", {
- "url": "6ba7f083a6944e414ad8214b74a0a40cb60d4562",
+ ("https://manganato.com/manga-gu983703", {
+ "pattern": ManganeloChapterExtractor.pattern,
+ "count": ">= 70",
}),
("https://manganelo.com/manga/read_otome_no_teikoku", {
"pattern": ManganeloChapterExtractor.pattern,
- "count": ">= 40"
+ "count": ">= 40",
}),
+ ("https://manganelo.com/manga/ol921234/"),
)
+ def __init__(self, match):
+ domain, path = match.groups()
+ MangaExtractor.__init__(self, match, "https://" + domain + path)
+ self.session.headers['Referer'] = self.root
+
def chapters(self, page):
results = []
data = self.parse_page(page, {"lang": "en", "language": "English"})
@@ -117,3 +114,13 @@ class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
data["chapter"] = text.parse_int(chapter)
data["chapter_minor"] = sep + minor
results.append((url, data.copy()))
+
+ @staticmethod
+ def parse_page(page, data):
+ """Parse metadata on 'page' and add it to 'data'"""
+ text.extract_all(page, (
+ ("manga" , '<h1>', '</h1>'),
+ ('author' , '</i>Author(s) :</td>', '</tr>'),
+ ), values=data)
+ data["author"] = text.remove_html(data["author"])
+ return data
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 558e682..9b6d4ba 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -17,7 +17,7 @@ import re
class MangaparkBase():
"""Base class for mangapark extractors"""
category = "mangapark"
- root_fmt = "https://mangapark.{}"
+ root_fmt = "https://v2.mangapark.{}"
browser = "firefox"
@staticmethod
@@ -51,7 +51,7 @@ class MangaparkBase():
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
- pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+ pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
r"/manga/([^?#]+/i\d+)")
test = (
("https://mangapark.net/manga/gosu/i811653/c055/1", {
@@ -117,7 +117,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
"""Extractor for manga from mangapark.net"""
chapterclass = MangaparkChapterExtractor
- pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
+ pattern = (r"(?:https?://)?(?:www\.|v2\.)?mangapark\.(me|net|com)"
r"(/manga/[^/?#]+)/?$")
test = (
("https://mangapark.net/manga/aria", {
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index e1081da..b74355d 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -158,7 +158,7 @@ class NozomiTagExtractor(NozomiExtractor):
"""Extractor for posts from tag searches on nozomi.la"""
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
- archive_fmt = "t_{search_tags}_{postid}"
+ archive_fmt = "t_{search_tags}_{dataid}"
pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
"pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
@@ -180,7 +180,7 @@ class NozomiSearchExtractor(NozomiExtractor):
"""Extractor for search results on nozomi.la"""
subcategory = "search"
directory_fmt = ("{category}", "{search_tags:J }")
- archive_fmt = "t_{search_tags}_{postid}"
+ archive_fmt = "t_{search_tags}_{dataid}"
pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
"count": ">= 5",
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 839e0b8..9c32d7a 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -117,12 +117,22 @@ class PatreonExtractor(Extractor):
attr = post["attributes"]
attr["id"] = text.parse_int(post["id"])
- if post.get("current_user_can_view", True):
+ if attr.get("current_user_can_view", True):
+
+ relationships = post["relationships"]
attr["images"] = self._files(post, included, "images")
attr["attachments"] = self._files(post, included, "attachments")
attr["date"] = text.parse_datetime(
attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- user = post["relationships"]["user"]
+
+ tags = relationships.get("user_defined_tags")
+ attr["tags"] = [
+ tag["id"].replace("user_defined;", "")
+ for tag in tags["data"]
+ if tag["type"] == "post_tag"
+ ] if tags else []
+
+ user = relationships["user"]
attr["creator"] = (
self._user(user["links"]["related"]) or
included["user"][user["data"]["id"]])
@@ -299,6 +309,10 @@ class PatreonPostExtractor(PatreonExtractor):
("https://www.patreon.com/posts/19987002", {
"count": 4,
}),
+ # tags (#1539)
+ ("https://www.patreon.com/posts/free-post-12497641", {
+ "keyword": {"tags": ["AWMedia"]},
+ }),
("https://www.patreon.com/posts/not-found-123", {
"exception": exception.NotFoundError,
}),
diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py
index cbd65d7..3c3fcd4 100644
--- a/gallery_dl/extractor/pillowfort.py
+++ b/gallery_dl/extractor/pillowfort.py
@@ -9,7 +9,9 @@
"""Extractors for https://www.pillowfort.social/"""
from .common import Extractor, Message
-from .. import text
+from ..cache import cache
+from .. import text, exception
+import re
BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social"
@@ -19,94 +21,171 @@ class PillowfortExtractor(Extractor):
category = "pillowfort"
root = "https://www.pillowfort.social"
directory_fmt = ("{category}", "{username}")
- filename_fmt = ("{post_id} {title|original_post[title]} "
+ filename_fmt = ("{post_id} {title|original_post[title]:?/ /}"
"{num:>02}.{extension}")
archive_fmt = "{id}"
+ cookiedomain = "www.pillowfort.social"
def __init__(self, match):
Extractor.__init__(self, match)
self.item = match.group(1)
- self.reblogs = self.config("reblogs", False)
def items(self):
- for post in self.posts():
+ self.login()
+ inline = self.config("inline", True)
+ reblogs = self.config("reblogs", False)
+ external = self.config("external", False)
+
+ if inline:
+ inline = re.compile(r'src="(https://img\d+\.pillowfort\.social'
+ r'/posts/[^"]+)').findall
- if "original_post" in post and not self.reblogs:
+ for post in self.posts():
+ if "original_post" in post and not reblogs:
continue
- files = post["media"]
- del post["media"]
+ files = post.pop("media")
+ if inline:
+ for url in inline(post["content"]):
+ files.append({"url": url})
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+ post["post_id"] = post.pop("id")
yield Message.Directory, post
post["num"] = 0
for file in files:
url = file["url"]
- if url:
- post.update(file)
+ if not url:
+ continue
+
+ if file.get("embed_code"):
+ if not external:
+ continue
+ msgtype = Message.Queue
+ else:
post["num"] += 1
+ msgtype = Message.Url
+
+ post.update(file)
+ text.nameext_from_url(url, post)
+ post["hash"], _, post["filename"] = \
+ post["filename"].partition("_")
+
+ if "id" not in file:
+ post["id"] = post["hash"]
+ if "created_at" in file:
post["date"] = text.parse_datetime(
file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
- yield Message.Url, url, text.nameext_from_url(url, post)
+
+ yield msgtype, url, post
+
+ def login(self):
+ cget = self.session.cookies.get
+ if cget("_Pf_new_session", domain=self.cookiedomain) \
+ or cget("remember_user_token", domain=self.cookiedomain):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ cookies = self._login_impl(username, password)
+ self._update_cookies(cookies)
+
+ @cache(maxage=14*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = "https://www.pillowfort.social/users/sign_in"
+ page = self.request(url).text
+ auth = text.extract(page, 'name="authenticity_token" value="', '"')[0]
+
+ headers = {"Origin": self.root, "Referer": url}
+ data = {
+ "utf8" : "✓",
+ "authenticity_token": auth,
+ "user[email]" : username,
+ "user[password]" : password,
+ "user[remember_me]" : "1",
+ }
+ response = self.request(url, method="POST", headers=headers, data=data)
+
+ if not response.history:
+ raise exception.AuthenticationError()
+
+ return {
+ cookie.name: cookie.value
+ for cookie in response.history[0].cookies
+ }
class PillowfortPostExtractor(PillowfortExtractor):
"""Extractor for a single pillowfort post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/posts/(\d+)"
- test = ("https://www.pillowfort.social/posts/27510", {
- "pattern": r"https://img\d+\.pillowfort\.social/posts/\w+_out\d+\.png",
- "count": 4,
- "keyword": {
- "avatar_url": str,
- "col": 0,
- "commentable": True,
- "comments_count": int,
- "community_id": None,
- "content": str,
- "created_at": str,
- "date": "type:datetime",
- "deleted": None,
- "deleted_at": None,
- "deleted_by_mod": None,
- "deleted_for_flag_id": None,
- "embed_code": None,
- "id": int,
- "last_activity": str,
- "last_activity_elapsed": str,
- "last_edited_at": None,
- "likes_count": int,
- "media_type": "picture",
- "nsfw": False,
- "num": int,
- "original_post_id": None,
- "original_post_user_id": None,
- "picture_content_type": None,
- "picture_file_name": None,
- "picture_file_size": None,
- "picture_updated_at": None,
- "post_id": 27510,
- "post_type": "picture",
- "privacy": "public",
- "reblog_copy_info": list,
- "rebloggable": True,
- "reblogged_from_post_id": None,
- "reblogged_from_user_id": None,
- "reblogs_count": int,
- "row": int,
- "small_image_url": None,
- "tags": list,
- "time_elapsed": str,
- "timestamp": str,
- "title": "What is Pillowfort.io? ",
- "updated_at": str,
- "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
- "user_id": 5,
- "username": "Staff"
- },
- })
+ test = (
+ ("https://www.pillowfort.social/posts/27510", {
+ "pattern": r"https://img\d+\.pillowfort\.social"
+ r"/posts/\w+_out\d+\.png",
+ "count": 4,
+ "keyword": {
+ "avatar_url": str,
+ "col": 0,
+ "commentable": True,
+ "comments_count": int,
+ "community_id": None,
+ "content": str,
+ "created_at": str,
+ "date": "type:datetime",
+ "deleted": None,
+ "deleted_at": None,
+ "deleted_by_mod": None,
+ "deleted_for_flag_id": None,
+ "embed_code": None,
+ "id": int,
+ "last_activity": str,
+ "last_activity_elapsed": str,
+ "last_edited_at": None,
+ "likes_count": int,
+ "media_type": "picture",
+ "nsfw": False,
+ "num": int,
+ "original_post_id": None,
+ "original_post_user_id": None,
+ "picture_content_type": None,
+ "picture_file_name": None,
+ "picture_file_size": None,
+ "picture_updated_at": None,
+ "post_id": 27510,
+ "post_type": "picture",
+ "privacy": "public",
+ "reblog_copy_info": list,
+ "rebloggable": True,
+ "reblogged_from_post_id": None,
+ "reblogged_from_user_id": None,
+ "reblogs_count": int,
+ "row": int,
+ "small_image_url": None,
+ "tags": list,
+ "time_elapsed": str,
+ "timestamp": str,
+ "title": "What is Pillowfort.io? ",
+ "updated_at": str,
+ "url": r"re:https://img3.pillowfort.social/posts/.*\.png",
+ "user_id": 5,
+ "username": "Staff"
+ },
+ }),
+ ("https://www.pillowfort.social/posts/1557500", {
+ "options": (("external", True), ("inline", False)),
+ "pattern": r"https://twitter\.com/Aliciawitdaart/status"
+ r"/1282862493841457152",
+ }),
+ ("https://www.pillowfort.social/posts/1672518", {
+ "options": (("inline", True),),
+ "count": 3,
+ }),
+ )
def posts(self):
url = "{}/posts/{}/json/".format(self.root, self.item)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8bfae06..8076fff 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -29,14 +29,28 @@ class PixivExtractor(Extractor):
Extractor.__init__(self, match)
self.api = PixivAppAPI(self)
self.load_ugoira = self.config("ugoira", True)
- self.translated_tags = self.config("translated-tags", False)
+ self.max_posts = self.config("max-posts", 0)
def items(self):
- tkey = "translated_name" if self.translated_tags else "name"
+ tags = self.config("tags", "japanese")
+ if tags == "original":
+ transform_tags = None
+ elif tags == "translated":
+ def transform_tags(work):
+ work["tags"] = list(set(
+ tag["translated_name"] or tag["name"]
+ for tag in work["tags"]))
+ else:
+ def transform_tags(work):
+ work["tags"] = [tag["name"] for tag in work["tags"]]
+
ratings = {0: "General", 1: "R-18", 2: "R-18G"}
metadata = self.metadata()
- for work in self.works():
+ works = self.works()
+ if self.max_posts:
+ works = itertools.islice(works, self.max_posts)
+ for work in works:
if not work["user"]["id"]:
continue
@@ -45,12 +59,10 @@ class PixivExtractor(Extractor):
del work["meta_single_page"]
del work["image_urls"]
del work["meta_pages"]
+
+ if transform_tags:
+ transform_tags(work)
work["num"] = 0
- if self.translated_tags:
- work["untranslated_tags"] = [
- tag["name"] for tag in work["tags"]
- ]
- work["tags"] = [tag[tkey] or tag["name"] for tag in work["tags"]]
work["date"] = text.parse_datetime(work["create_date"])
work["rating"] = ratings.get(work["x_restrict"])
work["suffix"] = ""
@@ -66,6 +78,7 @@ class PixivExtractor(Extractor):
url = ugoira["zip_urls"]["medium"].replace(
"_ugoira600x600", "_ugoira1920x1080")
work["frames"] = ugoira["frames"]
+ work["_http_adjust_extension"] = False
yield Message.Url, url, text.nameext_from_url(url, work)
elif work["page_count"] == 1:
@@ -115,7 +128,8 @@ class PixivUserExtractor(PixivExtractor):
}),
# deleted account
("http://www.pixiv.net/member_illust.php?id=173531", {
- "count": 0,
+ "options": (("metadata", True),),
+ "exception": exception.NotFoundError,
}),
("https://www.pixiv.net/en/users/173530"),
("https://www.pixiv.net/en/users/173530/manga"),
@@ -138,6 +152,11 @@ class PixivUserExtractor(PixivExtractor):
self.user_id = u1 or u2 or u3
self.tag = t1 or t2
+ def metadata(self):
+ if self.config("metadata"):
+ return {"user": self.api.user_detail(self.user_id)}
+ return {}
+
def works(self):
works = self.api.user_illusts(self.user_id)
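Note: the old boolean `translated-tags` becomes a three-way `tags` option ("japanese" keeps the raw names, "translated" prefers translations, "original" leaves the tag dicts untouched). A self-contained sketch with a fabricated work dict:

    work = {"tags": [
        {"name": "オリジナル", "translated_name": "original"},
        {"name": "風景", "translated_name": None},
    ]}

    tags = "translated"  # or "original" / "japanese" (the default)
    if tags == "original":
        transform_tags = None
    elif tags == "translated":
        def transform_tags(work):
            work["tags"] = list(set(
                tag["translated_name"] or tag["name"]
                for tag in work["tags"]))
    else:
        def transform_tags(work):
            work["tags"] = [tag["name"] for tag in work["tags"]]

    if transform_tags:
        transform_tags(work)
    print(work["tags"])  # e.g. ['original', '風景'] (set order varies)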
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index 971347b..c62a942 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -78,6 +78,8 @@ class ReactorExtractor(Extractor):
def _parse_post(self, post):
post, _, script = post.partition('<script type="application/ld+json">')
+ if not script:
+ return
images = text.extract_iter(post, '<div class="image">', '</div>')
script = script[:script.index("</")].strip()
@@ -210,7 +212,7 @@ class JoyreactorTagExtractor(ReactorTagExtractor):
pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)"
test = (
("http://joyreactor.cc/tag/Advent+Cirno", {
- "count": ">= 17",
+ "count": ">= 15",
}),
("http://joyreactor.com/tag/Cirno", {
"url": "de1e60c15bfb07a0e9603b00dc3d05f60edc7914",
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 7ffe5dc..e4075a2 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -1,17 +1,19 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://readcomiconline.to/"""
+"""Extractors for https://readcomiconline.li/"""
from .common import Extractor, ChapterExtractor, MangaExtractor
from .. import text, exception
import re
+BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)"
+
class ReadcomiconlineBase():
"""Base class for readcomiconline extractors"""
@@ -19,7 +21,7 @@ class ReadcomiconlineBase():
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
archive_fmt = "{issue_id}_{page}"
- root = "https://readcomiconline.to"
+ root = "https://readcomiconline.li"
def request(self, url, **kwargs):
"""Detect and handle redirects to CAPTCHA pages"""
@@ -42,11 +44,10 @@ class ReadcomiconlineBase():
class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
- """Extractor for comic-issues from readcomiconline.to"""
+ """Extractor for comic-issues from readcomiconline.li"""
subcategory = "issue"
- pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
- r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))")
- test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
+ pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))"
+ test = ("https://readcomiconline.li/Comic/W-i-t-c-h/Issue-130?id=22289", {
"url": "30d29c5afc65043bfd384c010257ec2d0ecbafa6",
"keyword": "2d9ec81ce1b11fac06ebf96ce33cdbfca0e85eb5",
})
@@ -78,18 +79,17 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
- """Extractor for comics from readcomiconline.to"""
+ """Extractor for comics from readcomiconline.li"""
chapterclass = ReadcomiconlineIssueExtractor
subcategory = "comic"
- pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
- r"(/Comic/[^/?#]+/?)$")
+ pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/?)$"
test = (
- ("https://readcomiconline.to/Comic/W-i-t-c-h", {
- "url": "e231bc2a293edb465133c37a8e36a7e7d94cab14",
+ ("https://readcomiconline.li/Comic/W-i-t-c-h", {
+ "url": "74eb8b9504b4084fcc9367b341300b2c52260918",
"keyword": "3986248e4458fa44a201ec073c3684917f48ee0c",
}),
("https://readcomiconline.to/Comic/Bazooka-Jules", {
- "url": "711674cb78ed10bd2557315f7a67552d01b33985",
+ "url": "2f66a467a772df4d4592e97a059ddbc3e8991799",
"keyword": "f5ba5246cd787bb750924d9690cb1549199bd516",
}),
)
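Note: factoring the host into BASE_PATTERN lets old readcomiconline.to links keep matching while .li becomes the canonical root. A quick standalone check:

    import re

    BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)"
    pattern = re.compile(BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))")

    for tld in ("li", "to"):
        url = "https://readcomiconline.%s/Comic/W-i-t-c-h/Issue-130?id=22289" % tld
        print(bool(pattern.match(url)))  # True for both TLDs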
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 5579017..9808cb8 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -10,7 +10,7 @@
from .booru import BooruExtractor
from .common import Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
import collections
@@ -206,7 +206,7 @@ class SankakuAPI():
self.username, self.password = self.extractor._get_auth_info()
if not self.username:
- self.authenticate = lambda: None
+ self.authenticate = util.noop
def pools(self, pool_id):
params = {"lang": "en"}
@@ -250,7 +250,8 @@ class SankakuAPI():
success = True
if not success:
code = data.get("code")
- if code and code.endswith(("invalid-token", "invalid_token")):
+ if code and code.endswith(
+ ("unauthorized", "invalid-token", "invalid_token")):
_authenticate_impl.invalidate(self.username)
continue
raise exception.StopExtraction(code)
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c323fe0..afeebb0 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -32,6 +32,7 @@ class TwitterExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
+ self.textonly = self.config("text-tweets", False)
self.retweets = self.config("retweets", True)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
@@ -64,7 +65,7 @@ class TwitterExtractor(Extractor):
self._extract_card(tweet, files)
if self.twitpic:
self._extract_twitpic(tweet, files)
- if not files:
+ if not files and not self.textonly:
continue
tdata = self._transform_tweet(tweet)
@@ -168,7 +169,6 @@ class TwitterExtractor(Extractor):
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"user" : self._transform_user(tweet["user"]),
"lang" : tweet["lang"],
- "content" : tweet["full_text"],
"favorite_count": tweet["favorite_count"],
"quote_count" : tweet["quote_count"],
"reply_count" : tweet["reply_count"],
@@ -187,6 +187,14 @@ class TwitterExtractor(Extractor):
"nick": u["name"],
} for u in mentions]
+ content = tweet["full_text"]
+ urls = entities.get("urls")
+ if urls:
+ for url in urls:
+ content = content.replace(url["url"], url["expanded_url"])
+ txt, _, tco = content.rpartition(" ")
+ tdata["content"] = txt if tco.startswith("https://t.co/") else content
+
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
@@ -489,6 +497,10 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("conversations", True),),
"count": ">= 50",
}),
+ # retweet with missing media entities (#1555)
+ ("https://twitter.com/morino_ya/status/1392763691599237121", {
+ "count": 4,
+ }),
)
def __init__(self, match):
@@ -802,6 +814,10 @@ class TwitterAPI():
tweet = retweet
elif retweet:
tweet["author"] = users[retweet["user_id_str"]]
+ if "extended_entities" in retweet and \
+ "extended_entities" not in tweet:
+ tweet["extended_entities"] = \
+ retweet["extended_entities"]
tweet["user"] = users[tweet["user_id_str"]]
yield tweet
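Note: the content block above first expands t.co links via the tweet's URL entities, then drops a trailing t.co token (the shortlink Twitter appends for attached media). A self-contained sketch with fabricated entity data:

    tweet = {"full_text": "hello https://t.co/abc https://t.co/xyz"}
    entities = {"urls": [
        {"url": "https://t.co/abc",
         "expanded_url": "https://example.org/post/1"},
    ]}

    content = tweet["full_text"]
    for url in entities.get("urls") or ():
        content = content.replace(url["url"], url["expanded_url"])

    txt, _, tco = content.rpartition(" ")
    content = txt if tco.startswith("https://t.co/") else content
    print(content)  # hello https://example.org/post/1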
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index d13ce0f..e89a5b7 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -69,7 +69,8 @@ class UnsplashImageExtractor(UnsplashExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
test = ("https://unsplash.com/photos/lsoogGC_5dg", {
- "url": "b99a5829ca955b768a206aa9afc391bd3f3dd55e",
+ "pattern": r"https://images\.unsplash\.com/photo-1586348943529-"
+ r"beaae6c28db9\?ixid=\w+&ixlib=rb-1.2.1",
"keyword": {
"alt_description": "re:silhouette of trees near body of water ",
"blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
@@ -114,7 +115,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
"id": "uMJXuywXLiU",
"instagram_username": "just_midwest_rock",
"last_name": "Hoefler",
- "location": "Madison, WI",
+ "location": None,
"name": "Dave Hoefler",
"portfolio_url": str,
"total_collections": int,
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index f8da191..711d3fa 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -77,7 +77,7 @@ class WeasylSubmissionExtractor(WeasylExtractor):
"keyword": {
"comments" : int,
"date" : "dt:2012-04-20 00:38:04",
- "description" : "<p>(flex)</p>",
+ "description" : "<p>(flex)</p>\n",
"favorites" : int,
"folder_name" : "Wesley Stuff",
"folderid" : 2081,
@@ -160,8 +160,8 @@ class WeasylJournalExtractor(WeasylExtractor):
"keyword": {
"title" : "BBCode",
"date" : "dt:2013-09-19 23:11:23",
- "content": "<p><a>javascript:alert(42);</a></p>"
- "<p>No more of that!</p>",
+ "content": "<p><a>javascript:alert(42);</a></p>\n\n"
+ "<p>No more of that!</p>\n",
},
})
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index a325f87..0b6a153 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -27,16 +27,21 @@ class WeiboExtractor(Extractor):
self.videos = self.config("videos", True)
def items(self):
- yield Message.Version, 1
+ original_retweets = (self.retweets == "original")
for status in self.statuses():
- files = self._files_from_status(status)
if self.retweets and "retweeted_status" in status:
- files = itertools.chain(
- files,
- self._files_from_status(status["retweeted_status"]),
- )
+ if original_retweets:
+ status = status["retweeted_status"]
+ files = self._files_from_status(status)
+ else:
+ files = itertools.chain(
+ self._files_from_status(status),
+ self._files_from_status(status["retweeted_status"]),
+ )
+ else:
+ files = self._files_from_status(status)
for num, file in enumerate(files, 1):
if num == 1:
@@ -143,6 +148,11 @@ class WeiboStatusExtractor(WeiboExtractor):
}),
# non-numeric status ID (#664)
("https://weibo.com/3314883543/Iy7fj4qVg"),
+ # original retweets (#1542)
+ ("https://m.weibo.cn/detail/4600272267522211", {
+ "options": (("retweets", "original"),),
+ "keyword": {"status": {"id": "4600167083287033"}},
+ }),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
)
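Note: with `retweets` set to "original", the retweet wrapper is discarded and only the source status is processed. A sketch with placeholder statuses and a stand-in for _files_from_status:

    import itertools

    def files_from_status(status):  # stand-in for _files_from_status
        return ["file-of-{}".format(status["id"])]

    status = {"id": 1, "retweeted_status": {"id": 2}}
    retweets = "original"

    if retweets and "retweeted_status" in status:
        if retweets == "original":
            status = status["retweeted_status"]
            files = files_from_status(status)
        else:
            files = itertools.chain(
                files_from_status(status),
                files_from_status(status["retweeted_status"]))
    else:
        files = files_from_status(status)

    print(list(files))  # ['file-of-2']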
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 7fd60b1..511a609 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -71,8 +71,8 @@ class WikiartArtistExtractor(WikiartExtractor):
directory_fmt = ("{category}", "{artist[artistName]}")
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
test = ("https://www.wikiart.org/en/thomas-cole", {
- "url": "5ba2fbe6783fcce34e65014d16e5fbc581490c98",
- "keyword": "eb5b141cf33e6d279afd1518aae24e61cc0adf81",
+ "url": "5140343730331786117fa5f4c013a6153393e28e",
+ "keyword": "4d9cbc50ebddfcb186f31ff70b08833578dd0070",
})
def __init__(self, match):
@@ -97,8 +97,8 @@ class WikiartImageExtractor(WikiartArtistExtractor):
pattern = BASE_PATTERN + r"/(?!(?:paintings|artists)-by-)([\w-]+)/([\w-]+)"
test = (
("https://www.wikiart.org/en/thomas-cole/the-departure-1838", {
- "url": "4d9fd87680a2620eaeaf1f13e3273475dec93231",
- "keyword": "a1b083d500ce2fd364128e35b026e4ca526000cc",
+ "url": "976cc2545f308a650b5dbb35c29d3cee0f4673b3",
+ "keyword": "8e80cdcb01c1fedb934633d1c4c3ab0419cfbedf",
}),
# no year or '-' in slug
("https://www.wikiart.org/en/huang-shen/summer", {
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 99f61d8..164c2a9 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -27,8 +27,11 @@ class Job():
extr = extractor.find(extr)
if not extr:
raise exception.NoExtractorError()
+
self.extractor = extr
self.pathfmt = None
+ self.kwdict = {}
+ self.status = 0
self._logger_extra = {
"job" : self,
@@ -39,32 +42,28 @@ class Job():
extr.log = self._wrap_logger(extr.log)
extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url)
- self.status = 0
- self.pred_url = self._prepare_predicates("image", True)
- self.pred_queue = self._prepare_predicates("chapter", False)
- self.kwdict = {}
-
- # user-supplied metadata
- kwdict = self.extractor.config("keywords")
- if kwdict:
- self.kwdict.update(kwdict)
-
# data from parent job
if parent:
pextr = parent.extractor
# transfer (sub)category
if pextr.config("category-transfer", pextr.categorytransfer):
+ extr._cfgpath = pextr._cfgpath
extr.category = pextr.category
extr.subcategory = pextr.subcategory
- extr._cfgpath = pextr._cfgpath
-
- # transfer parent directory
- extr._parentdir = pextr._parentdir
# reuse connection adapters
extr.session.adapters = pextr.session.adapters
+ # user-supplied metadata
+ kwdict = self.extractor.config("keywords")
+ if kwdict:
+ self.kwdict.update(kwdict)
+
+ # predicates
+ self.pred_url = self._prepare_predicates("image", True)
+ self.pred_queue = self._prepare_predicates("chapter", False)
+
def run(self):
"""Execute or run the job"""
sleep = self.extractor.config("sleep-extractor")
@@ -78,6 +77,8 @@ class Job():
if exc.message:
log.error(exc.message)
self.status |= exc.code
+ except exception.TerminateExtraction:
+ raise
except exception.GalleryDLException as exc:
log.error("%s: %s", exc.__class__.__name__, exc)
self.status |= exc.code
@@ -188,7 +189,7 @@ class Job():
class DownloadJob(Job):
"""Download images into appropriate directory/filename locations"""
- def __init__(self, url, parent=None, kwdict=None):
+ def __init__(self, url, parent=None):
Job.__init__(self, url, parent)
self.log = self.get_logger("download")
self.blacklist = None
@@ -197,19 +198,8 @@ class DownloadJob(Job):
self.hooks = ()
self.downloaders = {}
self.out = output.select()
-
- if parent:
- self.visited = parent.visited
- pfmt = parent.pathfmt
- if pfmt and parent.extractor.config("parent-directory"):
- self.extractor._parentdir = pfmt.directory
- if parent.extractor.config("parent-metadata"):
- if parent.kwdict:
- self.kwdict.update(parent.kwdict)
- if kwdict:
- self.kwdict.update(kwdict)
- else:
- self.visited = set()
+ self.visited = parent.visited if parent else set()
+ self._skipcnt = 0
def handle_url(self, url, kwdict):
"""Download the resource specified in 'url'"""
@@ -302,7 +292,27 @@ class DownloadJob(Job):
extr = None
if extr:
- self.status |= self.__class__(extr, self, kwdict).run()
+ job = self.__class__(extr, self)
+ pfmt = self.pathfmt
+ pextr = self.extractor
+
+ if pfmt and pextr.config("parent-directory"):
+ extr._parentdir = pfmt.directory
+ else:
+ extr._parentdir = pextr._parentdir
+
+ if pextr.config("parent-metadata"):
+ if self.kwdict:
+ job.kwdict.update(self.kwdict)
+ if kwdict:
+ job.kwdict.update(kwdict)
+
+ if pextr.config("parent-skip"):
+ job._skipcnt = self._skipcnt
+ self.status |= job.run()
+ self._skipcnt = job._skipcnt
+ else:
+ self.status |= job.run()
else:
self._write_unsupported(url)
@@ -398,9 +408,10 @@ class DownloadJob(Job):
skip, _, smax = skip.partition(":")
if skip == "abort":
self._skipexc = exception.StopExtraction
+ elif skip == "terminate":
+ self._skipexc = exception.TerminateExtraction
elif skip == "exit":
self._skipexc = sys.exit
- self._skipcnt = 0
self._skipmax = text.parse_int(smax)
else:
# monkey-patch methods to always return False
@@ -586,10 +597,16 @@ class UrlJob(Job):
for url in kwdict["_fallback"]:
print("|", url)
- def handle_queue(self, url, _):
- try:
- UrlJob(url, self, self.depth + 1).run()
- except exception.NoExtractorError:
+ def handle_queue(self, url, kwdict):
+ cls = kwdict.get("_extractor")
+ if cls:
+ extr = cls.from_url(url)
+ else:
+ extr = extractor.find(url)
+
+ if extr:
+ self.status |= self.__class__(extr, self).run()
+ else:
self._write_unsupported(url)
@@ -636,7 +653,7 @@ class DataJob(Job):
self.ascii = config.get(("output",), "ascii", ensure_ascii)
private = config.get(("output",), "private")
- self.filter = (lambda x: x) if private else util.filter_dict
+ self.filter = util.identity if private else util.filter_dict
def run(self):
sleep = self.extractor.config("sleep-extractor")
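Note: UrlJob.handle_queue now honors the same `_extractor` hint DownloadJob uses, so a parent extractor can hand the child's class over directly instead of going through a full pattern search. A stand-alone sketch of the dispatch (ChildExtractor and find() are stand-ins):

    import re

    class ChildExtractor:  # stand-in for a real extractor class
        pattern = re.compile(r"test:child$")

        @classmethod
        def from_url(cls, url):
            return cls() if cls.pattern.match(url) else None

    def find(url):  # stand-in for extractor.find()
        return ChildExtractor.from_url(url)

    def handle_queue(url, kwdict):
        cls = kwdict.get("_extractor")
        return cls.from_url(url) if cls else find(url)

    print(handle_queue("test:child", {"_extractor": ChildExtractor}))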
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 3e585fe..6018542 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -114,7 +114,7 @@ def build_parser():
)
general.add_argument(
"--clear-cache",
- dest="clear_cache", action="store_true",
+ dest="clear_cache", metavar="MODULE", nargs="?", const="all",
help="Delete all cached login sessions, cookies, etc.",
)
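Note: nargs="?" with const keeps the old no-argument behavior ("all") while allowing an optional module filter. Verifiable in isolation:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--clear-cache", dest="clear_cache",
                        metavar="MODULE", nargs="?", const="all")

    print(parser.parse_args(["--clear-cache"]).clear_cache)           # all
    print(parser.parse_args(["--clear-cache", "pixiv"]).clear_cache)  # pixiv
    print(parser.parse_args([]).clear_cache)                          # None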
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index 2d3dc17..7e1f8c1 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -232,15 +232,19 @@ def select():
}
omode = config.get(("output",), "mode", "auto").lower()
if omode in pdict:
- return pdict[omode]()
+ output = pdict[omode]()
elif omode == "auto":
if hasattr(sys.stdout, "isatty") and sys.stdout.isatty():
- return ColorOutput() if ANSI else TerminalOutput()
+ output = ColorOutput() if ANSI else TerminalOutput()
else:
- return PipeOutput()
+ output = PipeOutput()
else:
raise Exception("invalid output mode: " + omode)
+ if not config.get(("output",), "skip", True):
+ output.skip = util.identity
+ return output
+
class NullOutput():
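Note: setting the ("output",) "skip" option to false replaces the selected output object's skip() with an identity function, silencing per-file skip messages without touching the class. Roughly, with TerminalOutput as a stand-in for the real output classes:

    class TerminalOutput:  # stand-in
        def skip(self, path):
            print("# skipped", path)

    def identity(x):
        return x

    skip_enabled = False  # i.e. config value ("output",)/"skip" is false
    output = TerminalOutput()
    if not skip_enabled:
        output.skip = identity  # per-instance override

    output.skip("file.jpg")  # prints nothing now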
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 14eaa8d..ac094b7 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -26,6 +26,7 @@ class UgoiraPP(PostProcessor):
self.twopass = options.get("ffmpeg-twopass", False)
self.output = options.get("ffmpeg-output", True)
self.delete = not options.get("keep-files", False)
+ self.repeat = options.get("repeat-last-frame", True)
ffmpeg = options.get("ffmpeg-location")
self.ffmpeg = util.expand_path(ffmpeg) if ffmpeg else "ffmpeg"
@@ -34,6 +35,11 @@ class UgoiraPP(PostProcessor):
if rate != "auto":
self.calculate_framerate = lambda _: (None, rate)
+ if options.get("ffmpeg-demuxer") == "image2":
+ self._process = self._image2
+ else:
+ self._process = self._concat
+
if options.get("libx264-prevent-odd", True):
# get last video-codec argument
vcodec = None
@@ -72,34 +78,17 @@ class UgoiraPP(PostProcessor):
if not self._frames:
return
- rate_in, rate_out = self.calculate_framerate(self._frames)
-
with tempfile.TemporaryDirectory() as tempdir:
# extract frames
- with zipfile.ZipFile(pathfmt.temppath) as zfile:
- zfile.extractall(tempdir)
-
- # write ffconcat file
- ffconcat = tempdir + "/ffconcat.txt"
- with open(ffconcat, "w") as file:
- file.write("ffconcat version 1.0\n")
- for frame in self._frames:
- file.write("file '{}'\n".format(frame["file"]))
- file.write("duration {}\n".format(frame["delay"] / 1000))
- if self.extension != "gif":
- # repeat the last frame to prevent it from only being
- # displayed for a very short amount of time
- file.write("file '{}'\n".format(self._frames[-1]["file"]))
-
- # collect command-line arguments
- args = [self.ffmpeg]
- if rate_in:
- args += ("-r", str(rate_in))
- args += ("-i", ffconcat)
- if rate_out:
- args += ("-r", str(rate_out))
- if self.prevent_odd:
- args += ("-vf", "crop=iw-mod(iw\\,2):ih-mod(ih\\,2)")
+ try:
+ with zipfile.ZipFile(pathfmt.temppath) as zfile:
+ zfile.extractall(tempdir)
+ except FileNotFoundError:
+ pathfmt.realpath = pathfmt.temppath
+ return
+
+ # process frames and collect command-line arguments
+ args = self._process(tempdir)
if self.args:
args += self.args
self.log.debug("ffmpeg args: %s", args)
@@ -108,7 +97,7 @@ class UgoiraPP(PostProcessor):
pathfmt.set_extension(self.extension)
try:
if self.twopass:
- if "-f" not in args:
+ if "-f" not in self.args:
args += ("-f", self.extension)
args += ("-passlogfile", tempdir + "/ffmpeg2pass", "-pass")
self._exec(args + ["1", "-y", os.devnull])
@@ -127,6 +116,48 @@ class UgoiraPP(PostProcessor):
else:
pathfmt.set_extension("zip")
+ def _concat(self, path):
+ ffconcat = path + "/ffconcat.txt"
+
+ content = ["ffconcat version 1.0"]
+ append = content.append
+ for frame in self._frames:
+ append("file '{}'\nduration {}".format(
+ frame["file"], frame["delay"] / 1000))
+ if self.repeat:
+ append("file '{}'".format(frame["file"]))
+ append("")
+
+ with open(ffconcat, "w") as file:
+ file.write("\n".join(content))
+
+ rate_in, rate_out = self.calculate_framerate(self._frames)
+ args = [self.ffmpeg, "-f", "concat"]
+ if rate_in:
+ args += ("-r", str(rate_in))
+ args += ("-i", ffconcat)
+ if rate_out:
+ args += ("-r", str(rate_out))
+ return args
+
+ def _image2(self, path):
+ path += "/"
+
+ # adjust frame mtime values
+ ts = 0
+ for frame in self._frames:
+ os.utime(path + frame["file"], ns=(ts, ts))
+ ts += frame["delay"] * 1000000
+
+ return [
+ self.ffmpeg,
+ "-f", "image2",
+ "-ts_from_file", "2",
+ "-pattern_type", "sequence",
+ "-i", "{}%06d.{}".format(
+ path.replace("%", "%%"), frame["file"].rpartition(".")[2]),
+ ]
+
def _exec(self, args):
out = None if self.output else subprocess.DEVNULL
return subprocess.Popen(args, stdout=out, stderr=out).wait()
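Note: _concat() writes an ffconcat playlist; with repeat-last-frame enabled the final frame is listed twice so it is not displayed for only a single timestamp tick. What the generated file looks like for two fabricated 100 ms frames:

    frames = [{"file": "000000.jpg", "delay": 100},
              {"file": "000001.jpg", "delay": 100}]

    content = ["ffconcat version 1.0"]
    for frame in frames:
        content.append("file '{}'\nduration {}".format(
            frame["file"], frame["delay"] / 1000))
    content.append("file '{}'".format(frames[-1]["file"]))  # repeat last
    content.append("")
    print("\n".join(content))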
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index a6a9105..74b87fb 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -67,7 +67,7 @@ def nameext_from_url(url, data=None):
filename = unquote(filename_from_url(url))
name, _, ext = filename.rpartition(".")
- if name:
+ if name and len(ext) <= 16:
data["filename"], data["extension"] = name, ext.lower()
else:
data["filename"], data["extension"] = filename, ""
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 2466adf..78663a0 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -81,6 +81,15 @@ def raises(cls):
return wrap
+def identity(x):
+ """Returns its argument"""
+ return x
+
+
+def noop():
+ """Does nothing"""
+
+
def generate_token(size=16):
"""Generate a random token with hexadecimal digits"""
data = random.getrandbits(size * 8).to_bytes(size, "big")
@@ -321,7 +330,7 @@ CODES = {
"hu": "Hungarian",
"id": "Indonesian",
"it": "Italian",
- "jp": "Japanese",
+ "ja": "Japanese",
"ko": "Korean",
"ms": "Malay",
"nl": "Dutch",
@@ -804,7 +813,7 @@ class PathFormat():
@staticmethod
def _build_cleanfunc(chars, repl):
if not chars:
- return lambda x: x
+ return identity
elif isinstance(chars, dict):
def func(x, table=str.maketrans(chars)):
return x.translate(table)
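Note on the CODES change further up: "jp" is a country code, not an ISO 639-1 language code, hence the rename to "ja". A trimmed-down lookup (code_to_language here is a simplified stand-in for the util function) showing why "jp" must now miss:

    CODES = {"en": "English", "fr": "French", "ja": "Japanese"}

    def code_to_language(code, default=None):
        return CODES.get((code or "").lower(), default)

    print(code_to_language("ja"))  # Japanese
    print(code_to_language("jp"))  # None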
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 630da7d..018554e 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.17.3"
+__version__ = "1.17.5"
diff --git a/test/test_job.py b/test/test_job.py
new file mode 100644
index 0000000..1aeec1c
--- /dev/null
+++ b/test/test_job.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+import os
+import sys
+import unittest
+from unittest.mock import patch
+
+import io
+import contextlib
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import job, config, text # noqa E402
+from gallery_dl.extractor.common import Extractor, Message # noqa E402
+
+
+class TestJob(unittest.TestCase):
+
+ def tearDown(self):
+ config.clear()
+
+ def _capture_stdout(self, extr_or_job):
+ if isinstance(extr_or_job, Extractor):
+ jobinstance = self.jobclass(extr_or_job)
+ else:
+ jobinstance = extr_or_job
+
+ with io.StringIO() as buffer:
+ with contextlib.redirect_stdout(buffer):
+ jobinstance.run()
+ return buffer.getvalue()
+
+
+class TestKeywordJob(TestJob):
+ jobclass = job.KeywordJob
+
+ def test_default(self):
+ extr = TestExtractor.from_url("test:")
+ self.assertEqual(self._capture_stdout(extr), """\
+Keywords for directory names:
+-----------------------------
+category
+ test_category
+subcategory
+ test_subcategory
+
+Keywords for filenames and --filter:
+------------------------------------
+category
+ test_category
+extension
+ jpg
+filename
+ 1
+num
+ 1
+subcategory
+ test_subcategory
+tags[]
+ - foo
+ - bar
+ - テスト
+user[id]
+ 123
+user[name]
+ test
+""")
+
+
+class TestUrlJob(TestJob):
+ jobclass = job.UrlJob
+
+ def test_default(self):
+ extr = TestExtractor.from_url("test:")
+ self.assertEqual(self._capture_stdout(extr), """\
+https://example.org/1.jpg
+https://example.org/2.jpg
+https://example.org/3.jpg
+""")
+
+ def test_fallback(self):
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr)
+ tjob.handle_url = tjob.handle_url_fallback
+
+ self.assertEqual(self._capture_stdout(tjob), """\
+https://example.org/1.jpg
+| https://example.org/alt/1.jpg
+https://example.org/2.jpg
+| https://example.org/alt/2.jpg
+https://example.org/3.jpg
+| https://example.org/alt/3.jpg
+""")
+
+ def test_parent(self):
+ extr = TestExtractorParent.from_url("test:parent")
+ self.assertEqual(self._capture_stdout(extr), """\
+test:child
+test:child
+test:child
+""")
+
+ def test_child(self):
+ extr = TestExtractorParent.from_url("test:parent")
+ tjob = job.UrlJob(extr, depth=0)
+ self.assertEqual(self._capture_stdout(tjob), 3 * """\
+https://example.org/1.jpg
+https://example.org/2.jpg
+https://example.org/3.jpg
+""")
+
+
+class TestInfoJob(TestJob):
+ jobclass = job.InfoJob
+
+ def test_default(self):
+ extr = TestExtractor.from_url("test:")
+ self.assertEqual(self._capture_stdout(extr), """\
+Category / Subcategory
+ "test_category" / "test_subcategory"
+Filename format (default):
+ "test_{filename}.{extension}"
+Directory format (default):
+ ["{category}"]
+""")
+
+ def test_custom(self):
+ config.set((), "filename", "custom")
+ config.set((), "directory", ("custom",))
+ config.set((), "sleep-request", 321)
+ extr = TestExtractor.from_url("test:")
+ extr.request_interval = 123.456
+
+ self.assertEqual(self._capture_stdout(extr), """\
+Category / Subcategory
+ "test_category" / "test_subcategory"
+Filename format (custom):
+ "custom"
+Filename format (default):
+ "test_{filename}.{extension}"
+Directory format (custom):
+ ["custom"]
+Directory format (default):
+ ["{category}"]
+Request interval (custom):
+ 321
+Request interval (default):
+ 123.456
+""")
+
+ def test_base_category(self):
+ extr = TestExtractor.from_url("test:")
+ extr.basecategory = "test_basecategory"
+
+ self.assertEqual(self._capture_stdout(extr), """\
+Category / Subcategory / Basecategory
+ "test_category" / "test_subcategory" / "test_basecategory"
+Filename format (default):
+ "test_{filename}.{extension}"
+Directory format (default):
+ ["{category}"]
+""")
+
+
+class TestDataJob(TestJob):
+ jobclass = job.DataJob
+
+ def test_default(self):
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr, file=io.StringIO())
+
+ tjob.run()
+
+ self.assertEqual(tjob.data, [
+ (Message.Directory, {
+ "category" : "test_category",
+ "subcategory": "test_subcategory",
+ }),
+ (Message.Url, "https://example.org/1.jpg", {
+ "category" : "test_category",
+ "subcategory": "test_subcategory",
+ "filename" : "1",
+ "extension" : "jpg",
+ "num" : 1,
+ "tags" : ["foo", "bar", "テスト"],
+ "user" : {"id": 123, "name": "test"},
+ }),
+ (Message.Url, "https://example.org/2.jpg", {
+ "category" : "test_category",
+ "subcategory": "test_subcategory",
+ "filename" : "2",
+ "extension" : "jpg",
+ "num" : 2,
+ "tags" : ["foo", "bar", "テスト"],
+ "user" : {"id": 123, "name": "test"},
+ }),
+ (Message.Url, "https://example.org/3.jpg", {
+ "category" : "test_category",
+ "subcategory": "test_subcategory",
+ "filename" : "3",
+ "extension" : "jpg",
+ "num" : 3,
+ "tags" : ["foo", "bar", "テスト"],
+ "user" : {"id": 123, "name": "test"},
+ }),
+ ])
+
+ def test_exception(self):
+ extr = TestExtractorException.from_url("test:exception")
+ tjob = self.jobclass(extr, file=io.StringIO())
+ tjob.run()
+ self.assertEqual(
+ tjob.data[-1], ("ZeroDivisionError", "division by zero"))
+
+ def test_private(self):
+ config.set(("output",), "private", True)
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr, file=io.StringIO())
+
+ tjob.run()
+
+ for i in range(1, 4):
+ self.assertEqual(
+ tjob.data[i][2]["_fallback"],
+ ("https://example.org/alt/{}.jpg".format(i),),
+ )
+
+ def test_sleep(self):
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr, file=io.StringIO())
+
+ config.set((), "sleep-extractor", 123)
+ with patch("time.sleep") as sleep:
+ tjob.run()
+ sleep.assert_called_once_with(123)
+
+ config.set((), "sleep-extractor", 0)
+ with patch("time.sleep") as sleep:
+ tjob.run()
+ sleep.assert_not_called()
+
+ def test_ascii(self):
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr)
+
+ tjob.file = buffer = io.StringIO()
+ tjob.run()
+ self.assertIn("""\
+ "tags": [
+ "foo",
+ "bar",
+ "\\u30c6\\u30b9\\u30c8"
+ ],
+""", buffer.getvalue())
+
+ tjob.file = buffer = io.StringIO()
+ tjob.ascii = False
+ tjob.run()
+ self.assertIn("""\
+ "tags": [
+ "foo",
+ "bar",
+ "テスト"
+ ],
+""", buffer.getvalue())
+
+ def test_num_string(self):
+ extr = TestExtractor.from_url("test:")
+ tjob = self.jobclass(extr, file=io.StringIO())
+
+ with patch("gallery_dl.util.number_to_string") as nts:
+ tjob.run()
+ self.assertEqual(len(nts.call_args_list), 0)
+
+ config.set(("output",), "num-to-str", True)
+ with patch("gallery_dl.util.number_to_string") as nts:
+ tjob.run()
+ self.assertEqual(len(nts.call_args_list), 52)
+
+ tjob.run()
+ self.assertEqual(tjob.data[-1][0], Message.Url)
+ self.assertEqual(tjob.data[-1][2]["num"], "3")
+
+
+class TestExtractor(Extractor):
+ category = "test_category"
+ subcategory = "test_subcategory"
+ directory_fmt = ("{category}",)
+ filename_fmt = "test_{filename}.{extension}"
+ pattern = r"test:(child)?$"
+
+ def items(self):
+ root = "https://example.org"
+
+ yield Message.Directory, {}
+ for i in range(1, 4):
+ url = "{}/{}.jpg".format(root, i)
+ yield Message.Url, url, text.nameext_from_url(url, {
+ "num" : i,
+ "tags": ["foo", "bar", "テスト"],
+ "user": {"id": 123, "name": "test"},
+ "_fallback": ("{}/alt/{}.jpg".format(root, i),),
+ })
+
+
+class TestExtractorParent(Extractor):
+ category = "test_category"
+ subcategory = "test_subcategory_parent"
+ pattern = r"test:parent"
+
+ def items(self):
+ url = "test:child"
+
+ for i in range(11, 14):
+ yield Message.Queue, url, {
+ "num" : i,
+ "tags": ["abc", "def"],
+ "_extractor": TestExtractor,
+ }
+
+
+class TestExtractorException(Extractor):
+ category = "test_category"
+ subcategory = "test_subcategory_exception"
+ pattern = r"test:exception$"
+
+ def items(self):
+ return 1/0
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_results.py b/test/test_results.py
index ed6b2eb..bf2496b 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -312,7 +312,7 @@ def setup_test_config():
config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
for category in ("danbooru", "instagram", "twitter", "subscribestar",
- "e621", "inkbunny", "tapas"):
+ "e621", "inkbunny", "tapas", "pillowfort"):
config.set(("extractor", category), "username", None)
config.set(("extractor", "mastodon.social"), "access-token",
diff --git a/test/test_text.py b/test/test_text.py
index 1daefde..3ab9e73 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -148,6 +148,10 @@ class TestText(unittest.TestCase):
self.assertEqual(
f("http://example.org/v2/filename.ext?param=value#frag"), result)
+ # long "extension"
+ fn = "httpswww.example.orgpath-path-path-path-path-path-path-path"
+ self.assertEqual(f(fn), {"filename": fn, "extension": ""})
+
# invalid arguments
for value in INVALID:
self.assertEqual(f(value), empty)
diff --git a/test/test_util.py b/test/test_util.py
index 06de735..e2f5084 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -134,6 +134,7 @@ class TestISO639_1(unittest.TestCase):
self._run_test(util.code_to_language, {
("en",): "English",
("FR",): "French",
+ ("ja",): "Japanese",
("xx",): None,
("" ,): None,
(None,): None,
@@ -149,6 +150,7 @@ class TestISO639_1(unittest.TestCase):
self._run_test(util.language_to_code, {
("English",): "en",
("fRENch",): "fr",
+ ("Japanese",): "ja",
("xx",): None,
("" ,): None,
(None,): None,
@@ -484,6 +486,13 @@ class TestOther(unittest.TestCase):
with self.assertRaises(ValueError):
func(3)
+ def test_identity(self):
+ for value in (123, "foo", [1, 2, 3], (1, 2, 3), {1: 2}, None):
+ self.assertIs(util.identity(value), value)
+
+ def test_noop(self):
+ self.assertEqual(util.noop(), None)
+
def test_generate_token(self):
tokens = set()
for _ in range(100):