author    Unit 193 <unit193@unit193.net>  2022-08-29 02:17:16 -0400
committer Unit 193 <unit193@unit193.net>  2022-08-29 02:17:16 -0400
commit    a768930761f7f20587ae40a8cacca0e55c85290a (patch)
tree      5a4163db912b93fc45f717e5e43fd5be3e66f16c
parent    ae2a0f5622beaa6f402526f8a7b939419283a090 (diff)
New upstream version 1.23.0 (tag: upstream/1.23.0)
-rw-r--r--  CHANGELOG.md  54
-rw-r--r--  PKG-INFO  9
-rw-r--r--  README.rst  7
-rw-r--r--  data/man/gallery-dl.1  2
-rw-r--r--  data/man/gallery-dl.conf.5  152
-rw-r--r--  docs/gallery-dl.conf  21
-rw-r--r--  gallery_dl.egg-info/PKG-INFO  9
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt  2
-rw-r--r--  gallery_dl/__init__.py  13
-rw-r--r--  gallery_dl/extractor/__init__.py  2
-rw-r--r--  gallery_dl/extractor/artstation.py  5
-rw-r--r--  gallery_dl/extractor/blogger.py  3
-rw-r--r--  gallery_dl/extractor/bunkr.py  14
-rw-r--r--  gallery_dl/extractor/catbox.py  56
-rw-r--r--  gallery_dl/extractor/common.py  13
-rw-r--r--  gallery_dl/extractor/danbooru.py  9
-rw-r--r--  gallery_dl/extractor/deviantart.py  11
-rw-r--r--  gallery_dl/extractor/fanbox.py  2
-rw-r--r--  gallery_dl/extractor/foolfuuka.py  28
-rw-r--r--  gallery_dl/extractor/gelbooru.py  48
-rw-r--r--  gallery_dl/extractor/gelbooru_v02.py  55
-rw-r--r--  gallery_dl/extractor/hitomi.py  14
-rw-r--r--  gallery_dl/extractor/instagram.py  29
-rw-r--r--  gallery_dl/extractor/itaku.py  11
-rw-r--r--  gallery_dl/extractor/kemonoparty.py  48
-rw-r--r--  gallery_dl/extractor/luscious.py  4
-rw-r--r--  gallery_dl/extractor/mastodon.py  16
-rw-r--r--  gallery_dl/extractor/nijie.py  2
-rw-r--r--  gallery_dl/extractor/oauth.py  3
-rw-r--r--  gallery_dl/extractor/philomena.py  2
-rw-r--r--  gallery_dl/extractor/poipiku.py  8
-rw-r--r--  gallery_dl/extractor/skeb.py  16
-rw-r--r--  gallery_dl/extractor/slideshare.py  8
-rw-r--r--  gallery_dl/extractor/smugmug.py  4
-rw-r--r--  gallery_dl/extractor/tapas.py  2
-rw-r--r--  gallery_dl/extractor/tumblr.py  58
-rw-r--r--  gallery_dl/extractor/twitter.py  155
-rw-r--r--  gallery_dl/extractor/unsplash.py  4
-rw-r--r--  gallery_dl/extractor/vk.py  7
-rw-r--r--  gallery_dl/extractor/vsco.py  2
-rw-r--r--  gallery_dl/extractor/wallhaven.py  13
-rw-r--r--  gallery_dl/extractor/weibo.py  32
-rw-r--r--  gallery_dl/extractor/zerochan.py  156
-rw-r--r--  gallery_dl/formatter.py  11
-rw-r--r--  gallery_dl/job.py  7
-rw-r--r--  gallery_dl/output.py  6
-rw-r--r--  gallery_dl/postprocessor/metadata.py  39
-rw-r--r--  gallery_dl/text.py  10
-rw-r--r--  gallery_dl/util.py  13
-rw-r--r--  gallery_dl/version.py  2
-rw-r--r--  test/test_formatter.py  3
-rw-r--r--  test/test_postprocessor.py  46
-rw-r--r--  test/test_text.py  19
-rw-r--r--  test/test_util.py  36
54 files changed, 1063 insertions, 238 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index be9a4f7..61987d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,59 @@
# Changelog
+## 1.23.0 - 2022-08-28
+### Changes
+- [twitter] update `user` and `author` metadata fields
+  - for URLs with a single username or ID like `https://twitter.com/USER` or a search with a single `from:` statement, `user` will now always refer to the user referenced in the URL
+  - for all other URLs like `https://twitter.com/i/bookmarks`, `user` and `author` refer to the same user
+  - `author` will always refer to the original Tweet author
+- [twitter] update `quote_id` and `quote_by` metadata fields
+  - `quote_id` is now non-zero for quoted Tweets and contains the Tweet ID of the quoting Tweet (was the other way round before)
+  - `quote_by` is only defined for quoted Tweets like before, but now contains the screen name of the user quoting this Tweet
+- [skeb] improve archive IDs for thumbnails and article images
+### Additions
+- [artstation] add `num` and `count` metadata fields ([#2764](https://github.com/mikf/gallery-dl/issues/2764))
+- [catbox] add `album` extractor ([#2410](https://github.com/mikf/gallery-dl/issues/2410))
+- [blogger] emit metadata for posts without files ([#2789](https://github.com/mikf/gallery-dl/issues/2789))
+- [foolfuuka] update supported domains
+- [gelbooru] add support for `api_key` and `user_id` ([#2767](https://github.com/mikf/gallery-dl/issues/2767))
+- [gelbooru] implement pagination for `pool` results ([#2853](https://github.com/mikf/gallery-dl/issues/2853))
+- [instagram] add support for a user's saved collections ([#2769](https://github.com/mikf/gallery-dl/issues/2769))
+- [instagram] provide `date` for directory format strings ([#2830](https://github.com/mikf/gallery-dl/issues/2830))
+- [kemonoparty] add `favorites` option ([#2826](https://github.com/mikf/gallery-dl/issues/2826), [#2831](https://github.com/mikf/gallery-dl/issues/2831))
+- [oauth] add `host` config option ([#2806](https://github.com/mikf/gallery-dl/issues/2806))
+- [rule34] implement pagination for `pool` results ([#2853](https://github.com/mikf/gallery-dl/issues/2853))
+- [skeb] add option to download `article` images ([#1031](https://github.com/mikf/gallery-dl/issues/1031))
+- [tumblr] download higher-quality images ([#2761](https://github.com/mikf/gallery-dl/issues/2761))
+- [tumblr] add `count` metadata field ([#2804](https://github.com/mikf/gallery-dl/issues/2804))
+- [wallhaven] implement `metadata` option ([#2803](https://github.com/mikf/gallery-dl/issues/2803))
+- [zerochan] add `tag` and `image` extractors ([#1434](https://github.com/mikf/gallery-dl/issues/1434))
+- [zerochan] implement login with username & password ([#1434](https://github.com/mikf/gallery-dl/issues/1434))
+- [postprocessor:metadata] implement `mode: modify` and `mode: delete` ([#2640](https://github.com/mikf/gallery-dl/issues/2640))
+- [formatter] add `g` conversion for slugifying a string ([#2410](https://github.com/mikf/gallery-dl/issues/2410))
+- [formatter] apply `:J` only to lists ([#2833](https://github.com/mikf/gallery-dl/issues/2833))
+- implement `path-metadata` option ([#2734](https://github.com/mikf/gallery-dl/issues/2734))
+- allow comments after input file URLs ([#2808](https://github.com/mikf/gallery-dl/issues/2808))
+- add global `warnings` option to control `urllib3` warning behavior ([#2762](https://github.com/mikf/gallery-dl/issues/2762))
+### Fixes
+- [bunkr] fix extraction ([#2788](https://github.com/mikf/gallery-dl/issues/2788))
+- [deviantart] use public access token for journals ([#2702](https://github.com/mikf/gallery-dl/issues/2702))
+- [e621] fix extraction of `popular` posts
+- [fanbox] download cover images in original size ([#2784](https://github.com/mikf/gallery-dl/issues/2784))
+- [mastodon] allow downloading without access token ([#2782](https://github.com/mikf/gallery-dl/issues/2782))
+- [hitomi] update cache expiry time ([#2863](https://github.com/mikf/gallery-dl/issues/2863))
+- [hitomi] fix error when number of tag results is a multiple of 25 ([#2870](https://github.com/mikf/gallery-dl/issues/2870))
+- [mangahere] fix `page-reverse` option ([#2795](https://github.com/mikf/gallery-dl/issues/2795))
+- [poipiku] fix posts with more than one image ([#2796](https://github.com/mikf/gallery-dl/issues/2796))
+- [poipiku] update filter for static images ([#2796](https://github.com/mikf/gallery-dl/issues/2796))
+- [slideshare] fix metadata extraction
+- [twitter] unescape `+` in search queries ([#2226](https://github.com/mikf/gallery-dl/issues/2226))
+- [twitter] fall back to unfiltered search ([#2766](https://github.com/mikf/gallery-dl/issues/2766))
+- [twitter] ignore invalid user entries ([#2850](https://github.com/mikf/gallery-dl/issues/2850))
+- [vk] prevent exceptions for broken/invalid photos ([#2774](https://github.com/mikf/gallery-dl/issues/2774))
+- [vsco] fix `collection` extraction
+- [weibo] prevent exception for missing `playback_list` ([#2792](https://github.com/mikf/gallery-dl/issues/2792))
+- [weibo] prevent errors when paginating over album entries ([#2817](https://github.com/mikf/gallery-dl/issues/2817))
+
## 1.22.4 - 2022-07-15
### Additions
- [instagram] add `pinned` metadata field ([#2752](https://github.com/mikf/gallery-dl/issues/2752))
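Of the formatter changes above, the new `g` conversion slugifies a string before it is used in a path. A minimal sketch of exercising it through `gallery_dl.formatter` (the sample keyword dict and the exact slug output are assumptions, not taken from the release notes):

    from gallery_dl import formatter

    # '!g' converts the field value into a path-friendly slug
    fmt = formatter.parse("{title!g}")
    print(fmt.format_map({"title": "Racing Miku 2022 Ver."}))
    # expected to print something like: racing-miku-2022-ver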
diff --git a/PKG-INFO b/PKG-INFO
index aaf3516..60a798f 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.22.4
+Version: 1.23.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -251,7 +251,8 @@ and optional for
``subscribestar``,
``tapas``,
``tsumino``,
-and ``twitter``.
+``twitter``,
+and ``zerochan``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
diff --git a/README.rst b/README.rst
index 1d25a83..2b45b27 100644
--- a/README.rst
+++ b/README.rst
@@ -66,8 +66,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -218,7 +218,8 @@ and optional for
``subscribestar``,
``tapas``,
``tsumino``,
-and ``twitter``.
+``twitter``,
+and ``zerochan``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 751d470..d4efeed 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2022-07-15" "1.22.4" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2022-08-28" "1.23.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 39550ad..642cb78 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2022-07-15" "1.22.4" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2022-08-28" "1.23.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -458,6 +458,8 @@ and optional for
* \f[I]tsumino\f[]
.br
* \f[I]twitter\f[]
+.br
+* \f[I]zerochan\f[]
These values can also be specified via the
\f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or
@@ -667,6 +669,21 @@ This can then be used in \f[I]filenames\f[],
with a \f[I]metadata\f[] post processor, etc.
+.SS extractor.*.path-metadata
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Insert a reference to the current \f[I]PathFormat\f[]
+data structure into metadata dictionaries as the given name.
+
+For example, setting this option to \f[I]"gdl_path"\f[] would make it possible
+to access the current file's filename as \f[I]"{gdl_path.filename}"\f[].
+
+
.SS extractor.*.category-transfer
.IP "Type:" 6
\f[I]bool\f[]
@@ -1516,6 +1533,19 @@ Selects which site layout to expect when parsing posts.
* \f[I]"new"\f[]: Expect the *new* site layout
+.SS extractor.gelbooru.api-key & .user-id
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Values from the API Access Credentials section found at the bottom of your
+\f[I]Account Options\f[]
+page.
+
+
.SS extractor.generic.enabled
.IP "Type:" 6
\f[I]bool\f[]
@@ -1751,6 +1781,19 @@ Controls how to handle duplicate files in a post.
Extract a user's direct messages as \f[I]dms\f[] metadata.
+.SS extractor.kemonoparty.favorites
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"artist"\f[]
+
+.IP "Description:" 4
+Determines the type of favorites to be downloaded.
+
+Available types are \f[I]artist\f[] and \f[I]post\f[].
+
+
.SS extractor.kemonoparty.files
.IP "Type:" 6
\f[I]list\f[] of \f[I]strings\f[]
@@ -2007,6 +2050,17 @@ Store tokens received during OAuth authorizations
in \f[I]cache\f[].
+.SS extractor.oauth.host
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"localhost"\f[]
+
+.IP "Description:" 4
+Host name / IP address to bind to during OAuth authorization.
+
+
.SS extractor.oauth.port
.IP "Type:" 6
\f[I]integer\f[]
@@ -2424,6 +2478,17 @@ Download video embeds from external sites.
Download videos.
+.SS extractor.skeb.article
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download article images.
+
+
.SS extractor.skeb.sent-requests
.IP "Type:" 6
\f[I]bool\f[]
@@ -2502,6 +2567,21 @@ images from them.
Search posts for inline images and videos.
+.SS extractor.tumblr.original
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download full-resolution \f[I]photo\f[] images.
+
+For each photo with "maximum" resolution
+(width equal to 2048 or height equal to 3072),
+use an extra HTTP request to find the URL to its full-resolution version.
+
+
.SS extractor.tumblr.reblogs
.IP "Type:" 6
\f[I]bool\f[] or \f[I]string\f[]
@@ -2846,6 +2926,19 @@ to use your account's browsing settings and default filters when searching.
See https://wallhaven.cc/help/api for more information.
+.SS extractor.wallhaven.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract additional metadata (tags, uploader).
+
+Note: This requires 1 additional HTTP request for each post.
+
+
.SS extractor.weasyl.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -3714,16 +3807,20 @@ See \f[I]metadata.event\f[] for a list of available events.
\f[I]"json"\f[]
.IP "Description:" 4
-Select how to write metadata.
+Selects how to process metadata.
.br
-* \f[I]"json"\f[]: all metadata using \f[I]json.dump()
+* \f[I]"json"\f[]: write metadata using \f[I]json.dump()
<https://docs.python.org/3/library/json.html#json.dump>\f[]
.br
-* \f[I]"tags"\f[]: \f[I]tags\f[] separated by newlines
+* \f[I]"tags"\f[]: write \f[I]tags\f[] separated by newlines
.br
-* \f[I]"custom"\f[]: result of applying \f[I]metadata.content-format\f[]
+* \f[I]"custom"\f[]: write the result of applying \f[I]metadata.content-format\f[]
to a file's metadata dictionary
+.br
+* \f[I]"modify"\f[]: add or modify metadata entries
+.br
+* \f[I]"delete"\f[]: remove metadata entries
.SS metadata.filename
@@ -3821,6 +3918,39 @@ When starting to download all files of a post,
e.g. a Tweet on Twitter or a post on Patreon.
+.SS metadata.fields
+.IP "Type:" 6
+.br
+* \f[I]list\f[] of \f[I]strings\f[]
+.br
+* \f[I]object\f[] (field name -> \f[I]format string\f[])
+
+.IP "Example:" 4
+.br
+* .. code:: json
+
+["blocked", "watching", "status[creator][name]"]
+
+.br
+* .. code:: json
+
+{
+"blocked" : "***",
+"watching" : "\\fE 'yes' if watching else 'no'",
+"status[username]": "{status[creator][name]!l}"
+}
+
+
+.IP "Description:" 4
+.br
+* \f[I]"mode": "delete"\f[]:
+A list of metadata field names to remove.
+.br
+* \f[I]"mode": "modify"\f[]:
+An object with metadata field names mapping to a \f[I]format string\f[]
+whose result is assigned to said field name.
+
+
.SS metadata.content-format
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -4190,6 +4320,18 @@ The list of signal names to ignore, i.e. set
as signal handler for.
+.SS warnings
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"default"\f[]
+
+.IP "Description:" 4
+The \f[I]Warnings Filter action\f[]
+used for (urllib3) warnings.
+
+
.SS pyopenssl
.IP "Type:" 6
\f[I]bool\f[]
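The new options documented above can also be set programmatically. A hedged sketch using `gallery_dl.config` (the values are illustrative; the `postprocessors` layout follows the structure documented elsewhere in this manual):

    from gallery_dl import config

    # global urllib3 warnings filter action ("default", "ignore", "error", ...)
    config.set((), "warnings", "ignore")

    # "mode": "delete" with "fields", as described under metadata.fields
    config.set(("extractor",), "postprocessors", [{
        "name"  : "metadata",
        "mode"  : "delete",
        "fields": ["blocked", "watching"],
    }])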
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 1492653..1e485ee 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -109,6 +109,11 @@
"include": "gallery",
"layout": "auto"
},
+ "gelbooru":
+ {
+ "api-key": null,
+ "user-id": null
+ },
"gfycat":
{
"format": ["mp4", "webm", "mobile", "gif"]
@@ -193,6 +198,7 @@
{
"browser": true,
"cache": true,
+ "host": "localhost",
"port": 6414
},
"paheal":
@@ -248,6 +254,12 @@
"username": null,
"password": null
},
+ "skeb":
+ {
+ "article": false,
+ "sent-requests": false,
+ "thumbnails": false
+ },
"smugmug":
{
"videos": true
@@ -273,6 +285,7 @@
"external": false,
"inline": true,
"posts": "all",
+ "original": true,
"reblogs": true
},
"twitter":
@@ -302,7 +315,8 @@
},
"wallhaven":
{
- "api-key": null
+ "api-key": null,
+ "metadata": false
},
"weasyl":
{
@@ -324,6 +338,11 @@
"module": null,
"raw-options": null
},
+ "zerochan":
+ {
+ "username": null,
+ "password": null
+ },
"booru":
{
"tags": false,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 1e1d74d..6b9d68b 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.22.4
+Version: 1.23.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -99,8 +99,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.22.4/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.23.0/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -251,7 +251,8 @@ and optional for
``subscribestar``,
``tapas``,
``tsumino``,
-and ``twitter``.
+``twitter``,
+and ``zerochan``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index b323e38..5f5084b 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -58,6 +58,7 @@ gallery_dl/extractor/behance.py
gallery_dl/extractor/blogger.py
gallery_dl/extractor/booru.py
gallery_dl/extractor/bunkr.py
+gallery_dl/extractor/catbox.py
gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
gallery_dl/extractor/cyberdrop.py
@@ -197,6 +198,7 @@ gallery_dl/extractor/wikieat.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
gallery_dl/extractor/ytdl.py
+gallery_dl/extractor/zerochan.py
gallery_dl/postprocessor/__init__.py
gallery_dl/postprocessor/classify.py
gallery_dl/postprocessor/common.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 04ea54c..329e7ab 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -38,11 +38,11 @@ def parse_inputfile(file, log):
Lines starting with '#' and empty lines will be ignored.
Lines starting with '-' will be interpreted as a key-value pair separated
by an '='. where 'key' is a dot-separated option name and 'value' is a
- JSON-parsable value for it. These config options will be applied while
+ JSON-parsable value. These configuration options will be applied while
processing the next URL.
Lines starting with '-G' are the same as above, except these options will
- be valid for all following URLs, i.e. they are Global.
- Everything else will be used as potential URL.
+ be applied for *all* following URLs, i.e. they are Global.
+ Everything else will be used as a potential URL.
Example input file:
@@ -57,7 +57,8 @@ def parse_inputfile(file, log):
https://example.org/
# next URL uses default filename and 'skip' is false.
- https://example.com/index.htm
+ https://example.com/index.htm # comment1
+ https://example.com/404.htm # comment2
"""
gconf = []
lconf = []
@@ -94,6 +95,10 @@ def parse_inputfile(file, log):
else:
# url
+ if " #" in line:
+ line = line.partition(" #")[0]
+ elif "\t#" in line:
+ line = line.partition("\t#")[0]
if gconf or lconf:
yield util.ExtendedUrl(line, gconf, lconf)
gconf = []
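The two `partition` checks above only treat `#` as a comment marker when it is preceded by whitespace, so a `#` glued to a URL (for example a fragment) survives. The same logic as a standalone, testable sketch:

    def strip_comment(line):
        # ' #' or '\t#' starts a trailing comment; a bare '#' attached
        # to the URL is kept as part of the URL
        if " #" in line:
            line = line.partition(" #")[0]
        elif "\t#" in line:
            line = line.partition("\t#")[0]
        return line.rstrip()

    assert strip_comment("https://example.com/index.htm # comment1") \
        == "https://example.com/index.htm"
    assert strip_comment("https://catbox.moe/c/w7tm47#") \
        == "https://catbox.moe/c/w7tm47#"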
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 70cebb3..9e4507a 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -26,6 +26,7 @@ modules = [
"behance",
"blogger",
"bunkr",
+ "catbox",
"comicvine",
"cyberdrop",
"danbooru",
@@ -150,6 +151,7 @@ modules = [
"wikieat",
"xhamster",
"xvideos",
+ "zerochan",
"booru",
"moebooru",
"foolfuuka",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 19b9d97..c0e8e67 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -32,9 +32,11 @@ class ArtstationExtractor(Extractor):
data = self.metadata()
for project in self.projects():
- for asset in self.get_project_assets(project["hash_id"]):
+ for num, asset in enumerate(
+ self.get_project_assets(project["hash_id"]), 1):
asset.update(data)
adict = asset["asset"]
+ asset["num"] = num
yield Message.Directory, asset
if adict["has_embedded_player"] and self.external:
@@ -85,6 +87,7 @@ class ArtstationExtractor(Extractor):
assets = data["assets"]
del data["assets"]
+ data["count"] = len(assets)
if len(assets) == 1:
data["asset"] = assets[0]
yield data
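The `num`/`count` additions follow the usual per-post numbering idiom: count the assets once, then hand out a 1-based index per file. In isolation (with a made-up asset list):

    assets = [{"id": 1}, {"id": 2}, {"id": 3}]  # stand-in for project assets
    data = {"count": len(assets)}               # total files in the project
    for num, asset in enumerate(assets, 1):     # 1-based per-file index
        asset["num"] = num
    assert [a["num"] for a in assets] == [1, 2, 3]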
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 21ca991..e0885d2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -67,9 +67,6 @@ class BloggerExtractor(Extractor):
key=lambda x: x["format_id"],
)["play_url"])
- if not files:
- continue
-
post["author"] = post["author"]["displayName"]
post["replies"] = post["replies"]["totalItems"]
post["content"] = text.remove_html(content)
diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
index 9904d0a..3091f57 100644
--- a/gallery_dl/extractor/bunkr.py
+++ b/gallery_dl/extractor/bunkr.py
@@ -16,10 +16,10 @@ import json
class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"""Extractor for bunkr.is albums"""
category = "bunkr"
- root = "https://app.bunkr.is"
+ root = "https://bunkr.is"
pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
test = (
- ("https://app.bunkr.is/a/Lktg9Keq", {
+ ("https://bunkr.is/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
@@ -33,7 +33,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
},
}),
# mp4 (#2239)
- ("https://bunkr.is/a/ptRHaCn2", {
+ ("https://app.bunkr.is/a/ptRHaCn2", {
"pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
@@ -70,16 +70,16 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
album = props["album"]
files = props["files"]
except Exception as exc:
- self.log.debug(exc)
+ self.log.debug(exc.__class__.__name__, exc)
self.root = self.root.replace("bunkr", "app.bunkr", 1)
return self._fetch_album_api(album_id)
for file in files:
name = file["name"]
+ cdn = file["cdn"]
if name.endswith(".mp4"):
- file["file"] = "https://media-files.bunkr.is/" + name
- else:
- file["file"] = file["cdn"] + "/" + name
+ cdn = cdn.replace("//cdn", "//media-files")
+ file["file"] = cdn + "/" + name
return files, {
"album_id" : self.album_id,
diff --git a/gallery_dl/extractor/catbox.py b/gallery_dl/extractor/catbox.py
new file mode 100644
index 0000000..509108f
--- /dev/null
+++ b/gallery_dl/extractor/catbox.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://catbox.moe/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class CatboxAlbumExtractor(GalleryExtractor):
+ """Extractor for catbox albums"""
+ category = "catbox"
+ subcategory = "album"
+ root = "https://catbox.moe"
+ filename_fmt = "{filename}.{extension}"
+ directory_fmt = ("{category}", "{album_name} ({album_id})")
+ archive_fmt = "{album_id}_{filename}"
+ pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
+ test = (
+ ("https://catbox.moe/c/1igcbe", {
+ "url": "35866a88c29462814f103bc22ec031eaeb380f8a",
+ "content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
+ "pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
+ "count": 3,
+ "keyword": {
+ "album_id": "1igcbe",
+ "album_name": "test",
+ "date": "dt:2022-08-18 00:00:00",
+ "description": "album test &>",
+ },
+ }),
+ ("https://www.catbox.moe/c/cd90s1"),
+ ("https://catbox.moe/c/w7tm47#"),
+ )
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ return {
+ "album_id" : self.gallery_url.rpartition("/")[2],
+ "album_name" : text.unescape(extr("<h1>", "<")),
+ "date" : text.parse_datetime(extr(
+ "<p>Created ", "<"), "%B %d %Y"),
+ "description": text.unescape(extr("<p>", "<")),
+ }
+
+ def images(self, page):
+ return [
+ ("https://files.catbox.moe/" + path, None)
+ for path in text.extract_iter(
+ page, ">https://files.catbox.moe/", "<")
+ ]
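The new extractor leans on `text.extract_from`, which returns a closure that scans forward through the page on each call; a self-contained illustration (the sample HTML is made up to mirror the markup parsed above):

    from gallery_dl import text

    page = '<h1>test</h1><p>Created August 18 2022</p><p>album test</p>'
    extr = text.extract_from(page)
    print(extr("<h1>", "<"))         # 'test'   (position advances)
    print(extr("<p>Created ", "<"))  # 'August 18 2022'
    print(extr("<p>", "<"))          # 'album test'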
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 6ccae7f..1b41101 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -795,12 +795,23 @@ SSL_CIPHERS = {
}
+urllib3 = requests.packages.urllib3
+
# detect brotli support
try:
- BROTLI = requests.packages.urllib3.response.brotli is not None
+ BROTLI = urllib3.response.brotli is not None
except AttributeError:
BROTLI = False
+# set (urllib3) warnings filter
+action = config.get((), "warnings", "default")
+if action:
+ try:
+ import warnings
+ warnings.simplefilter(action, urllib3.exceptions.HTTPWarning)
+ except Exception:
+ pass
+del action
# Undo automatic pyOpenSSL injection by requests
pyopenssl = config.get((), "pyopenssl", False)
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index ec0db68..8c2ed53 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -34,6 +34,7 @@ class DanbooruExtractor(BaseExtractor):
self.per_page = iget("per-page", 200)
self.request_interval_min = iget("request-interval-min", 0.0)
self._pools = iget("pools")
+ self._popular_endpoint = iget("popular", "/explore/posts/popular.json")
BaseExtractor.__init__(self, match)
@@ -150,6 +151,7 @@ INSTANCES = {
"headers": {"User-Agent": "gallery-dl/{} (by mikf)".format(
__version__)},
"pools": "sort",
+ "popular": "/popular.json",
"page-limit": 750,
"per-page": 320,
"request-interval-min": 1.0,
@@ -308,7 +310,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
- pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
+ pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
test = (
("https://danbooru.donmai.us/explore/posts/popular"),
(("https://danbooru.donmai.us/explore/posts/popular"
@@ -316,7 +318,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
"range": "1-120",
"count": 120,
}),
- ("https://e621.net/explore/posts/popular"),
+ ("https://e621.net/popular"),
(("https://e621.net/explore/posts/popular"
"?date=2019-06-01&scale=month"), {
"pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
@@ -345,8 +347,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
def posts(self):
if self.page_start is None:
self.page_start = 1
- return self._pagination(
- "/explore/posts/popular.json", self.params, True)
+ return self._pagination(self._popular_endpoint, self.params, True)
class DanbooruFavoriteExtractor(DanbooruExtractor):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 39ae484..60f644d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1128,11 +1128,18 @@ class DeviantartOAuthAPI():
self._folders((deviation,))
return deviation
- def deviation_content(self, deviation_id, public=False):
+ def deviation_content(self, deviation_id, public=True):
"""Get extended content of a single Deviation"""
endpoint = "/deviation/content"
params = {"deviationid": deviation_id}
- return self._call(endpoint, params=params, public=public)
+ content = self._call(endpoint, params=params, public=public)
+ if public and content["html"].startswith(
+ ' <span class=\"username-with-symbol'):
+ if self.refresh_token_key:
+ content = self._call(endpoint, params=params, public=False)
+ else:
+ self.log.warning("Private Journal")
+ return content
def deviation_download(self, deviation_id, public=True):
"""Get the original file download (if allowed)"""
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 11436cb..8481248 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -6,6 +6,7 @@
"""Extractors for https://www.fanbox.cc/"""
+import re
from .common import Extractor, Message
from .. import text
@@ -78,6 +79,7 @@ class FanboxExtractor(Extractor):
num = 0
cover_image = post.get("coverImageUrl")
if cover_image:
+ cover_image = re.sub("/c/[0-9a-z_]+", "", cover_image)
final_post = post.copy()
final_post["isCoverImage"] = True
final_post["fileUrl"] = cover_image
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 34b52ef..5e6da5b 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -88,9 +88,13 @@ BASE_PATTERN = FoolfuukaExtractor.update({
"root": "https://boards.fireden.net",
"pattern": r"boards\.fireden\.net",
},
- "nyafuu": {
- "root": "https://archive.nyafuu.org",
- "pattern": r"(?:archive\.)?nyafuu\.org",
+ "rozenarcana": {
+ "root": "https://archive.alice.al",
+ "pattern": r"(?:archive\.)?alice\.al",
+ },
+ "tokyochronos": {
+ "root": "https://www.tokyochronos.net",
+ "pattern": r"(?:www\.)?tokyochronos\.net",
},
"rbt": {
"root": "https://rbt.asia",
@@ -111,7 +115,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
test = (
("https://archive.4plebs.org/tg/thread/54059290", {
- "url": "07452944164b602502b02b24521f8cee5c484d2a",
+ "url": "fd823f17b5001442b941fddcd9ec91bafedfbc79",
}),
("https://archived.moe/gd/thread/309639/", {
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
@@ -133,8 +137,11 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
("https://boards.fireden.net/sci/thread/11264294/", {
"url": "61cab625c95584a12a30049d054931d64f8d20aa",
}),
- ("https://archive.nyafuu.org/c/thread/2849220/", {
- "url": "bbe6f82944a45e359f5c8daf53f565913dc13e4f",
+ ("https://archive.alice.al/c/thread/2849220/", {
+ "url": "632e2c8de05de6b3847685f4bf1b4e5c6c9e0ed5",
+ }),
+ ("https://www.tokyochronos.net/a/thread/241664141/", {
+ "url": "ae03852cf44e3dcfce5be70274cb1828e1dbb7d6",
}),
("https://rbt.asia/g/thread/61487650/", {
"url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
@@ -180,7 +187,8 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
("https://arch.b4k.co/meta/"),
("https://desuarchive.org/a/"),
("https://boards.fireden.net/sci/"),
- ("https://archive.nyafuu.org/c/"),
+ ("https://archive.alice.al/c/"),
+ ("https://www.tokyochronos.net/a/"),
("https://rbt.asia/g/"),
("https://thebarchive.com/b/"),
)
@@ -223,7 +231,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
("https://archiveofsins.com/_/search/text/test/"),
("https://desuarchive.org/_/search/text/test/"),
("https://boards.fireden.net/_/search/text/test/"),
- ("https://archive.nyafuu.org/_/search/text/test/"),
+ ("https://archive.alice.al/_/search/text/test/"),
+ ("https://www.tokyochronos.net/_/search/text/test/"),
("https://rbt.asia/_/search/text/test/"),
("https://thebarchive.com/_/search/text/test/"),
)
@@ -288,7 +297,8 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
("https://arch.b4k.co/meta/gallery/"),
("https://desuarchive.org/a/gallery/5"),
("https://boards.fireden.net/sci/gallery/6"),
- ("https://archive.nyafuu.org/c/gallery/7"),
+ ("https://archive.alice.al/c/gallery/7"),
+ ("https://www.tokyochronos.net/a/gallery/7"),
("https://rbt.asia/g/gallery/8"),
("https://thebarchive.com/b/gallery/9"),
)
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index e8bee37..92f7ac2 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from . import gelbooru_v02
-from .. import text, util, exception
+from .. import text, exception
import binascii
@@ -21,10 +21,15 @@ class GelbooruBase():
root = "https://gelbooru.com"
def _api_request(self, params):
+ params["api_key"] = self.api_key
+ params["user_id"] = self.user_id
+
url = self.root + "/index.php?page=dapi&s=post&q=index&json=1"
data = self.request(url, params=params).json()
+
if "post" not in data:
return ()
+
posts = data["post"]
if not isinstance(posts, list):
return (posts,)
@@ -85,28 +90,29 @@ class GelbooruTagExtractor(GelbooruBase,
class GelbooruPoolExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PoolExtractor):
- """Extractor for image-pools from gelbooru.com"""
+ """Extractor for gelbooru pools"""
+ per_page = 45
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)")
test = (
("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"count": 6,
}),
- ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
- "options": (("api", False),),
- "count": 6,
- }),
)
def metadata(self):
- url = "{}/index.php?page=pool&s=show&id={}".format(
- self.root, self.pool_id)
- page = self.request(url).text
+ url = self.root + "/index.php"
+ self._params = {
+ "page": "pool",
+ "s" : "show",
+ "id" : self.pool_id,
+ "pid" : self.page_start,
+ }
+ self._page = self.request(url, params=self._params).text
- name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>")
+ name, pos = text.extract(self._page, "<h3>Now Viewing: ", "</h3>")
if not name:
raise exception.NotFoundError("pool")
- self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
return {
"pool": text.parse_int(self.pool_id),
@@ -114,9 +120,23 @@ class GelbooruPoolExtractor(GelbooruBase,
}
def posts(self):
- params = {}
- for params["id"] in util.advance(self.post_ids, self.page_start):
- yield from self._api_request(params)
+ url = self.root + "/index.php"
+ params = self._params
+
+ page = self._page
+ del self._page
+ data = {}
+
+ while True:
+ num_ids = 0
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ yield from self._api_request(data)
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+ page = self.request(url, params=params).text
class GelbooruPostExtractor(GelbooruBase,
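The pool pagination introduced above is a scrape-until-short-page loop: request a page of 45 thumbnails, feed each extracted ID to the API, and stop as soon as a page comes back short. A sketch with the HTTP and HTML-parsing steps injected as callables (both are hypothetical stand-ins for `self.request` and `text.extract_iter`):

    def paginate_pool(fetch_page, extract_ids, pool_id, per_page=45):
        # fetch_page(params) -> HTML text; extract_ids(html) -> list of IDs
        params = {"page": "pool", "s": "show", "id": pool_id, "pid": 0}
        while True:
            ids = extract_ids(fetch_page(params))
            yield from ids
            if len(ids) < per_page:    # a short page is the last page
                return
            params["pid"] += per_page  # advance offset by one full page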
diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py
index 35a3448..8214614 100644
--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -21,6 +21,9 @@ class GelbooruV02Extractor(booru.BooruExtractor):
def __init__(self, match):
booru.BooruExtractor.__init__(self, match)
+ self.api_key = self.config("api-key")
+ self.user_id = self.config("user-id")
+
try:
self.api_root = INSTANCES[self.category]["api_root"]
except KeyError:
@@ -59,6 +62,24 @@ class GelbooruV02Extractor(booru.BooruExtractor):
return
params["pid"] += 1
+ def _pagination_html(self, params):
+ url = self.root + "/index.php"
+ params["pid"] = self.page_start * self.per_page
+
+ data = {}
+ while True:
+ num_ids = 0
+ page = self.request(url, params=params).text
+
+ for data["id"] in text.extract_iter(page, '" id="p', '"'):
+ num_ids += 1
+ for post in self._api_request(data):
+ yield post.attrib
+
+ if num_ids < self.per_page:
+ return
+ params["pid"] += self.per_page
+
@staticmethod
def _prepare(post):
post["date"] = text.parse_datetime(
@@ -204,7 +225,12 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
self.pool_id = match.group(match.lastindex)
- self.post_ids = ()
+
+ if self.category == "rule34":
+ self.posts = self._posts_pages
+ self.per_page = 45
+ else:
+ self.post_ids = ()
def skip(self, num):
self.page_start += num
@@ -232,6 +258,13 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
for post in self._api_request(params):
yield post.attrib
+ def _posts_pages(self):
+ return self._pagination_html({
+ "page": "pool",
+ "s" : "show",
+ "id" : self.pool_id,
+ })
+
class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
subcategory = "favorite"
@@ -265,27 +298,11 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
return {"favorite_id": text.parse_int(self.favorite_id)}
def posts(self):
- url = self.root + "/index.php"
- params = {
+ return self._pagination_html({
"page": "favorites",
"s" : "view",
"id" : self.favorite_id,
- "pid" : self.page_start * self.per_page,
- }
-
- data = {}
- while True:
- num_ids = 0
- page = self.request(url, params=params).text
-
- for data["id"] in text.extract_iter(page, '" id="p', '"'):
- num_ids += 1
- for post in self._api_request(data):
- yield post.attrib
-
- if num_ids < self.per_page:
- return
- params["pid"] += self.per_page
+ })
class GelbooruV02PostExtractor(GelbooruV02Extractor):
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index ca7e692..f8b0c3b 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -174,23 +174,27 @@ class HitomiTagExtractor(Extractor):
}
offset = 0
+ total = None
while True:
headers["Referer"] = "{}/{}/{}.html?page={}".format(
self.root, self.type, self.tag, offset // 100 + 1)
headers["Range"] = "bytes={}-{}".format(offset, offset+99)
- nozomi = self.request(nozomi_url, headers=headers).content
+ response = self.request(nozomi_url, headers=headers)
- for gallery_id in decode_nozomi(nozomi):
+ for gallery_id in decode_nozomi(response.content):
gallery_url = "{}/galleries/{}.html".format(
self.root, gallery_id)
yield Message.Queue, gallery_url, data
- if len(nozomi) < 100:
- return
offset += 100
+ if total is None:
+ total = text.parse_int(
+ response.headers["content-range"].rpartition("/")[2])
+ if offset >= total:
+ return
-@memcache()
+@memcache(maxage=1800)
def _parse_gg(extr):
page = extr.request("https://ltn.hitomi.la/gg.js").text
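The tag-pagination fix reads the total size out of the `Content-Range` response header instead of inferring the end from a short chunk, which previously broke when the result count was an exact multiple of the page size. The parsing step in isolation:

    # 'Content-Range: bytes 0-99/2500' -> the full resource is 2500 bytes;
    # iteration stops once the byte offset reaches this total
    header = "bytes 0-99/2500"
    total = int(header.rpartition("/")[2])
    assert total == 2500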
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 4a2c3bb..d56af8b 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -270,6 +270,7 @@ class InstagramExtractor(Extractor):
"post_shortcode": post["code"],
"likes": post["like_count"],
"pinned": post.get("timeline_pinned_user_ids", ()),
+ "date": text.parse_timestamp(post.get("taken_at")),
}
caption = post["caption"]
@@ -399,6 +400,8 @@ class InstagramExtractor(Extractor):
self.log.debug("Cursor: %s", self._cursor)
def _pagination_api(self, endpoint, params=None):
+ if params is None:
+ params = {}
while True:
data = self._request_api(endpoint, params=params)
yield from data["items"]
@@ -509,7 +512,7 @@ class InstagramChannelExtractor(InstagramExtractor):
class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for ProfilePage saved media"""
subcategory = "saved"
- pattern = USER_PATTERN + r"/saved"
+ pattern = USER_PATTERN + r"/saved/?$"
test = ("https://www.instagram.com/instagram/saved/",)
def posts(self):
@@ -518,6 +521,30 @@ class InstagramSavedExtractor(InstagramExtractor):
return self._pagination_graphql(query_hash, variables)
+class InstagramCollectionExtractor(InstagramExtractor):
+ """Extractor for ProfilePage saved collection media"""
+ subcategory = "collection"
+ pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
+ test = (
+ "https://www.instagram.com/instagram/saved/collection_name/123456789/",
+ )
+
+ def __init__(self, match):
+ InstagramExtractor.__init__(self, match)
+ self.user, self.collection_name, self.collection_id = match.groups()
+
+ def metadata(self):
+ return {
+ "collection_id" : self.collection_id,
+ "collection_name": text.unescape(self.collection_name),
+ }
+
+ def posts(self):
+ endpoint = "/v1/feed/collection/{}/posts/".format(self.collection_id)
+ for item in self._pagination_api(endpoint):
+ yield item["media"]
+
+
class InstagramTagExtractor(InstagramExtractor):
"""Extractor for TagPage"""
subcategory = "tag"
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 6b2cf4c..00a32cd 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -101,9 +101,9 @@ class ItakuImageExtractor(ItakuExtractor):
"/gallery_imgs/220504_oUNIAFT/xl.jpg",
"liked_by_you": False,
"maturity_rating": "SFW",
- "num_comments": 2,
- "num_likes": 80,
- "num_reshares": 2,
+ "num_comments": int,
+ "num_likes": int,
+ "num_reshares": int,
"obj_tags": 136446,
"owner": 16775,
"owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
@@ -115,8 +115,9 @@ class ItakuImageExtractor(ItakuExtractor):
"tags": list,
"tags_character": ["hatsune_miku"],
"tags_copyright": ["vocaloid"],
- "tags_general" : ["twintails", "green_hair", "flag", "gloves",
- "green_eyes", "female", "racing_miku"],
+ "tags_general" : ["female", "green_eyes", "twintails",
+ "green_hair", "gloves", "flag",
+ "racing_miku"],
"title": "Racing Miku 2022 Ver.",
"too_mature": False,
"uncompressed_filesize": "0.62",
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index f1eb79f..816b561 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -440,20 +440,44 @@ class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/favorites"
- test = ("https://kemono.party/favorites", {
- "pattern": KemonopartyUserExtractor.pattern,
- "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
- "count": 3,
- })
+ pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
+ test = (
+ ("https://kemono.party/favorites", {
+ "pattern": KemonopartyUserExtractor.pattern,
+ "url": "f4b5b796979bcba824af84206578c79101c7f0e1",
+ "count": 3,
+ }),
+ ("https://kemono.party/favorites?type=post", {
+ "pattern": KemonopartyPostExtractor.pattern,
+ "url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
+ "count": 3,
+ }),
+ )
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ self.favorites = (text.parse_query(match.group(2)).get("type") or
+ self.config("favorites") or
+ "artist")
def items(self):
self._prepare_ddosguard_cookies()
self.login()
- users = self.request(self.root + "/api/favorites").json()
- for user in users:
- user["_extractor"] = KemonopartyUserExtractor
- url = "{}/{}/user/{}".format(
- self.root, user["service"], user["id"])
- yield Message.Queue, url, user
+ if self.favorites == "artist":
+ users = self.request(
+ self.root + "/api/v1/account/favorites?type=artist").json()
+ for user in users:
+ user["_extractor"] = KemonopartyUserExtractor
+ url = "{}/{}/user/{}".format(
+ self.root, user["service"], user["id"])
+ yield Message.Queue, url, user
+
+ elif self.favorites == "post":
+ posts = self.request(
+ self.root + "/api/v1/account/favorites?type=post").json()
+ for post in posts:
+ post["_extractor"] = KemonopartyPostExtractor
+ url = "{}/{}/user/{}/post/{}".format(
+ self.root, post["service"], post["user"], post["id"])
+ yield Message.Queue, url, post
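The favorites type is resolved with a short precedence chain: an explicit `?type=` query parameter wins over the `favorites` config option, which wins over the `artist` default. The same idiom in isolation (`configured` stands in for the config lookup):

    from gallery_dl import text

    def favorite_type(query_string, configured=None):
        return (text.parse_query(query_string).get("type") or
                configured or "artist")

    assert favorite_type("type=post") == "post"
    assert favorite_type("", configured="post") == "post"
    assert favorite_type("") == "artist"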
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index b5db3dd..57db0c9 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -49,7 +49,9 @@ class LusciousAlbumExtractor(LusciousExtractor):
r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
- "url": "7e4984a271a1072ac6483e4228a045895aff86f3",
+ "pattern": r"https://storage\.bhs\.cloud\.ovh\.net/v1/AUTH_\w+"
+ r"/images/NTRshouldbeillegal/277031"
+ r"/luscious_net_\d+_\d+\.jpg$",
# "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
"keyword": {
"album": {
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 6e780e8..493a8ef 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -179,12 +179,11 @@ class MastodonAPI():
try:
access_token = INSTANCES[extractor.category]["access-token"]
except (KeyError, TypeError):
- raise exception.StopExtraction(
- "Missing access token.\n"
- "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
- extractor.instance)
-
- self.headers = {"Authorization": "Bearer " + access_token}
+ pass
+ if access_token:
+ self.headers = {"Authorization": "Bearer " + access_token}
+ else:
+ self.headers = None
def account_id_by_username(self, username):
if username.startswith("id:"):
@@ -232,6 +231,11 @@ class MastodonAPI():
if code < 400:
return response
+ if code == 401:
+ raise exception.StopExtraction(
+ "Invalid or missing access token.\n"
+ "Run 'gallery-dl oauth:mastodon:%s' to obtain one.",
+ self.extractor.instance)
if code == 404:
raise exception.NotFoundError()
if code == 429:
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index 122ea46..2c8e72c 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -126,7 +126,7 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
username, password = self._get_auth_info()
self._update_cookies(self._login_impl(username, password))
- @cache(maxage=150*24*3600, keyarg=1)
+ @cache(maxage=90*24*3600, keyarg=1)
def _login_impl(self, username, password):
if not username or not password:
raise exception.AuthenticationError(
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 653822f..d6628c4 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -41,7 +41,8 @@ class OAuthBase(Extractor):
stdout_write("Waiting for response. (Cancel with Ctrl+c)\n")
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- server.bind(("localhost", self.config("port", 6414)))
+ server.bind((self.config("host", "localhost"),
+ self.config("port", 6414)))
server.listen(1)
# workaround for ctrl+c not working during server.accept on Windows
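Making the bind address configurable matters when the browser that completes the OAuth flow runs on a different machine than gallery-dl, e.g. in a container or on a headless server. The plain-socket equivalent of binding to all interfaces (address and port here are only an example):

    import socket

    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server.bind(("0.0.0.0", 6414))  # accept the redirect on any interface
    server.listen(1)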
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index fba1312..225f0ff 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -122,7 +122,7 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2022-04-25T09:30:57Z",
+ "updated_at": r"re:\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ",
"uploader": "Clover the Clever",
"uploader_id": 211188,
"upvotes": int,
diff --git a/gallery_dl/extractor/poipiku.py b/gallery_dl/extractor/poipiku.py
index e1846cc..8203885 100644
--- a/gallery_dl/extractor/poipiku.py
+++ b/gallery_dl/extractor/poipiku.py
@@ -51,13 +51,13 @@ class PoipikuExtractor(Extractor):
thumb = extr('class="IllustItemThumbImg" src="', '"')
if not thumb:
break
- elif thumb.startswith("/img/"):
+ elif thumb.startswith(("//img.poipiku.com/img/", "/img/")):
continue
post["num"] += 1
url = text.ensure_http_scheme(thumb[:-8])
yield Message.Url, url, text.nameext_from_url(url, post)
- if not extr('</i> show all', '<'):
+ if not extr('> show all', '<'):
continue
url = self.root + "/f/ShowAppendFileF.jsp"
@@ -131,7 +131,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
pattern = BASE_PATTERN + r"/(\d+)/(\d+)"
test = (
("https://poipiku.com/25049/5864576.html", {
- "pattern": r"https://img\.poipiku\.com/user_img03/000025049"
+ "pattern": r"https://img\.poipiku\.com/user_img\d+/000025049"
r"/005864576_EWN1Y65gQ\.png$",
"keyword": {
"count": "1",
@@ -146,7 +146,7 @@ class PoipikuPostExtractor(PoipikuExtractor):
},
}),
("https://poipiku.com/2166245/6411749.html", {
- "pattern": r"https://img\.poipiku\.com/user_img01/002166245"
+ "pattern": r"https://img\.poipiku\.com/user_img\d+/002166245"
r"/006411749_\w+\.jpeg$",
"count": 4,
"keyword": {
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 6dfc907..cd8c238 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -16,13 +16,14 @@ class SkebExtractor(Extractor):
category = "skeb"
directory_fmt = ("{category}", "{creator[screen_name]}")
filename_fmt = "{post_num}_{file_id}.{extension}"
- archive_fmt = "{post_num}_{file_id}_{content_category}"
+ archive_fmt = "{post_num}_{_file_id}_{content_category}"
root = "https://skeb.jp"
def __init__(self, match):
Extractor.__init__(self, match)
self.user_name = match.group(1)
self.thumbnails = self.config("thumbnails", False)
+ self.article = self.config("article", False)
def items(self):
for user_name, post_num in self.posts():
@@ -64,6 +65,7 @@ class SkebExtractor(Extractor):
resp = self.request(url, headers=headers).json()
creator = resp["creator"]
post = {
+ "post_id" : resp["id"],
"post_num" : post_num,
"post_url" : self.root + resp["path"],
"body" : resp["body"],
@@ -102,12 +104,22 @@ class SkebExtractor(Extractor):
if self.thumbnails and "og_image_url" in resp:
post["content_category"] = "thumb"
post["file_id"] = "thumb"
+ post["_file_id"] = str(resp["id"]) + "t"
post["file_url"] = resp["og_image_url"]
yield post
+ if self.article and "article_image_url" in resp:
+ url = resp["article_image_url"]
+ if url:
+ post["content_category"] = "article"
+ post["file_id"] = "article"
+ post["_file_id"] = str(resp["id"]) + "a"
+ post["file_url"] = url
+ yield post
+
for preview in resp["previews"]:
post["content_category"] = "preview"
- post["file_id"] = preview["id"]
+ post["file_id"] = post["_file_id"] = preview["id"]
post["file_url"] = preview["url"]
info = preview["information"]
post["original"] = {
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index b0b8f3b..506db26 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -59,7 +59,7 @@ class SlidesharePresentationExtractor(GalleryExtractor):
# mobile URL
(("https://www.slideshare.net"
"/mobile/uqudent/introduction-to-fixed-prosthodontics"), {
- "url": "59993ad7b0cb93c73011547eedcd02c622649e9d",
+ "url": "43eda2adf4dd221a251c8df794dfb82649e94647",
}),
)
@@ -72,14 +72,14 @@ class SlidesharePresentationExtractor(GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
descr = extr('<meta name="description" content="', '"')
- title = extr('<span class="j-title-breadcrumb">', '</span>')
- published = extr('<div class="metadata-item">', '</div>')
comments = extr('content="UserComments:', '"')
likes = extr('content="UserLikes:', '"')
views = extr('content="UserPageVisits:', '"')
+ title = extr('<span class="j-title-breadcrumb">', '</span>')
+ published = extr('<div class="metadata-item">', '</div>')
if descr.endswith("…"):
- alt_descr = extr('id="slideshow-description-text"', '</p>')
+ alt_descr = extr('slideshow-description-text"', '</p>')
if alt_descr:
descr = text.remove_html(alt_descr.partition(">")[2]).strip()
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 98e914e..4010da3 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -111,13 +111,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
test = (
("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
"url": "e6408fd2c64e721fd146130dceb56a971ceb4259",
- "keyword": "460a773f5addadd3e216bda346fc524fe4eedc52",
+ "keyword": "b31a63d07c9c26eb0f79f52d60d171a98938f99b",
"content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
}),
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "eb74e5cf6780d5152ab8f11b431ec1b17fa8f69b",
+ "keyword": "4cef98133ace511adc874c9d9abac5817ba0d856",
}),
)
diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py
index fcdf18f..545a95b 100644
--- a/gallery_dl/extractor/tapas.py
+++ b/gallery_dl/extractor/tapas.py
@@ -108,7 +108,7 @@ class TapasSeriesExtractor(TapasExtractor):
test = (
("https://tapas.io/series/just-leave-me-be", {
"pattern": r"https://\w+\.cloudfront\.net/pc/\w\w/[0-9a-f-]+\.jpg",
- "count": 127,
+ "count": 132,
}),
("https://tapas.io/series/yona", { # mature
"count": 26,
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index ded7fd1..b694fa0 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -64,6 +64,7 @@ class TumblrExtractor(Extractor):
self.inline = self.config("inline", True)
self.reblogs = self.config("reblogs", True)
self.external = self.config("external", False)
+ self.original = self.config("original", True)
if len(self.types) == 1:
self.api.posts_type = next(iter(self.types))
@@ -101,8 +102,7 @@ class TumblrExtractor(Extractor):
del post["trail"]
post["blog"] = blog
post["date"] = text.parse_timestamp(post["timestamp"])
- yield Message.Directory, post
- post["num"] = 0
+ posts = []
if "photos" in post: # type "photo" or "link"
photos = post["photos"]
@@ -110,18 +110,31 @@ class TumblrExtractor(Extractor):
for photo in photos:
post["photo"] = photo
- photo.update(photo["original_size"])
+
+ best_photo = photo["original_size"]
+ for alt_photo in photo["alt_sizes"]:
+ if (alt_photo["height"] > best_photo["height"] or
+ alt_photo["width"] > best_photo["width"]):
+ best_photo = alt_photo
+ photo.update(best_photo)
+
+ if self.original and "/s2048x3072/" in photo["url"] and (
+ photo["width"] == 2048 or photo["height"] == 3072):
+ photo["url"] = self._original_image(photo["url"])
+
del photo["original_size"]
del photo["alt_sizes"]
- yield self._prepare_image(photo["url"], post)
+ posts.append(
+ self._prepare_image(photo["url"], post.copy()))
+ del post["photo"]
url = post.get("audio_url") # type "audio"
if url and url.startswith("https://a.tumblr.com/"):
- yield self._prepare(url, post)
+ posts.append(self._prepare(url, post.copy()))
url = post.get("video_url") # type "video"
if url:
- yield self._prepare(_original_video(url), post)
+ posts.append(self._prepare(_original_video(url), post.copy()))
if self.inline and "reblog" in post: # inline media
# only "chat" posts are missing a "reblog" key in their
@@ -129,16 +142,25 @@ class TumblrExtractor(Extractor):
body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
for url in re.findall('<img src="([^"]+)"', body):
url = _original_inline_image(url)
- yield self._prepare_image(url, post)
+ posts.append(self._prepare_image(url, post.copy()))
for url in re.findall('<source src="([^"]+)"', body):
url = _original_video(url)
- yield self._prepare(url, post)
+ posts.append(self._prepare(url, post.copy()))
if self.external: # external links
- post["extension"] = None
url = post.get("permalink_url") or post.get("url")
if url:
- yield Message.Queue, url, post
+ post["extension"] = None
+ posts.append((Message.Queue, url, post.copy()))
+ del post["extension"]
+
+ post["count"] = len(posts)
+ yield Message.Directory, post
+
+ for num, (msg, url, post) in enumerate(posts, 1):
+ post["num"] = num
+ post["count"] = len(posts)
+ yield msg, url, post
def posts(self):
"""Return an iterable containing all relevant posts"""
@@ -167,14 +189,12 @@ class TumblrExtractor(Extractor):
@staticmethod
def _prepare(url, post):
text.nameext_from_url(url, post)
- post["num"] += 1
post["hash"] = post["filename"].partition("_")[2]
return Message.Url, url, post
@staticmethod
def _prepare_image(url, post):
text.nameext_from_url(url, post)
- post["num"] += 1
parts = post["filename"].split("_")
try:
@@ -188,7 +208,7 @@ class TumblrExtractor(Extractor):
@staticmethod
def _prepare_avatar(url, post, blog):
text.nameext_from_url(url, post)
- post["num"] = 1
+ post["num"] = post["count"] = 1
post["blog"] = blog
post["reblogged"] = False
post["type"] = post["id"] = post["hash"] = "avatar"
@@ -200,6 +220,12 @@ class TumblrExtractor(Extractor):
def _skip_reblog_same_blog(self, post):
return self.blog != post.get("reblogged_root_uuid")
+ def _original_image(self, url):
+ url = url.replace("/s2048x3072/", "/s99999x99999/", 1)
+ headers = {"Accept": "text/html,*/*;q=0.8"}
+ response = self.request(url, headers=headers)
+ return text.extract(response.text, '" src="', '"')[0]
+
class TumblrUserExtractor(TumblrExtractor):
"""Extractor for all images from a tumblr-user"""
@@ -279,6 +305,12 @@ class TumblrPostExtractor(TumblrExtractor):
("https://mikf123.tumblr.com/post/181022380064/chat-post", {
"count": 0,
}),
+ ("https://mikf123.tumblr.com/image/689860196535762944", {
+ "pattern": r"^https://\d+\.media\.tumblr\.com"
+ r"/134791621559a79793563b636b5fe2c6"
+ r"/8f1131551cef6e74-bc/s99999x99999"
+ r"/188cf9b8915b0d0911c6c743d152fc62e8f38491\.png$",
+ }),
("http://ziemniax.tumblr.com/post/109697912859/", {
"exception": exception.NotFoundError, # HTML response (#297)
}),
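
Note: the tumblr refactor above switches from incrementing post["num"] while yielding to buffering every file first, so the directory message can carry the final count and each file gets a num/count pair. A minimal sketch of that buffer-then-number pattern (emit_post and the "Directory" string are illustrative stand-ins, not gallery-dl API):

    def emit_post(post, files):
        # files: list of (message, url, metadata) tuples collected up front
        post["count"] = len(files)
        yield "Directory", post          # count is known before any file

        for num, (msg, url, metadata) in enumerate(files, 1):
            metadata["num"] = num        # 1-based, replaces the old shared counter
            metadata["count"] = len(files)
            yield msg, url, metadata
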
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 36b4806..0df4ea2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
+import itertools
import json
BASE_PATTERN = (
@@ -40,7 +41,7 @@ class TwitterExtractor(Extractor):
self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
- self._user_id = None
+ self._user = self._user_obj = None
self._user_cache = {}
self._init_sizes()
@@ -90,8 +91,9 @@ class TwitterExtractor(Extractor):
if "in_reply_to_user_id_str" in data and (
not self.replies or (
self.replies == "self" and
- (self._user_id or data["in_reply_to_user_id_str"]) !=
- data["user_id_str"]
+ data["user_id_str"] !=
+ (self._user_obj["rest_id"] if self._user else
+ data["in_reply_to_user_id_str"])
)
):
self.log.debug("Skipping %s (reply)", data["id_str"])
@@ -229,11 +231,13 @@ class TwitterExtractor(Extractor):
files.append({"url": url})
def _transform_tweet(self, tweet):
- if "core" in tweet:
- user = self._transform_user(
- tweet["core"]["user_results"]["result"])
+ if "author" in tweet:
+ author = tweet["author"]
+ elif "core" in tweet:
+ author = tweet["core"]["user_results"]["result"]
else:
- user = self._transform_user(tweet["user"])
+ author = tweet["user"]
+ author = self._transform_user(author)
if "legacy" in tweet:
tweet = tweet["legacy"]
@@ -245,12 +249,13 @@ class TwitterExtractor(Extractor):
"retweet_id" : text.parse_int(
tget("retweeted_status_id_str")),
"quote_id" : text.parse_int(
- tget("quoted_status_id_str")),
+ tget("quoted_by_id_str")),
"reply_id" : text.parse_int(
tget("in_reply_to_status_id_str")),
"date" : text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
- "user" : user,
+ "user" : self._user or author,
+ "author" : author,
"lang" : tweet["lang"],
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
@@ -280,13 +285,8 @@ class TwitterExtractor(Extractor):
if "in_reply_to_screen_name" in tweet:
tdata["reply_to"] = tweet["in_reply_to_screen_name"]
- if "quoted_by_id_str" in tweet:
- tdata["quote_by"] = text.parse_int(tweet["quoted_by_id_str"])
-
- if "author" in tweet:
- tdata["author"] = self._transform_user(tweet["author"])
- else:
- tdata["author"] = tdata["user"]
+ if "quoted_by" in tweet:
+ tdata["quote_by"] = tweet["quoted_by"]
return tdata
@@ -336,6 +336,10 @@ class TwitterExtractor(Extractor):
return udata
+ def _assign_user(self, user):
+ self._user_obj = user
+ self._user = self._transform_user(user)
+
def _users_result(self, users):
userfmt = self.config("users")
if not userfmt or userfmt == "timeline":
@@ -455,33 +459,24 @@ class TwitterTimelineExtractor(TwitterExtractor):
tweet = None
for tweet in self._select_tweet_source()(self.user):
yield tweet
-
if tweet is None:
return
- # get username
- if not self.user.startswith("id:"):
- username = self.user
- elif "core" in tweet:
- username = (tweet["core"]["user_results"]["result"]
- ["legacy"]["screen_name"])
- else:
- username = tweet["user"]["screen_name"]
-
- # get tweet data
- if "legacy" in tweet:
- tweet = tweet["legacy"]
-
# build search query
- query = "from:{} max_id:{}".format(username, tweet["id_str"])
+ query = "from:{} max_id:{}".format(
+ self._user["name"], tweet["rest_id"])
if self.retweets:
query += " include:retweets include:nativeretweets"
+
if not self.textonly:
- query += (" (filter:images OR"
- " filter:native_video OR"
- " card_name:animated_gif)")
+ # try to search for media-only tweets
+ tweet = None
+ for tweet in self.api.search_adaptive(query + " filter:links"):
+ yield tweet
+ if tweet is not None:
+ return
- # yield search results starting from last tweet id
+ # yield unfiltered search results
yield from self.api.search_adaptive(query)
def _select_tweet_source(self):
@@ -625,7 +620,25 @@ class TwitterSearchExtractor(TwitterExtractor):
return {"search": text.unquote(self.user)}
def tweets(self):
- return self.api.search_adaptive(text.unquote(self.user))
+ query = text.unquote(self.user.replace("+", " "))
+
+ user = None
+ for item in query.split():
+ item = item.strip("()")
+ if item.startswith("from:"):
+ if user:
+ user = None
+ break
+ else:
+ user = item[5:]
+
+ if user is not None:
+ try:
+ self._assign_user(self.api.user_by_screen_name(user))
+ except KeyError:
+ pass
+
+ return self.api.search_adaptive(query)
class TwitterEventExtractor(TwitterExtractor):
@@ -693,7 +706,7 @@ class TwitterTweetExtractor(TwitterExtractor):
}),
("https://twitter.com/i/web/status/1424898916156284928", {
"options": (("replies", "self"),),
- "count": 0,
+ "count": 1,
}),
# "quoted" option (#854)
("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
@@ -777,20 +790,38 @@ class TwitterTweetExtractor(TwitterExtractor):
def tweets(self):
if self.config("conversations", False):
- return self.api.tweet_detail(self.tweet_id)
+ return self._tweets_conversation(self.tweet_id)
+ else:
+ return self._tweets_single(self.tweet_id)
+ def _tweets_single(self, tweet_id):
tweets = []
- tweet_id = self.tweet_id
+
for tweet in self.api.tweet_detail(tweet_id):
if tweet["rest_id"] == tweet_id or \
tweet.get("_retweet_id_str") == tweet_id:
+ self._assign_user(tweet["core"]["user_results"]["result"])
tweets.append(tweet)
tweet_id = tweet["legacy"].get("quoted_status_id_str")
if not tweet_id:
break
+
return tweets
+ def _tweets_conversation(self, tweet_id):
+ tweets = self.api.tweet_detail(tweet_id)
+ buffer = []
+
+ for tweet in tweets:
+ buffer.append(tweet)
+ if tweet["rest_id"] == tweet_id or \
+ tweet.get("_retweet_id_str") == tweet_id:
+ self._assign_user(tweet["core"]["user_results"]["result"])
+ break
+
+ return itertools.chain(buffer, tweets)
+
class TwitterImageExtractor(Extractor):
category = "twitter"
@@ -888,7 +919,6 @@ class TwitterAPI():
self._nsfw_warning = True
self._syndication = extractor.config("syndication")
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
- self._user = None
cookies = extractor.session.cookies
cookiedomain = extractor.cookiedomain
@@ -1050,13 +1080,13 @@ class TwitterAPI():
def _user_id_by_screen_name(self, screen_name):
if screen_name.startswith("id:"):
- self._user = util.SENTINEL
user_id = screen_name[3:]
+ user = self.user_by_rest_id(user_id)
else:
user = ()
try:
- user = self._user = self.user_by_screen_name(screen_name)
+ user = self.user_by_screen_name(screen_name)
user_id = user["rest_id"]
except KeyError:
if "unavailable_message" in user:
@@ -1066,7 +1096,7 @@ class TwitterAPI():
else:
raise exception.NotFoundError("user")
- self.extractor._user_id = user_id
+ self.extractor._assign_user(user)
return user_id
@cache(maxage=3600)
@@ -1183,7 +1213,7 @@ class TwitterAPI():
if quoted:
quoted = quoted.copy()
quoted["author"] = users[quoted["user_id_str"]]
- quoted["user"] = tweet["user"]
+ quoted["quoted_by"] = tweet["user"]["screen_name"]
quoted["quoted_by_id_str"] = tweet["id_str"]
yield quoted
@@ -1226,17 +1256,10 @@ class TwitterAPI():
except LookupError:
extr.log.debug(data)
- if self._user:
- user = self._user
- if user is util.SENTINEL:
- try:
- user = self.user_by_rest_id(variables["userId"])
- except KeyError:
- raise exception.NotFoundError("user")
- user = user.get("legacy")
- if not user:
- pass
- elif user.get("blocked_by"):
+ user = extr._user_obj
+ if user:
+ user = user["legacy"]
+ if user.get("blocked_by"):
if self.headers["x-twitter-auth-type"] and \
extr.config("logout"):
guest_token = self._guest_token()
@@ -1322,7 +1345,7 @@ class TwitterAPI():
try:
legacy["retweeted_status_id_str"] = \
retweet["rest_id"]
- legacy["author"] = \
+ tweet["author"] = \
retweet["core"]["user_results"]["result"]
if "extended_entities" in retweet["legacy"] and \
"extended_entities" not in legacy:
@@ -1336,9 +1359,9 @@ class TwitterAPI():
if "quoted_status_result" in tweet:
try:
quoted = tweet["quoted_status_result"]["result"]
- quoted["legacy"]["author"] = \
- quoted["core"]["user_results"]["result"]
- quoted["core"] = tweet["core"]
+ quoted["legacy"]["quoted_by"] = (
+ tweet["core"]["user_results"]["result"]
+ ["legacy"]["screen_name"])
quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
yield quoted
except KeyError:
@@ -1374,10 +1397,14 @@ class TwitterAPI():
if instr["type"] == "TimelineAddEntries":
for entry in instr["entries"]:
if entry["entryId"].startswith("user-"):
- user = (entry["content"]["itemContent"]
- ["user_results"]["result"])
- if "rest_id" in user:
- yield user
+ try:
+ user = (entry["content"]["itemContent"]
+ ["user_results"]["result"])
+ except KeyError:
+ pass
+ else:
+ if "rest_id" in user:
+ yield user
elif entry["entryId"].startswith("cursor-bottom-"):
cursor = entry["content"]["value"]
elif instr["type"] == "TimelineTerminateTimeline":
@@ -1439,6 +1466,6 @@ class TwitterAPI():
return {
"rest_id": tweet["id_str"],
"legacy" : tweet,
- "user" : tweet["user"],
+ "core" : {"user_results": {"result": tweet["user"]}},
"_retweet_id_str": retweet_id,
}
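
Note: _tweets_conversation() above consumes the tweet_detail() generator only until the requested Tweet is found, then replays the consumed prefix plus the untouched remainder. A hedged sketch of that buffer-and-chain idiom, independent of the Twitter API (find_then_replay is an illustrative name):

    import itertools

    def find_then_replay(items, is_target, on_found):
        buffer = []
        for item in items:
            buffer.append(item)
            if is_target(item):
                on_found(item)       # e.g. _assign_user(...)
                break
        # 'items' is a generator, so it resumes after the last buffered item
        return itertools.chain(buffer, items)
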
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index c29d730..623ed94 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -84,7 +84,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
"blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
"categories": list,
"color": "#f3c08c",
- "created_at": "2020-04-08T08:29:42-04:00",
+ "created_at": "2020-04-08T12:29:42Z",
"date": "dt:2020-04-08 12:29:42",
"description": "The Island",
"downloads": int,
@@ -112,7 +112,7 @@ class UnsplashImageExtractor(UnsplashExtractor):
},
"title": "Beaver Dam, WI 53916, USA"
},
- "promoted_at": "2020-04-08T11:12:03-04:00",
+ "promoted_at": "2020-04-08T15:12:03Z",
"sponsorship": None,
"tags": list,
"updated_at": str,
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index ab2153f..25b00fe 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -40,12 +40,17 @@ class VkExtractor(Extractor):
continue
try:
+ photo["url"] = photo[size + "src"]
+ except KeyError:
+ self.log.warning("no photo URL found (%s)", photo.get("id"))
+ continue
+
+ try:
_, photo["width"], photo["height"] = photo[size]
except ValueError:
# photo without width/height entries (#2535)
photo["width"] = photo["height"] = 0
- photo["url"] = photo[size + "src"]
photo["id"] = photo["id"].rpartition("_")[2]
photo.update(data)
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 756384b..668be0f 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -169,7 +169,7 @@ class VscoCollectionExtractor(VscoExtractor):
return self._pagination(url, params, tkn, "medias", (
data["medias"]["byId"][mid["id"]]["media"]
for mid in data
- ["collections"]["byCollectionId"][cid]["byPage"]["1"]["collection"]
+ ["collections"]["byId"][cid]["1"]["collection"]
))
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 37eab24..0ad8523 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -189,7 +189,7 @@ class WallhavenAPI():
def collections(self, username):
endpoint = "/v1/collections/" + username
- return self._pagination(endpoint)
+ return self._pagination(endpoint, metadata=False)
def search(self, params):
endpoint = "/v1/search"
@@ -200,13 +200,20 @@ class WallhavenAPI():
return self.extractor.request(
url, headers=self.headers, params=params).json()
- def _pagination(self, endpoint, params=None):
+ def _pagination(self, endpoint, params=None, metadata=None):
if params is None:
params = {}
+ if metadata is None:
+ metadata = self.extractor.config("metadata")
while True:
data = self._call(endpoint, params)
- yield from data["data"]
+
+ if metadata:
+ for wp in data["data"]:
+ yield self.info(str(wp["id"]))
+ else:
+ yield from data["data"]
meta = data.get("meta")
if not meta or meta["current_page"] >= meta["last_page"]:
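
Note: the new 'metadata' switch in _pagination() trades one extra info() request per wallpaper for complete metadata. A rough standalone sketch, where call() and info() stand in for WallhavenAPI._call() and WallhavenAPI.info(), and the page increment is an assumption about the surrounding loop:

    def paginate(call, info, endpoint, params=None, metadata=False):
        params = params or {}
        while True:
            data = call(endpoint, params)
            if metadata:
                for wp in data["data"]:
                    yield info(str(wp["id"]))   # full record, one request each
            else:
                yield from data["data"]         # listing entries as-is
            meta = data.get("meta")
            if not meta or meta["current_page"] >= meta["last_page"]:
                return
            params["page"] = meta["current_page"] + 1   # assumed increment
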
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index bdbdc8c..189c0c5 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -99,13 +99,14 @@ class WeiboExtractor(Extractor):
else:
yield pic["largest"].copy()
- if "page_info" in status:
- page_info = status["page_info"]
- if "media_info" not in page_info or not self.videos:
- return
- media = max(page_info["media_info"]["playback_list"],
- key=lambda m: m["meta"]["quality_index"])
- yield media["play_info"].copy()
+ if "page_info" in status and self.videos:
+ try:
+ media = max(status["page_info"]["media_info"]["playback_list"],
+ key=lambda m: m["meta"]["quality_index"])
+ except KeyError:
+ pass
+ else:
+ yield media["play_info"].copy()
def _status_by_id(self, status_id):
url = "{}/ajax/statuses/show?id={}".format(self.root, status_id)
@@ -147,14 +148,17 @@ class WeiboExtractor(Extractor):
return
yield from statuses
- if "next_cursor" in data:
+ if "next_cursor" in data: # videos, newvideo
params["cursor"] = data["next_cursor"]
- elif "page" in params:
+ elif "page" in params: # home, article
params["page"] += 1
- elif data["since_id"]:
+ elif data["since_id"]: # album
params["sinceid"] = data["since_id"]
- else:
- params["since_id"] = statuses[-1]["id"] - 1
+ else: # feed, last album page
+ try:
+ params["since_id"] = statuses[-1]["id"] - 1
+ except KeyError:
+ return
def _sina_visitor_system(self, response):
self.log.info("Sina Visitor System")
@@ -366,6 +370,10 @@ class WeiboStatusExtractor(WeiboExtractor):
"pattern": r"https://g\.us\.sinaimg.cn/o0/qNZcaAAglx07Wuf921CM0104"
r"120005tc0E010\.mp4\?label=gif_mp4",
}),
+ # missing 'playback_list' (#2792)
+ ("https://weibo.com/2909128931/4409545658754086", {
+ "count": 9,
+ }),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
)
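
Note: the weibo fix for #2792 wraps the max() lookup in an EAFP guard so statuses without 'media_info' or 'playback_list' are skipped instead of aborting extraction. As a self-contained sketch (the dict layout mirrors what the diff accesses):

    def best_video(status):
        try:
            media = max(status["page_info"]["media_info"]["playback_list"],
                        key=lambda m: m["meta"]["quality_index"])
        except KeyError:
            return None        # no playable video attached to this status
        return media["play_info"].copy()
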
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
new file mode 100644
index 0000000..2b5acd8
--- /dev/null
+++ b/gallery_dl/extractor/zerochan.py
@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.zerochan.net/"""
+
+from .booru import BooruExtractor
+from ..cache import cache
+from .. import text, exception
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
+
+
+class ZerochanExtractor(BooruExtractor):
+ """Base class for zerochan extractors"""
+ category = "zerochan"
+ root = "https://www.zerochan.net"
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+ cookiedomain = ".zerochan.net"
+ cookienames = ("z_id", "z_hash")
+
+ def login(self):
+ if not self._check_cookies(self.cookienames):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+ # force legacy layout
+ self.session.cookies.set("v3", "0", domain=self.cookiedomain)
+
+ @cache(maxage=90*86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/login"
+ headers = {
+ "Origin" : self.root,
+ "Referer" : url,
+ }
+ data = {
+ "ref" : "/",
+ "name" : username,
+ "password": password,
+ "login" : "Login",
+ }
+
+ response = self.request(url, method="POST", headers=headers, data=data)
+ if not response.history:
+ raise exception.AuthenticationError()
+
+ return response.cookies
+
+ def _parse_entry_page(self, entry_id):
+ url = "{}/{}".format(self.root, entry_id)
+ extr = text.extract_from(self.request(url).text)
+
+ return {
+ "id" : entry_id,
+ "author": extr('"author": "', '"'),
+ "file_url": extr('"contentUrl": "', '"'),
+ "date" : text.parse_datetime(extr(
+ '"datePublished": "', '"'), "%a %b %d %H:%M:%S %Y"),
+ "width" : extr('"width": "', ' '),
+ "height": extr('"height": "', ' '),
+ "size" : extr('"contentSize": "', 'B'),
+ "path" : text.split_html(extr(
+ 'class="breadcrumbs', '</p>'))[3::2],
+ "tags" : extr('alt="Tags: ', '"').split(", ")
+ }
+
+
+class ZerochanTagExtractor(ZerochanExtractor):
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ pattern = BASE_PATTERN + r"/(?!\d+$)([^/?#]+)/?(?:\?([^#]+))?"
+ test = ("https://www.zerochan.net/Perth+%28Kantai+Collection%29", {
+ "pattern": r"https://static\.zerochan\.net/.+\.full\.\d+\.(jpg|png)",
+ "count": "> 24",
+ "keywords": {
+ "extension": r"re:jpg|png",
+ "file_url": "",
+ "filename": r"re:Perth.\(Kantai.Collection\).full.\d+",
+ "height": r"re:^\d+$",
+ "id": r"re:^\d+$",
+ "name": "Perth (Kantai Collection)",
+ "search_tags": "Perth (Kantai Collection)",
+ "size": r"re:^\d+k$",
+ "width": r"re:^\d+$",
+ },
+ })
+
+ def __init__(self, match):
+ ZerochanExtractor.__init__(self, match)
+ self.search_tag, self.query = match.groups()
+
+ def metadata(self):
+ return {"search_tags": text.unquote(
+ self.search_tag.replace("+", " "))}
+
+ def posts(self):
+ url = self.root + "/" + self.search_tag
+ params = text.parse_query(self.query)
+ params["p"] = text.parse_int(params.get("p"), 1)
+
+ while True:
+ page = self.request(url, params=params).text
+ thumbs = text.extract(page, '<ul id="thumbs', '</ul>')[0]
+ extr = text.extract_from(thumbs)
+
+ while True:
+ post = extr('<li class="', '>')
+ if not post:
+ break
+ yield {
+ "id" : extr('href="/', '"'),
+ "name" : extr('alt="', '"'),
+ "width" : extr('title="', 'x'),
+ "height": extr('', ' '),
+ "size" : extr('', 'B'),
+ "file_url": "https://static." + extr(
+ '<a href="https://static.', '"'),
+ }
+
+ if 'rel="next"' not in page:
+ break
+ params["p"] += 1
+
+
+class ZerochanImageExtractor(ZerochanExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/(\d+)"
+ test = ("https://www.zerochan.net/2920445", {
+ "pattern": r"https://static\.zerochan\.net/"
+ r"Perth\.%28Kantai\.Collection%29\.full.2920445\.jpg",
+ "keyword": {
+ "author": "YukinoTokisaki",
+ "date": "dt:2020-04-24 21:33:44",
+ "file_url": str,
+ "filename": "Perth.(Kantai.Collection).full.2920445",
+ "height": "1366",
+ "id": "2920445",
+ "size": "1929k",
+ "width": "1920",
+ },
+ })
+
+ def __init__(self, match):
+ ZerochanExtractor.__init__(self, match)
+ self.image_id = match.group(1)
+
+ def posts(self):
+ return (self._parse_entry_page(self.image_id),)
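
Note: the new zerochan extractor leans on sequential extraction: every extr() call resumes where the previous one stopped, so field order must match the page layout. A simplified standalone version of that idiom (gallery_dl.text.extract_from differs in details such as default values):

    def extract_from(txt):
        pos = 0
        def extr(begin, end):
            nonlocal pos
            try:
                first = txt.index(begin, pos) + len(begin)
                last = txt.index(end, first)
            except ValueError:
                return ""
            pos = last + len(end)
            return txt[first:last]
        return extr

    extr = extract_from('"author": "foo", "width": "1920 px"')
    extr('"author": "', '"')   # -> 'foo'
    extr('"width": "', ' ')    # -> '1920'
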
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index bc4d837..dd32b8a 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -296,12 +296,14 @@ def _parse_maxlen(format_spec, default):
def _parse_join(format_spec, default):
separator, _, format_spec = format_spec.partition(_SEPARATOR)
- separator = separator[1:]
+ join = separator[1:].join
fmt = _build_format_func(format_spec, default)
- def join(obj):
- return fmt(separator.join(obj))
- return join
+ def apply_join(obj):
+ if isinstance(obj, str):
+ return fmt(obj)
+ return fmt(join(obj))
+ return apply_join
def _parse_replace(format_spec, default):
@@ -379,6 +381,7 @@ _CONVERSIONS = {
"T": util.datetime_to_timestamp_string,
"d": text.parse_timestamp,
"U": text.unescape,
+ "g": text.slugify,
"S": util.to_string,
"s": str,
"r": repr,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 9636bef..7b22b1d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -35,10 +35,13 @@ class Job():
self.status = 0
self.url_key = extr.config("url-metadata")
+ path_key = extr.config("path-metadata")
+ path_proxy = output.PathfmtProxy(self)
+
self._logger_extra = {
"job" : self,
"extractor": extr,
- "path" : output.PathfmtProxy(self),
+ "path" : path_proxy,
"keywords" : output.KwdictProxy(self),
}
extr.log = self._wrap_logger(extr.log)
@@ -58,6 +61,8 @@ class Job():
kwdict = extr.config("keywords")
if kwdict:
self.kwdict.update(kwdict)
+ if path_key:
+ self.kwdict[path_key] = path_proxy
# predicates
self.pred_url = self._prepare_predicates("image", True)
diff --git a/gallery_dl/output.py b/gallery_dl/output.py
index e7c66cf..3017f85 100644
--- a/gallery_dl/output.py
+++ b/gallery_dl/output.py
@@ -73,6 +73,12 @@ class PathfmtProxy():
pathfmt = object.__getattribute__(self, "job").pathfmt
return pathfmt.__dict__.get(name) if pathfmt else None
+ def __str__(self):
+ pathfmt = object.__getattribute__(self, "job").pathfmt
+ if pathfmt:
+ return pathfmt.path or pathfmt.directory
+ return ""
+
class KwdictProxy():
__slots__ = ("job",)
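
Note: together, the job.py and output.py changes let a 'path-metadata' key expose the eventual download path inside kwdict: the proxy stays lazy and resolves only when stringified. A condensed sketch of that proxy (the real PathfmtProxy also overrides __getattribute__):

    class PathProxy:
        __slots__ = ("job",)

        def __init__(self, job):
            self.job = job

        def __str__(self):
            pathfmt = self.job.pathfmt       # may not be set yet
            if pathfmt:
                return pathfmt.path or pathfmt.directory
            return ""
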
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 2d16db8..d9baed3 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -30,6 +30,17 @@ class MetadataPP(PostProcessor):
elif mode == "tags":
self.write = self._write_tags
ext = "txt"
+ elif mode == "modify":
+ self.run = self._run_modify
+ self.fields = {
+ name: formatter.parse(value, None, util.identity).format_map
+ for name, value in options.get("fields").items()
+ }
+ ext = None
+ elif mode == "delete":
+ self.run = self._run_delete
+ self.fields = options.get("fields")
+ ext = None
else:
self.write = self._write_json
self.indent = options.get("indent", 4)
@@ -99,7 +110,7 @@ class MetadataPP(PostProcessor):
with open(path, "w", encoding="utf-8") as fp:
self.write(fp, pathfmt.kwdict)
except FileNotFoundError:
- os.makedirs(directory)
+ os.makedirs(directory, exist_ok=True)
with open(path, "w", encoding="utf-8") as fp:
self.write(fp, pathfmt.kwdict)
@@ -114,6 +125,32 @@ class MetadataPP(PostProcessor):
def _run_stdout(self, pathfmt):
self.write(sys.stdout, pathfmt.kwdict)
+ def _run_modify(self, pathfmt):
+ kwdict = pathfmt.kwdict
+ for key, func in self.fields.items():
+ obj = kwdict
+ try:
+ while "[" in key:
+ name, _, key = key.partition("[")
+ obj = obj[name]
+ key = key.rstrip("]")
+ obj[key] = func(kwdict)
+ except Exception:
+ pass
+
+ def _run_delete(self, pathfmt):
+ kwdict = pathfmt.kwdict
+ for key in self.fields:
+ obj = kwdict
+ try:
+ while "[" in key:
+ name, _, key = key.partition("[")
+ obj = obj[name]
+ key = key.rstrip("]")
+ del obj[key]
+ except Exception:
+ pass
+
def _directory(self, pathfmt):
return pathfmt.realdirectory
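
Note: the new 'modify' and 'delete' modes accept bracketed field names like "bar[baz]" and walk into nested dicts before assigning or deleting. The traversal in isolation (resolve() is an illustrative name):

    def resolve(kwdict, key):
        obj = kwdict
        while "[" in key:
            name, _, key = key.partition("[")
            obj = obj[name]          # descend one level
            key = key.rstrip("]")
        return obj, key

    data = {"bar": {"baz": 3}}
    obj, key = resolve(data, "bar[baz]")
    del obj[key]                     # removes data["bar"]["baz"]
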
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 97ef3ac..79cf016 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -39,6 +39,16 @@ def split_html(txt):
return []
+def slugify(value):
+ """Convert a string to a URL slug
+
+ Adapted from:
+ https://github.com/django/django/blob/master/django/utils/text.py
+ """
+ value = re.sub(r"[^\w\s-]", "", str(value).lower())
+ return re.sub(r"[-\s]+", "-", value).strip("-_")
+
+
def ensure_http_scheme(url, scheme="https://"):
"""Prepend 'scheme' to 'url' if it doesn't have one"""
if url and not url.startswith(("https://", "http://")):
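
Note: text.slugify() lowercases, drops everything outside word characters, whitespace, and hyphens, collapses separator runs, and strips leading/trailing '-' and '_'. For example:

    from gallery_dl import text

    text.slugify("Hello World")                # 'hello-world'
    text.slugify("_-H#e:l#l:o+\t+W?o!rl=d-_")  # 'hello-world'
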
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 009ee08..4ba1cba 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -161,13 +161,16 @@ def delete_items(obj, keys):
def enumerate_reversed(iterable, start=0, length=None):
"""Enumerate 'iterable' and return its elements in reverse order"""
- start -= 1
if length is None:
length = len(iterable)
- return zip(
- range(length - start, start, -1),
- reversed(iterable),
- )
+
+ try:
+ iterable = zip(range(start-1+length, start-1, -1), reversed(iterable))
+ except TypeError:
+ iterable = list(zip(range(start, start+length), iterable))
+ iterable.reverse()
+
+ return iterable
def number_to_string(value, numbers=(int, float)):
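
Note: enumerate_reversed() now falls back to materializing the iterable when reversed() raises TypeError, so generators work as long as 'length' is supplied:

    from gallery_dl import util

    list(util.enumerate_reversed([11, 22, 33], 1))
    # [(3, 33), (2, 22), (1, 11)]

    gen = (i for i in (11, 22, 33))
    list(util.enumerate_reversed(gen, 1, length=3))
    # [(3, 33), (2, 22), (1, 11)]
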
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 76f879c..d12d088 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.22.4"
+__version__ = "1.23.0"
diff --git a/test/test_formatter.py b/test/test_formatter.py
index aec091a..b335332 100644
--- a/test/test_formatter.py
+++ b/test/test_formatter.py
@@ -58,6 +58,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{dt!T}", "1262304000")
self._run_test("{l!j}", '["a", "b", "c"]')
self._run_test("{dt!j}", '"2010-01-01 00:00:00"')
+ self._run_test("{a!g}", "hello-world")
with self.assertRaises(KeyError):
self._run_test("{a!q}", "hello world")
@@ -177,7 +178,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{l:J - />20}", " a - b - c")
self._run_test("{a:J/}" , self.kwdict["a"])
- self._run_test("{a:J, /}" , ", ".join(self.kwdict["a"]))
+ self._run_test("{a:J, /}" , self.kwdict["a"])
def test_replace(self):
self._run_test("{a:Rh/C/}" , "CElLo wOrLd")
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 7a216bb..42babd3 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -339,6 +339,52 @@ class MetadataTest(BasePostprocessorTest):
{"category": "test", "extension": "ext", "filename": "file"}
""")
+ def test_metadata_modify(self):
+ kwdict = {"foo": 0, "bar": {"bax": 1, "bay": 2, "baz": 3}}
+ self._create({
+ "mode": "modify",
+ "fields": {
+ "foo" : "{filename}-{foo!s}",
+ "foo2" : "\fE bar['bax'] + 122",
+ "bar[baz]": "{_now}",
+ "bar[ba2]": "test",
+ },
+ }, kwdict)
+ pdict = self.pathfmt.kwdict
+
+ self.assertIsNot(kwdict, pdict)
+ self.assertEqual(pdict["foo"], kwdict["foo"])
+ self.assertEqual(pdict["bar"], kwdict["bar"])
+
+ self._trigger()
+
+ self.assertEqual(pdict["foo"] , "file-0")
+ self.assertEqual(pdict["foo2"] , 123)
+ self.assertEqual(pdict["bar"]["ba2"], "test")
+ self.assertIsInstance(pdict["bar"]["baz"], datetime)
+
+ def test_metadata_delete(self):
+ kwdict = {"foo": 0, "bar": {"bax": 1, "bay": 2, "baz": 3}}
+ self._create({"mode": "delete", "fields": ["foo", "bar[baz]"]}, kwdict)
+ pdict = self.pathfmt.kwdict
+
+ self.assertIsNot(kwdict, pdict)
+ self.assertEqual(pdict["foo"], kwdict["foo"])
+ self.assertEqual(pdict["bar"], kwdict["bar"])
+
+ del kwdict["foo"]
+ del kwdict["bar"]["baz"]
+
+ self._trigger()
+ self.assertNotIn("foo", pdict)
+ self.assertNotIn("baz", pdict["bar"])
+ self.assertEqual(kwdict["bar"], pdict["bar"])
+
+ self._trigger()
+ self.assertNotIn("foo", pdict)
+ self.assertNotIn("baz", pdict["bar"])
+ self.assertEqual(kwdict["bar"], pdict["bar"])
+
@staticmethod
def _output(mock):
return "".join(
diff --git a/test/test_text.py b/test/test_text.py
index ffed726..0ac7767 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2021 Mike Fährmann
+# Copyright 2015-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -75,6 +75,23 @@ class TestText(unittest.TestCase):
for value in INVALID:
self.assertEqual(f(value), empty)
+ def test_slugify(self, f=text.slugify):
+ self.assertEqual(f("Hello World"), "hello-world")
+ self.assertEqual(f("-HeLLo---World-"), "hello-world")
+ self.assertEqual(f("_-H#e:l#l:o+\t+W?o!rl=d-_"), "hello-world")
+ self.assertEqual(f("_Hello_World_"), "hello_world")
+
+ self.assertEqual(f(""), "")
+ self.assertEqual(f("-"), "")
+ self.assertEqual(f("--"), "")
+
+ self.assertEqual(f(()), "")
+ self.assertEqual(f([]), "")
+ self.assertEqual(f({}), "")
+ self.assertEqual(f(None), "none")
+ self.assertEqual(f(1), "1")
+ self.assertEqual(f(2.3), "23")
+
def test_ensure_http_scheme(self, f=text.ensure_http_scheme):
result = "https://example.org/filename.ext"
diff --git a/test/test_util.py b/test/test_util.py
index 7ab1175..2921ea2 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -15,6 +15,7 @@ import io
import random
import string
import datetime
+import itertools
import http.cookiejar
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -541,6 +542,41 @@ class TestOther(unittest.TestCase):
r = util.filter_dict(d)
self.assertEqual(r, {"foo": 123})
+ def test_enumerate_reversed(self):
+
+ seq = [11, 22, 33]
+ result = [(3, 33), (2, 22), (1, 11)]
+
+ def gen():
+ for i in seq:
+ yield i
+
+ def gen_2():
+ yield from seq
+
+ def assertEqual(it1, it2):
+ ae = self.assertEqual
+ for i1, i2 in itertools.zip_longest(it1, it2):
+ ae(i1, i2)
+
+ assertEqual(
+ util.enumerate_reversed(seq), [(2, 33), (1, 22), (0, 11)])
+ assertEqual(
+ util.enumerate_reversed(seq, 1), result)
+ assertEqual(
+ util.enumerate_reversed(seq, 2), [(4, 33), (3, 22), (2, 11)])
+
+ assertEqual(
+ util.enumerate_reversed(gen(), 0, len(seq)),
+ [(2, 33), (1, 22), (0, 11)])
+ assertEqual(
+ util.enumerate_reversed(gen(), 1, len(seq)), result)
+ assertEqual(
+ util.enumerate_reversed(gen_2(), 1, len(seq)), result)
+ assertEqual(
+ util.enumerate_reversed(gen_2(), 2, len(seq)),
+ [(4, 33), (3, 22), (2, 11)])
+
def test_number_to_string(self, f=util.number_to_string):
self.assertEqual(f(1) , "1")
self.assertEqual(f(1.0) , "1.0")