author    Unit 193 <unit193@unit193.net>  2021-08-04 02:14:44 -0400
committer Unit 193 <unit193@unit193.net>  2021-08-04 02:14:44 -0400
commit    873d9a628e9412a79bdc64cd962470749de3425b (patch)
tree      8cd421ef79a9fa784147fa888543216f0872357b
parent    32de2b06db501c7de81678bce8e3e0c3e63d340c (diff)
download  gallery-dl-873d9a628e9412a79bdc64cd962470749de3425b.tar.bz2
          gallery-dl-873d9a628e9412a79bdc64cd962470749de3425b.tar.xz
          gallery-dl-873d9a628e9412a79bdc64cd962470749de3425b.tar.zst

New upstream version 1.18.2 (tag: upstream/1.18.2)
-rw-r--r--  CHANGELOG.md                          |  45
-rw-r--r--  PKG-INFO                              |   9
-rw-r--r--  README.rst                            |   7
-rw-r--r--  data/man/gallery-dl.1                 |   2
-rw-r--r--  data/man/gallery-dl.conf.5            | 285
-rw-r--r--  docs/gallery-dl.conf                  |  17
-rw-r--r--  gallery_dl.egg-info/PKG-INFO          |   9
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt       |   5
-rw-r--r--  gallery_dl/__init__.py                |   2
-rw-r--r--  gallery_dl/downloader/ytdl.py         |  69
-rw-r--r--  gallery_dl/extractor/500px.py         |   2
-rw-r--r--  gallery_dl/extractor/__init__.py      |   5
-rw-r--r--  gallery_dl/extractor/architizer.py    |   2
-rw-r--r--  gallery_dl/extractor/bbc.py           |  80
-rw-r--r--  gallery_dl/extractor/bcy.py           |   5
-rw-r--r--  gallery_dl/extractor/comicvine.py     |  78
-rw-r--r--  gallery_dl/extractor/common.py        |   3
-rw-r--r--  gallery_dl/extractor/deviantart.py    |  49
-rw-r--r--  gallery_dl/extractor/directlink.py    |   5
-rw-r--r--  gallery_dl/extractor/exhentai.py      |   2
-rw-r--r--  gallery_dl/extractor/foolfuuka.py     |   2
-rw-r--r--  gallery_dl/extractor/hentaihand.py    |  30
-rw-r--r--  gallery_dl/extractor/hiperdex.py      |  23
-rw-r--r--  gallery_dl/extractor/hitomi.py        |   9
-rw-r--r--  gallery_dl/extractor/imagehosts.py    |  32
-rw-r--r--  gallery_dl/extractor/instagram.py     |  51
-rw-r--r--  gallery_dl/extractor/kemonoparty.py   |  45
-rw-r--r--  gallery_dl/extractor/mangadex.py      |   5
-rw-r--r--  gallery_dl/extractor/mangafox.py      |  67
-rw-r--r--  gallery_dl/extractor/mangapark.py     |   6
-rw-r--r--  gallery_dl/extractor/mangasee.py      | 112
-rw-r--r--  gallery_dl/extractor/mastodon.py      |  62
-rw-r--r--  gallery_dl/extractor/moebooru.py      |   2
-rw-r--r--  gallery_dl/extractor/naverwebtoon.py  |   2
-rw-r--r--  gallery_dl/extractor/philomena.py     |   4
-rw-r--r--  gallery_dl/extractor/pixiv.py         |  46
-rw-r--r--  gallery_dl/extractor/pururin.py       |  21
-rw-r--r--  gallery_dl/extractor/reactor.py       |  28
-rw-r--r--  gallery_dl/extractor/seisoparty.py    | 142
-rw-r--r--  gallery_dl/extractor/subscribestar.py |   4
-rw-r--r--  gallery_dl/extractor/twitter.py       |  29
-rw-r--r--  gallery_dl/extractor/vk.py            |  65
-rw-r--r--  gallery_dl/extractor/webtoons.py      |   5
-rw-r--r--  gallery_dl/extractor/wikiart.py       |   4
-rw-r--r--  gallery_dl/extractor/ytdl.py          | 136
-rw-r--r--  gallery_dl/job.py                     |  40
-rw-r--r--  gallery_dl/postprocessor/metadata.py  |   2
-rw-r--r--  gallery_dl/util.py                    | 200
-rw-r--r--  gallery_dl/version.py                 |   2
-rw-r--r--  test/test_extractor.py                |   2
-rw-r--r--  test/test_results.py                  |   6
-rw-r--r--  test/test_util.py                     |   9
52 files changed, 1551 insertions, 323 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a4c90c..72f9c42 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,50 @@
# Changelog
+## 1.18.2 - 2021-07-23
+### Additions
+- [bbc] add `gallery` and `programme` extractors ([#1706](https://github.com/mikf/gallery-dl/issues/1706))
+- [comicvine] add extractor ([#1712](https://github.com/mikf/gallery-dl/issues/1712))
+- [kemonoparty] add `max-posts` option ([#1674](https://github.com/mikf/gallery-dl/issues/1674))
+- [kemonoparty] parse `o` query parameters ([#1674](https://github.com/mikf/gallery-dl/issues/1674))
+- [mastodon] add `reblogs` and `replies` options ([#1669](https://github.com/mikf/gallery-dl/issues/1669))
+- [pixiv] add extractor for `pixivision` articles ([#1672](https://github.com/mikf/gallery-dl/issues/1672))
+- [ytdl] add experimental extractor for sites supported by youtube-dl ([#1680](https://github.com/mikf/gallery-dl/issues/1680), [#878](https://github.com/mikf/gallery-dl/issues/878))
+- extend `parent-metadata` functionality ([#1687](https://github.com/mikf/gallery-dl/issues/1687), [#1651](https://github.com/mikf/gallery-dl/issues/1651), [#1364](https://github.com/mikf/gallery-dl/issues/1364))
+- add `archive-prefix` option ([#1711](https://github.com/mikf/gallery-dl/issues/1711))
+- add `url-metadata` option ([#1659](https://github.com/mikf/gallery-dl/issues/1659), [#1073](https://github.com/mikf/gallery-dl/issues/1073))
+### Changes
+- [kemonoparty] skip duplicated patreon files ([#1689](https://github.com/mikf/gallery-dl/issues/1689), [#1667](https://github.com/mikf/gallery-dl/issues/1667))
+- [mangadex] use custom User-Agent header ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+### Fixes
+- [hitomi] fix image URLs ([#1679](https://github.com/mikf/gallery-dl/issues/1679))
+- [imagevenue] fix extraction ([#1677](https://github.com/mikf/gallery-dl/issues/1677))
+- [instagram] fix extraction of `/explore/tags/` posts ([#1666](https://github.com/mikf/gallery-dl/issues/1666))
+- [moebooru] fix `tags` ending with a `+` when logged in ([#1702](https://github.com/mikf/gallery-dl/issues/1702))
+- [naverwebtoon] fix comic extraction
+- [pururin] update domain and fix extraction
+- [vk] improve metadata extraction and URL pattern ([#1691](https://github.com/mikf/gallery-dl/issues/1691))
+- [downloader:ytdl] fix `outtmpl` setting for yt-dlp ([#1680](https://github.com/mikf/gallery-dl/issues/1680))
+
+## 1.18.1 - 2021-07-04
+### Additions
+- [mangafox] add `manga` extractor ([#1633](https://github.com/mikf/gallery-dl/issues/1633))
+- [mangasee] add `chapter` and `manga` extractors
+- [mastodon] implement `text-posts` option ([#1569](https://github.com/mikf/gallery-dl/issues/1569), [#1669](https://github.com/mikf/gallery-dl/issues/1669))
+- [seisoparty] add `user` and `post` extractors ([#1635](https://github.com/mikf/gallery-dl/issues/1635))
+- implement conditional directories ([#1394](https://github.com/mikf/gallery-dl/issues/1394))
+- add `T` format string conversion ([#1646](https://github.com/mikf/gallery-dl/issues/1646))
+- document format string syntax
+### Changes
+- [twitter] set `retweet_id` for original retweets ([#1481](https://github.com/mikf/gallery-dl/issues/1481))
+### Fixes
+- [directlink] manually encode Referer URLs ([#1647](https://github.com/mikf/gallery-dl/issues/1647))
+- [hiperdex] use domain from input URL
+- [kemonoparty] fix `username` extraction ([#1652](https://github.com/mikf/gallery-dl/issues/1652))
+- [kemonoparty] warn about missing DDoS-GUARD cookies
+- [twitter] ensure guest tokens are returned as string ([#1665](https://github.com/mikf/gallery-dl/issues/1665))
+- [webtoons] match arbitrary language codes ([#1643](https://github.com/mikf/gallery-dl/issues/1643))
+- fix depth counter in UrlJob when specifying `-g` multiple times
+
## 1.18.0 - 2021-06-19
### Additions
- [foolfuuka] support `archive.wakarimasen.moe` ([#1595](https://github.com/mikf/gallery-dl/issues/1595))
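The 1.18.2 additions above are all driven by configuration keys that are documented in the man page changes further down. A minimal sketch of a gallery-dl.conf exercising them (the field name "gdl_url" and the numeric limit are illustrative assumptions, not defaults):

.. code:: json

    {
        "extractor": {
            "archive-prefix": "{category}",
            "url-metadata": "gdl_url",
            "kemonoparty": {"max-posts": 100},
            "mastodon": {"reblogs": true, "replies": false},
            "ytdl": {"enabled": false}
        }
    }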
diff --git a/PKG-INFO b/PKG-INFO
index ef2b047..fa33df0 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.18.0
+Version: 1.18.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -16,7 +16,7 @@ Description: ==========
*gallery-dl* is a command-line program to download image galleries and
collections from several image hosting sites (see `Supported Sites`_).
It is a cross-platform tool with many configuration options
- and powerful filenaming capabilities.
+ and powerful `filenaming capabilities <Formatting_>`_.
|pypi| |build| |gitter|
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -333,6 +333,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
+ .. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
diff --git a/README.rst b/README.rst
index f3a42fc..cbdb93e 100644
--- a/README.rst
+++ b/README.rst
@@ -5,7 +5,7 @@ gallery-dl
*gallery-dl* is a command-line program to download image galleries and
collections from several image hosting sites (see `Supported Sites`_).
It is a cross-platform tool with many configuration options
-and powerful filenaming capabilities.
+and powerful `filenaming capabilities <Formatting_>`_.
|pypi| |build| |gitter|
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -322,6 +322,7 @@ To authenticate with a ``mastodon`` instance, run *gallery-dl* with
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
+.. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 25da021..ee57b4b 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-06-19" "1.18.0" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-07-23" "1.18.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 84e8e0e..91101d1 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-06-19" "1.18.0" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-07-23" "1.18.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -79,7 +79,7 @@ those as makeshift comments by setting their values to arbitrary strings.
.IP "Example:" 4
.br
-* .. code::
+* .. code:: json
"{manga}_c{chapter}_{page:>03}.{extension}"
@@ -135,13 +135,29 @@ a valid filename extension.
.SS extractor.*.directory
.IP "Type:" 6
-\f[I]list\f[] of \f[I]strings\f[]
+\f[I]list\f[] of \f[I]strings\f[] or \f[I]object\f[]
.IP "Example:" 4
+.br
+* .. code:: json
+
["{category}", "{manga}", "c{chapter} - {title}"]
+.br
+* .. code:: json
+
+{
+"'nature' in content": ["Nature Pictures"],
+"retweet_id != 0" : ["{category}", "{user[name]}", "Retweets"],
+"" : ["{category}", "{user[name]}"]
+}
+
+
.IP "Description:" 4
-A list of \f[I]format strings\f[] for the resulting target directory.
+A list of \f[I]format strings\f[] to build target directory paths with.
+
+If this is an \f[I]object\f[], it must contain Python expressions mapping to the
+list of format strings to use.
Each individual string in such a list represents a single path
segment, which will be joined together and appended to the
@@ -174,13 +190,27 @@ for any spawned child extractors.
.SS extractor.*.parent-metadata
.IP "Type:" 6
-\f[I]bool\f[]
+\f[I]bool\f[] or \f[I]string\f[]
.IP "Default:" 9
\f[I]false\f[]
.IP "Description:" 4
-Overwrite any metadata provided by a child extractor with its parent's.
+If \f[I]true\f[], overwrite any metadata provided by a child extractor
+with its parent's.
+
+If this is a \f[I]string\f[], add a parent's metadata to its children's
+.br
+under a field named after said string.
+For example with \f[I]"parent-metadata": "_p_"\f[]:
+.br
+
+.. code:: json
+
+{
+"id": "child-id",
+"_p_": {"id": "parent-id"}
+}
.SS extractor.*.parent-skip
@@ -194,6 +224,17 @@ Overwrite any metadata provided by a child extractor with its parent's.
Share number of skipped downloads between parent and child extractors.
+.SS extractor.*.url-metadata
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Insert a file's download URL into its metadata dictionary under the given name.
+
+
.SS extractor.*.path-restrict
.IP "Type:" 6
\f[I]string\f[] or \f[I]object\f[]
@@ -555,7 +596,7 @@ any
.IP "Description:" 4
Default value used for missing or undefined keyword names in
-format strings.
+\f[I]format strings\f[].
.SS extractor.*.category-transfer
@@ -623,6 +664,17 @@ may pose a security risk.
An alternative \f[I]format string\f[] to build archive IDs with.
+.SS extractor.*.archive-prefix
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"{category}"\f[]
+
+.IP "Description:" 4
+Prefix for archive IDs.
+
+
.SS extractor.*.postprocessors
.IP "Type:" 6
\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
@@ -862,6 +914,35 @@ descend into subfolders
Download embedded videos hosted on https://www.blogger.com/
+.SS extractor.danbooru.ugoira
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Controls the download target for Ugoira posts.
+
+.br
+* \f[I]true\f[]: Original ZIP archives
+.br
+* \f[I]false\f[]: Converted video files
+
+
+.SS extractor.danbooru.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract additional metadata (notes, artist commentary, parent, children)
+
+Note: This requires 1 additional HTTP request for each post.
+
+
.SS extractor.derpibooru.api-key
.IP "Type:" 6
\f[I]string\f[]
@@ -1341,6 +1422,17 @@ You can use \f[I]"all"\f[] instead of listing all values separately.
Download video files.
+.SS extractor.kemonoparty.max-posts
+.IP "Type:" 6
+\f[I]integer\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Limit the number of posts to download.
+
+
.SS extractor.kemonoparty.metadata
.IP "Type:" 6
\f[I]bool\f[]
@@ -1352,6 +1444,17 @@ Download video files.
Extract \f[I]username\f[] metadata
+.SS extractor.kemonoparty.patreon-skip-file
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Skip main files in Patreon posts to avoid duplicates.
+
+
.SS extractor.khinsider.format
.IP "Type:" 6
\f[I]string\f[]
@@ -1380,6 +1483,62 @@ the first in the list gets chosen (usually mp3).
The server to use for API requests.
+.SS extractor.mangadex.lang
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Example:" 4
+"en"
+
+.IP "Description:" 4
+\f[I]ISO 639-1\f[] language code
+to filter chapters by.
+
+
+.SS extractor.mangadex.metadata
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Provide \f[I]artist\f[], \f[I]author\f[], and \f[I]group\f[] metadata fields.
+
+
+.SS extractor.mastodon.reblogs
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch media from reblogged posts.
+
+
+.SS extractor.mastodon.replies
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Fetch media from replies to other posts.
+
+
+.SS extractor.mastodon.text-posts
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Also emit metadata for text-only posts without media content.
+
+
.SS extractor.newgrounds.flash
.IP "Type:" 6
\f[I]bool\f[]
@@ -2099,6 +2258,118 @@ will be taken from the original posts, not the retweeted posts.
Download video files.
+.SS extractor.ytdl.enabled
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Match \f[B]all\f[] URLs, even ones without a \f[I]ytdl:\f[] prefix.
+
+
+.SS extractor.ytdl.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+youtube-dl's default, currently \f[I]"bestvideo+bestaudio/best"\f[]
+
+.IP "Description:" 4
+Video \f[I]format selection
+<https://github.com/ytdl-org/youtube-dl#format-selection>\f[]
+directly passed to youtube-dl.
+
+
+.SS extractor.ytdl.generic
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Controls the use of youtube-dl's generic extractor.
+
+Set this option to \f[I]"force"\f[] for the same effect as youtube-dl's
+\f[I]--force-generic-extractor\f[].
+
+
+.SS extractor.ytdl.logging
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Route youtube-dl's output through gallery-dl's logging system.
+Otherwise youtube-dl will write its output directly to stdout/stderr.
+
+Note: Set \f[I]quiet\f[] and \f[I]no_warnings\f[] in
+\f[I]extractor.ytdl.raw-options\f[] to \f[I]true\f[] to suppress all output.
+
+
+.SS extractor.ytdl.module
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"youtube_dl"\f[]
+
+.IP "Description:" 4
+Name of the youtube-dl Python module to import.
+
+
+.SS extractor.ytdl.raw-options
+.IP "Type:" 6
+\f[I]object\f[]
+
+.IP "Example:" 4
+.. code:: json
+
+{
+"quiet": true,
+"writesubtitles": true,
+"merge_output_format": "mkv"
+}
+
+
+.IP "Description:" 4
+Additional options passed directly to the \f[I]YoutubeDL\f[] constructor.
+
+All available options can be found in \f[I]youtube-dl's docstrings
+<https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>\f[].
+
+
+.SS extractor.[booru].tags
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Categorize tags by their respective types
+and provide them as \f[I]tags_<type>\f[] metadata fields.
+
+Note: This requires 1 additional HTTP request for each post.
+
+
+.SS extractor.[booru].notes
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Extract overlay notes (position and text).
+
+Note: This requires 1 additional HTTP request for each post.
+
+
.SS extractor.[manga-extractor].chapter-reverse
.IP "Type:" 6
\f[I]bool\f[]
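Unlike `directory` and `parent-metadata`, the new `url-metadata` and `archive-prefix` sections above ship without an example. A minimal sketch (the field name "gdl_url" is an assumption; any name works):

.. code:: json

    {
        "extractor": {
            "url-metadata": "gdl_url",
            "archive-prefix": ""
        }
    }

With this, each file's metadata dictionary gains a "gdl_url" field usable in format strings or post processors, and archive IDs lose their default "{category}" prefix.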
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 9514c7a..ffbed52 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -105,10 +105,6 @@
{
"include": "pictures"
},
- "hentainexus":
- {
- "original": true
- },
"hitomi":
{
"metadata": true
@@ -148,7 +144,9 @@
},
"mangadex":
{
- "api-server": "https://api.mangadex.org"
+ "api-server": "https://api.mangadex.org",
+ "metadata": false,
+ "lang": null
},
"mangoxo":
{
@@ -285,6 +283,15 @@
"retweets": true,
"videos": true
},
+ "ytdl":
+ {
+ "enabled": false,
+ "format": null,
+ "generic": true,
+ "logging": true,
+ "module": "youtube_dl",
+ "raw-options": null
+ },
"booru":
{
"tags": false,
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index b53c326..c8f8dec 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.18.0
+Version: 1.18.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -16,7 +16,7 @@ Description: ==========
*gallery-dl* is a command-line program to download image galleries and
collections from several image hosting sites (see `Supported Sites`_).
It is a cross-platform tool with many configuration options
- and powerful filenaming capabilities.
+ and powerful `filenaming capabilities <Formatting_>`_.
|pypi| |build| |gitter|
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.2/gallery-dl.bin>`__
| Executables built from the latest commit can be found at
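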
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -333,6 +333,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.md
+ .. _Formatting: https://github.com/mikf/gallery-dl/blob/master/docs/formatting.md
.. _Python: https://www.python.org/downloads/
.. _PyPI: https://pypi.org/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 9655896..f8a3c2c 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -45,10 +45,12 @@ gallery_dl/extractor/adultempire.py
gallery_dl/extractor/architizer.py
gallery_dl/extractor/artstation.py
gallery_dl/extractor/aryion.py
+gallery_dl/extractor/bbc.py
gallery_dl/extractor/bcy.py
gallery_dl/extractor/behance.py
gallery_dl/extractor/blogger.py
gallery_dl/extractor/booru.py
+gallery_dl/extractor/comicvine.py
gallery_dl/extractor/common.py
gallery_dl/extractor/cyberdrop.py
gallery_dl/extractor/danbooru.py
@@ -105,6 +107,7 @@ gallery_dl/extractor/mangahere.py
gallery_dl/extractor/mangakakalot.py
gallery_dl/extractor/manganelo.py
gallery_dl/extractor/mangapark.py
+gallery_dl/extractor/mangasee.py
gallery_dl/extractor/mangoxo.py
gallery_dl/extractor/mastodon.py
gallery_dl/extractor/message.py
@@ -141,6 +144,7 @@ gallery_dl/extractor/redgifs.py
gallery_dl/extractor/sankaku.py
gallery_dl/extractor/sankakucomplex.py
gallery_dl/extractor/seiga.py
+gallery_dl/extractor/seisoparty.py
gallery_dl/extractor/senmanga.py
gallery_dl/extractor/sexcom.py
gallery_dl/extractor/shopify.py
@@ -168,6 +172,7 @@ gallery_dl/extractor/weibo.py
gallery_dl/extractor/wikiart.py
gallery_dl/extractor/xhamster.py
gallery_dl/extractor/xvideos.py
+gallery_dl/extractor/ytdl.py
gallery_dl/postprocessor/__init__.py
gallery_dl/postprocessor/classify.py
gallery_dl/postprocessor/common.py
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index d5893b7..2cad029 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -127,6 +127,8 @@ def main():
# extractor modules
modules = config.get(("extractor",), "modules")
if modules is not None:
+ if isinstance(modules, str):
+ modules = modules.split(",")
extractor.modules = modules
extractor._module_iter = iter(modules)
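The `__init__.py` change above accepts `extractor.modules` as a single comma-separated string and splits it into a list, which is convenient for one-shot command-line option overrides where typing a JSON list is awkward. A sketch of the string form (module names illustrative); the list form `["twitter", "pixiv", "kemonoparty"]` keeps working:

.. code:: json

    {
        "extractor": {
            "modules": "twitter,pixiv,kemonoparty"
        }
    }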
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index e116188..b1e1d58 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -15,13 +15,9 @@ import os
class YoutubeDLDownloader(DownloaderBase):
scheme = "ytdl"
- module = None
def __init__(self, job):
- module = self.module
- if not module:
- module_name = self.config("module") or "youtube_dl"
- module = YoutubeDLDownloader.module = __import__(module_name)
+ module = __import__(self.config("module") or "youtube_dl")
DownloaderBase.__init__(self, job)
extractor = job.extractor
@@ -41,7 +37,10 @@ class YoutubeDLDownloader(DownloaderBase):
"max_filesize": text.parse_bytes(
self.config("filesize-max"), None),
}
- options.update(self.config("raw-options") or {})
+
+ raw_options = self.config("raw-options")
+ if raw_options:
+ options.update(raw_options)
if self.config("logging", True):
options["logger"] = self.log
@@ -54,30 +53,37 @@ class YoutubeDLDownloader(DownloaderBase):
self.ytdl = module.YoutubeDL(options)
def download(self, url, pathfmt):
- if self.forward_cookies:
- set_cookie = self.ytdl.cookiejar.set_cookie
- for cookie in self.session.cookies:
- set_cookie(cookie)
-
- try:
- info_dict = self.ytdl.extract_info(url[5:], download=False)
- except Exception:
- return False
+ kwdict = pathfmt.kwdict
+
+ ytdl = kwdict.pop("_ytdl_instance", None)
+ if not ytdl:
+ ytdl = self.ytdl
+ if self.forward_cookies:
+ set_cookie = ytdl.cookiejar.set_cookie
+ for cookie in self.session.cookies:
+ set_cookie(cookie)
+
+ info_dict = kwdict.pop("_ytdl_info_dict", None)
+ if not info_dict:
+ try:
+ info_dict = ytdl.extract_info(url[5:], download=False)
+ except Exception:
+ return False
if "entries" in info_dict:
- index = pathfmt.kwdict.get("_ytdl_index")
+ index = kwdict.get("_ytdl_index")
if index is None:
- return self._download_playlist(pathfmt, info_dict)
+ return self._download_playlist(ytdl, pathfmt, info_dict)
else:
info_dict = info_dict["entries"][index]
- extra = pathfmt.kwdict.get("_ytdl_extra")
+ extra = kwdict.get("_ytdl_extra")
if extra:
info_dict.update(extra)
- return self._download_video(pathfmt, info_dict)
+ return self._download_video(ytdl, pathfmt, info_dict)
- def _download_video(self, pathfmt, info_dict):
+ def _download_video(self, ytdl, pathfmt, info_dict):
if "url" in info_dict:
text.nameext_from_url(info_dict["url"], pathfmt.kwdict)
@@ -86,8 +92,8 @@ class YoutubeDLDownloader(DownloaderBase):
info_dict["ext"] = "mkv"
if self.outtmpl:
- self.ytdl.params["outtmpl"] = self.outtmpl
- pathfmt.filename = filename = self.ytdl.prepare_filename(info_dict)
+ self._set_outtmpl(ytdl, self.outtmpl)
+ pathfmt.filename = filename = ytdl.prepare_filename(info_dict)
pathfmt.extension = info_dict["ext"]
pathfmt.path = pathfmt.directory + filename
pathfmt.realpath = pathfmt.temppath = (
@@ -101,26 +107,35 @@ class YoutubeDLDownloader(DownloaderBase):
if self.part and self.partdir:
pathfmt.temppath = os.path.join(
self.partdir, pathfmt.filename)
- self.ytdl.params["outtmpl"] = pathfmt.temppath.replace("%", "%%")
+
+ self._set_outtmpl(ytdl, pathfmt.temppath.replace("%", "%%"))
self.out.start(pathfmt.path)
try:
- self.ytdl.process_info(info_dict)
+ ytdl.process_info(info_dict)
except Exception:
self.log.debug("Traceback", exc_info=True)
return False
return True
- def _download_playlist(self, pathfmt, info_dict):
+ def _download_playlist(self, ytdl, pathfmt, info_dict):
pathfmt.set_extension("%(playlist_index)s.%(ext)s")
- self.ytdl.params["outtmpl"] = pathfmt.realpath
+ self._set_outtmpl(ytdl, pathfmt.realpath)
for entry in info_dict["entries"]:
- self.ytdl.process_info(entry)
+ ytdl.process_info(entry)
return True
+ @staticmethod
+ def _set_outtmpl(ytdl, outtmpl):
+ try:
+ ytdl.outtmpl_dict["default"] = outtmpl
+ except AttributeError:
+ ytdl.params["outtmpl"] = outtmpl
+
def compatible_formats(formats):
+ """Returns True if 'formats' are compatible for merge"""
video_ext = formats[0].get("ext")
audio_ext = formats[1].get("ext")
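The new `_set_outtmpl` helper above hides an API difference between the two supported backends: yt-dlp (of the vintage this release targets) keeps its output templates in an `outtmpl_dict` attribute keyed by type, while youtube-dl stores a single template string among the constructor options. The same duck-typing pattern as a standalone sketch:

.. code:: python

    def set_outtmpl(ytdl, outtmpl):
        """Set the default output template on either backend."""
        try:
            # yt-dlp: one template per output type; "default" covers media files
            ytdl.outtmpl_dict["default"] = outtmpl
        except AttributeError:
            # youtube-dl: a single "outtmpl" string in the options dict
            ytdl.params["outtmpl"] = outtmpl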
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index 4cf5e48..696b370 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -140,7 +140,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
}),
# unavailable photos (#1335)
("https://500px.com/p/Light_Expression_Photography/galleries/street", {
- "count": 0,
+ "count": 4,
}),
("https://500px.com/fashvamp/galleries/lera"),
)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index d927d70..1a6a899 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -20,9 +20,11 @@ modules = [
"architizer",
"artstation",
"aryion",
+ "bbc",
"bcy",
"behance",
"blogger",
+ "comicvine",
"cyberdrop",
"danbooru",
"deviantart",
@@ -74,6 +76,7 @@ modules = [
"mangakakalot",
"manganelo",
"mangapark",
+ "mangasee",
"mangoxo",
"myhentaigallery",
"myportfolio",
@@ -105,6 +108,7 @@ modules = [
"sankaku",
"sankakucomplex",
"seiga",
+ "seisoparty",
"senmanga",
"sexcom",
"simplyhentai",
@@ -141,6 +145,7 @@ modules = [
"recursive",
"oauth",
"test",
+ "ytdl",
]
diff --git a/gallery_dl/extractor/architizer.py b/gallery_dl/extractor/architizer.py
index 9629e25..dbc197e 100644
--- a/gallery_dl/extractor/architizer.py
+++ b/gallery_dl/extractor/architizer.py
@@ -37,7 +37,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
"subcategory": "project",
"title": "House LO",
"type": "Residential › Private House",
- "year": "2018",
+ "year": "2020",
},
})
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
new file mode 100644
index 0000000..ace8a28
--- /dev/null
+++ b/gallery_dl/extractor/bbc.py
@@ -0,0 +1,80 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bbc.co.uk/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, util
+import json
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?bbc\.co\.uk(/programmes/"
+
+
+class BbcGalleryExtractor(GalleryExtractor):
+ """Extractor for a programme gallery on bbc.co.uk"""
+ category = "bbc"
+ root = "https://www.bbc.co.uk"
+ directory_fmt = ("{category}", "{path[0]}", "{path[1]}", "{path[2]}",
+ "{path[3:]:J - /}")
+ filename_fmt = "{num:>02}.{extension}"
+ archive_fmt = "{programme}_{num}"
+ pattern = BASE_PATTERN + r"[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$"
+ test = (
+ ("https://www.bbc.co.uk/programmes/p084qtzs/p085g9kg", {
+ "pattern": r"https://ichef\.bbci\.co\.uk"
+ r"/images/ic/976x549_b/\w+\.jpg",
+ "count": 37,
+ "keyword": {
+ "programme": "p084qtzs",
+ "path": ["BBC One", "Doctor Who", "The Timeless Children"],
+ },
+ }),
+ ("https://www.bbc.co.uk/programmes/p084qtzs"),
+ )
+
+ def metadata(self, page):
+ data = json.loads(text.extract(
+ page, '<script type="application/ld+json">', '</script>')[0])
+ return {
+ "programme": self.gallery_url.split("/")[4],
+ "path": list(util.unique_sequence(
+ element["name"]
+ for element in data["itemListElement"]
+ )),
+ }
+
+ def images(self, page):
+ return [
+ (imgset.rpartition(", ")[2].partition(" ")[0], None)
+ for imgset in text.extract_iter(page, 'data-image-src-sets="', '"')
+ ]
+
+
+class BbcProgrammeExtractor(Extractor):
+ """Extractor for all galleries of a bbc programme"""
+ category = "bbc"
+ subcategory = "programme"
+ root = "https://www.bbc.co.uk"
+ pattern = BASE_PATTERN + r"[^/?#]+/galleries)"
+ test = ("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
+ "pattern": BbcGalleryExtractor.pattern,
+ "count": ">= 24",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.galleries_url = self.root + match.group(1)
+
+ def items(self):
+ page = self.request(self.galleries_url).text
+ data = {"_extractor": BbcGalleryExtractor}
+
+ for programme_id in text.extract_iter(
+ page, '<a href="https://www.bbc.co.uk/programmes/', '"'):
+ url = "https://www.bbc.co.uk/programmes/" + programme_id
+ yield Message.Queue, url, data
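`images()` above relies on the `data-image-src-sets` attribute listing "URL width" pairs separated by ", ", apparently ordered smallest to largest, so keeping only the last pair's URL selects the biggest rendition. A worked example with a hypothetical attribute value:

.. code:: python

    imgset = ("https://ichef.bbci.co.uk/images/ic/320x180/p1.jpg 320w, "
              "https://ichef.bbci.co.uk/images/ic/976x549_b/p1.jpg 976w")
    # text after the last ", ", cut at the first space -> the largest URL
    url = imgset.rpartition(", ")[2].partition(" ")[0]
    # "https://ichef.bbci.co.uk/images/ic/976x549_b/p1.jpg"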
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py
index d6e3683..f867bd9 100644
--- a/gallery_dl/extractor/bcy.py
+++ b/gallery_dl/extractor/bcy.py
@@ -173,9 +173,8 @@ class BcyPostExtractor(BcyExtractor):
("https://bcy.net/item/detail/6950136331708144648", {
"pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
r"~tplv-banciyuan-logo-v3:.+\.image",
- "count": 10,
- "keyword": {"filter": "watermark"}
-
+ "count": 8,
+ "keyword": {"filter": "watermark"},
}),
# deleted
("https://bcy.net/item/detail/6780546160802143236", {
diff --git a/gallery_dl/extractor/comicvine.py b/gallery_dl/extractor/comicvine.py
new file mode 100644
index 0000000..3a57886
--- /dev/null
+++ b/gallery_dl/extractor/comicvine.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://comicvine.gamespot.com/"""
+
+from .booru import BooruExtractor
+from .. import text
+import operator
+
+
+class ComicvineTagExtractor(BooruExtractor):
+ """Extractor for a gallery on comicvine.gamespot.com"""
+ category = "comicvine"
+ subcategory = "tag"
+ basecategory = ""
+ root = "https://comicvine.gamespot.com"
+ per_page = 1000
+ directory_fmt = ("{category}", "{tag}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{id}"
+ pattern = (r"(?:https?://)?comicvine\.gamespot\.com"
+ r"(/([^/?#]+)/(\d+-\d+)/images/.*)")
+ test = (
+ ("https://comicvine.gamespot.com/jock/4040-5653/images/", {
+ "pattern": r"https://comicvine\.gamespot\.com/a/uploads"
+ r"/original/\d+/\d+/\d+-.+\.(jpe?g|png)",
+ "count": ">= 140",
+ }),
+ (("https://comicvine.gamespot.com/batman/4005-1699"
+ "/images/?tag=Fan%20Art%20%26%20Cosplay"), {
+ "pattern": r"https://comicvine\.gamespot\.com/a/uploads"
+ r"/original/\d+/\d+/\d+-.+",
+ "count": ">= 450",
+ }),
+ )
+
+ def __init__(self, match):
+ BooruExtractor.__init__(self, match)
+ self.path, self.object_name, self.object_id = match.groups()
+
+ def metadata(self):
+ return {"tag": text.unquote(self.object_name)}
+
+ def posts(self):
+ url = self.root + "/js/image-data.json"
+ params = {
+ "images": text.extract(
+ self.request(self.root + self.path).text,
+ 'data-gallery-id="', '"')[0],
+ "start" : self.page_start,
+ "count" : self.per_page,
+ "object": self.object_id,
+ }
+
+ while True:
+ images = self.request(url, params=params).json()["images"]
+ yield from images
+
+ if len(images) < self.per_page:
+ return
+ params["start"] += self.per_page
+
+ def skip(self, num):
+ self.page_start = num
+ return num
+
+ _file_url = operator.itemgetter("original")
+
+ @staticmethod
+ def _prepare(post):
+ post["date"] = text.parse_datetime(
+ post["dateCreated"], "%a, %b %d %Y")
+ post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 048e0a3..2533ae5 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -174,8 +174,7 @@ class Extractor():
elif until:
if isinstance(until, datetime.datetime):
# convert to UTC timestamp
- epoch = datetime.datetime(1970, 1, 1)
- until = (until - epoch) / datetime.timedelta(0, 1)
+ until = (until - util.EPOCH) / util.SECOND
else:
until = float(until)
seconds = until - now
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 70e268d..163d7ba 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -258,19 +258,25 @@ class DeviantartExtractor(Extractor):
return Message.Url, txt, deviation
@staticmethod
- def _find_folder(folders, name):
- match = re.compile(name.replace(
- "-", r"[^a-z0-9]+") + "$", re.IGNORECASE).match
- for folder in folders:
- if match(folder["name"]):
- return folder
+ def _find_folder(folders, name, uuid):
+ if uuid.isdecimal():
+ match = re.compile(name.replace(
+ "-", r"[^a-z0-9]+") + "$", re.IGNORECASE).match
+ for folder in folders:
+ if match(folder["name"]):
+ return folder
+ else:
+ for folder in folders:
+ if folder["folderid"] == uuid:
+ return folder
raise exception.NotFoundError("folder")
def _folder_urls(self, folders, category, extractor):
- base = "{}/{}/{}/0/".format(self.root, self.user, category)
+ base = "{}/{}/{}/".format(self.root, self.user, category)
for folder in folders:
folder["_extractor"] = extractor
- yield base + folder["name"], folder
+ url = "{}{}/{}".format(base, folder["folderid"], folder["name"])
+ yield url, folder
def _update_content_default(self, deviation, content):
public = "premium_folder_data" not in deviation
@@ -422,7 +428,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
}),
# group
("https://www.deviantart.com/yakuzafc/gallery", {
- "pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/",
+ "pattern": r"https://www.deviantart.com/yakuzafc/gallery"
+ r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}/",
"count": ">= 15",
}),
# 'folders' option (#276)
@@ -461,7 +468,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
test = (
# user
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
@@ -473,6 +480,12 @@ class DeviantartFolderExtractor(DeviantartExtractor):
"count": ">= 4",
"options": (("original", False),),
}),
+ # uuid
+ (("https://www.deviantart.com/shimoda7/gallery"
+ "/B38E3C6A-2029-6B45-757B-3C8D3422AD1A/misc"), {
+ "count": 5,
+ "options": (("original", False),),
+ }),
# name starts with '_', special characters (#1451)
(("https://www.deviantart.com/justatest235723"
"/gallery/69302698/-test-b-c-d-e-f-"), {
@@ -491,7 +504,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
def deviations(self):
folders = self.api.gallery_folders(self.user)
- folder = self._find_folder(folders, self.folder_name)
+ folder = self._find_folder(folders, self.folder_name, self.folder_id)
self.folder = {
"title": folder["name"],
"uuid" : folder["folderid"],
@@ -611,10 +624,15 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Favourites",
"{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
test = (
- (("https://www.deviantart.com/pencilshadings"
- "/favourites/70595441/3D-Favorites"), {
+ (("https://www.deviantart.com/pencilshadings/favourites"
+ "/70595441/3D-Favorites"), {
+ "count": ">= 20",
+ "options": (("original", False),),
+ }),
+ (("https://www.deviantart.com/pencilshadings/favourites"
+ "/F050486B-CB62-3C66-87FB-1105A7F6379F/3D Favorites"), {
"count": ">= 20",
"options": (("original", False),),
}),
@@ -630,7 +648,8 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
def deviations(self):
folders = self.api.collections_folders(self.user)
- folder = self._find_folder(folders, self.collection_name)
+ folder = self._find_folder(
+ folders, self.collection_name, self.collection_id)
self.collection = {
"title": folder["name"],
"uuid" : folder["folderid"],
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index a6346bf..8505b0b 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2020 Mike Fährmann
+# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -59,7 +59,8 @@ class DirectlinkExtractor(Extractor):
data["path"], _, name = data["path"].rpartition("/")
data["filename"], _, ext = name.rpartition(".")
data["extension"] = ext.lower()
- data["_http_headers"] = {"Referer": self.url}
+ data["_http_headers"] = {
+ "Referer": self.url.encode("latin-1", "ignore")}
yield Message.Version, 1
yield Message.Directory, data
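The Referer change above works around header serialization in `requests`/`http.client`: a `str` header value is encoded as latin-1 on the wire, so a URL containing other characters raises `UnicodeEncodeError` (#1647). Passing the value pre-encoded as bytes with "ignore" drops the offending characters instead of failing. A minimal illustration (URL hypothetical):

.. code:: python

    url = "https://example.org/gallerie/陽炎/01.jpg"
    headers = {"Referer": url.encode("latin-1", "ignore")}
    # the bytes value is sent unmodified:
    # b"https://example.org/gallerie//01.jpg"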
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 64a6cb7..bccd6c8 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
"date": "dt:2018-03-18 20:15:00",
"eh_category": "Non-H",
"expunged": False,
- "favorites": "17",
+ "favorites": "18",
"filecount": "4",
"filesize": 1488978,
"gid": 1200119,
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 5ea3adb..b82160f 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -137,7 +137,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
"url": "61896d9d9a2edb556b619000a308a984307b6d30",
}),
("https://thebarchive.com/b/thread/739772332/", {
- "url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd",
+ "url": "e8b18001307d130d67db31740ce57c8561b5d80c",
}),
("https://archive.wakarimasen.moe/a/thread/223157648/", {
"url": "fef0758d2eb81b1ba783051fd5ec491d70107a78",
diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py
index 4485925..fa8e98b 100644
--- a/gallery_dl/extractor/hentaihand.py
+++ b/gallery_dl/extractor/hentaihand.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -19,21 +19,23 @@ class HentaihandGalleryExtractor(GalleryExtractor):
root = "https://hentaihand.com"
pattern = r"(?:https?://)?(?:www\.)?hentaihand\.com/\w+/comic/([\w-]+)"
test = (
- (("https://hentaihand.com/en/comic/kouda-tomohiro-chiyomi-"
- "blizzard-comic-aun-2016-12-english-nanda-sore-scans"), {
- "pattern": r"https://cdn.hentaihand.com/.*/images/304546/\d+.jpg$",
- "count": 19,
+ (("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-"
+ "no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-"
+ "no-railgun-english"), {
+ "pattern": r"https://cdn.hentaihand.com/.*/images/5669/\d+.jpg$",
+ "count": 50,
"keyword": {
- "artists" : ["Kouda Tomohiro"],
- "date" : "dt:2020-02-06 00:00:00",
- "gallery_id": 304546,
+ "artists" : ["Takumi Na Muchi"],
+ "date" : "dt:2014-06-28 00:00:00",
+ "gallery_id": 5669,
"lang" : "en",
"language" : "English",
- "relationships": ["Family", "Step family"],
+ "parodies" : ["Toaru Kagaku No Railgun"],
+ "relationships": list,
"tags" : list,
- "title" : r"re:\[Kouda Tomohiro\] Chiyomi Blizzard",
- "title_alt" : r"re:\[幸田朋弘\] ちよみブリザード",
- "type" : "Manga",
+ "title" : r"re:\(C75\) \[Takumi na Muchi\] Choudenji Hou ",
+ "title_alt" : r"re:\(C75\) \[たくみなむち\] 超電磁砲のあいしかた",
+ "type" : "Doujinshi",
},
}),
)
@@ -76,9 +78,9 @@ class HentaihandTagExtractor(Extractor):
r"/\w+/(parody|character|tag|artist|group|language"
r"|category|relationship)/([^/?#]+)")
test = (
- ("https://hentaihand.com/en/artist/himuro", {
+ ("https://hentaihand.com/en/artist/takumi-na-muchi", {
"pattern": HentaihandGalleryExtractor.pattern,
- "count": ">= 18",
+ "count": ">= 6",
}),
("https://hentaihand.com/en/tag/full-color"),
("https://hentaihand.com/fr/language/japanese"),
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 7ad06c9..a40d631 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -13,13 +13,13 @@ from .. import text
from ..cache import memcache
import re
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\d?\.(?:com|net|info)"
+BASE_PATTERN = r"((?:https?://)?(?:www\.)?hiperdex\d?\.(?:com|net|info))"
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://hiperdex2.com"
+ root = "https://hiperdex.com"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
@@ -65,7 +65,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
test = (
- ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/", {
+ ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
"pattern": r"https://hiperdex\d?.(com|net|info)/wp-content/uploads"
r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp",
"count": 9,
@@ -82,12 +82,14 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"type" : "Manga",
},
}),
+ ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
)
def __init__(self, match):
- path, self.manga, self.chapter = match.groups()
+ root, path, self.manga, self.chapter = match.groups()
+ self.root = text.ensure_http_scheme(root)
ChapterExtractor.__init__(self, match, self.root + path + "/")
def metadata(self, _):
@@ -106,7 +108,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
test = (
- ("https://hiperdex2.com/manga/youre-not-that-special/", {
+ ("https://hiperdex.com/manga/youre-not-that-special/", {
"count": 51,
"pattern": HiperdexChapterExtractor.pattern,
"keyword": {
@@ -123,12 +125,14 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"type" : "Manhwa",
},
}),
+ ("https://hiperdex2.com/manga/youre-not-that-special/"),
("https://hiperdex.net/manga/youre-not-that-special/"),
("https://hiperdex.info/manga/youre-not-that-special/"),
)
def __init__(self, match):
- path, self.manga = match.groups()
+ root, path, self.manga = match.groups()
+ self.root = text.ensure_http_scheme(root)
MangaExtractor.__init__(self, match, self.root + path + "/")
def chapters(self, page):
@@ -156,10 +160,10 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
categorytransfer = False
chapterclass = HiperdexMangaExtractor
reverse = False
- pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?#]+))"
+ pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
test = (
- ("https://hiperdex2.com/manga-artist/beck-ho-an/"),
("https://hiperdex.net/manga-artist/beck-ho-an/"),
+ ("https://hiperdex2.com/manga-artist/beck-ho-an/"),
("https://hiperdex.info/manga-artist/beck-ho-an/"),
("https://hiperdex.com/manga-author/viagra/", {
"pattern": HiperdexMangaExtractor.pattern,
@@ -168,7 +172,8 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
)
def __init__(self, match):
- MangaExtractor.__init__(self, match, self.root + match.group(1) + "/")
+ self.root = text.ensure_http_scheme(match.group(1))
+ MangaExtractor.__init__(self, match, self.root + match.group(2) + "/")
def chapters(self, page):
results = []
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 497509d..2ea5dfa 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "8dfbcb1e51cec43a7112d58b7e92153155ada3b9",
+ "url": "1de8510bd4c3048a1cbbf242505d8449e93ba5a4",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "a5af7fdca1f5c93c289af128914a8488ea345036",
+ "url": "681bb07d8ce4d0c4d0592e47b239b6e42d566386",
"count": 1413,
}),
# gallery with "broken" redirect
@@ -140,11 +140,10 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/common.js
inum = int(ihash[-3:-1], 16)
- frontends = 2 if inum < 0x70 else 3
- inum = 1 if inum < 0x49 else inum
+ offset = 2 if inum < 0x40 else 1 if inum < 0x80 else 0
url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
- chr(97 + (inum % frontends)),
+ chr(97 + offset),
ihash[-1], ihash[-3:-1], ihash,
idata["extension"],
)
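A worked example of the revised subdomain arithmetic above (hash value hypothetical):

.. code:: python

    ihash = "c7a3"                # last characters of an image hash
    inum = int(ihash[-3:-1], 16)  # "7a" -> 0x7a == 122
    offset = 2 if inum < 0x40 else 1 if inum < 0x80 else 0  # 0x40 <= 0x7a < 0x80 -> 1
    subdomain = chr(97 + offset)  # "b", so this image is served from bb.hitomi.la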
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index d757e17..9328437 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -133,18 +133,30 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
class ImagevenueImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imagevenue.com"""
category = "imagevenue"
- pattern = (r"(?:https?://)?(img\d+\.imagevenue\.com"
- r"/img\.php\?image=(?:[a-z]+_)?(\d+)_[^&#]+)")
- test = (("http://img28116.imagevenue.com/img.php"
- "?image=th_52709_test_122_64lo.jpg"), {
- "url": "46812995d557f2c6adf0ebd0e631e6e4e45facde",
- "content": "59ec819cbd972dd9a71f25866fbfc416f2f215b3",
- })
- https = False
+ pattern = (r"(?:https?://)?((?:www|img\d+)\.imagevenue\.com"
+ r"/([A-Z0-9]{8,10}|view/.*|img\.php\?.*))")
+ test = (
+ ("https://www.imagevenue.com/ME13LS07", {
+ "pattern": r"https://cdn-images\.imagevenue\.com"
+ r"/10/ac/05/ME13LS07_o\.png",
+ "keyword": "ae15d6e3b2095f019eee84cd896700cd34b09c36",
+ "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
+ }),
+ (("https://www.imagevenue.com/view/o?i=92518_13732377"
+ "annakarina424200712535AM_122_486lo.jpg&h=img150&l=loc486"), {
+ "url": "8bf0254e29250d8f5026c0105bbdda3ee3d84980",
+ }),
+ (("http://img28116.imagevenue.com/img.php"
+ "?image=th_52709_test_122_64lo.jpg"), {
+ "url": "f98e3091df7f48a05fb60fbd86f789fc5ec56331",
+ }),
+ )
def get_info(self, page):
- url = text.extract(page, "SRC='", "'")[0]
- return text.urljoin(self.page_url, url), url
+ pos = page.index('class="card-body')
+ url, pos = text.extract(page, '<img src="', '"', pos)
+ filename, pos = text.extract(page, 'alt="', '"', pos)
+ return url, text.unescape(filename)
class ImagetwistImageExtractor(ImagehostImageExtractor):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index b015556..28b5506 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -49,7 +49,7 @@ class InstagramExtractor(Extractor):
if "__typename" in post:
post = self._parse_post_graphql(post)
else:
- post = self._parse_post_reel(post)
+ post = self._parse_post_api(post)
post.update(data)
files = post.pop("_files")
@@ -239,16 +239,23 @@ class InstagramExtractor(Extractor):
return data
- def _parse_post_reel(self, post):
+ def _parse_post_api(self, post):
if "media" in post:
media = post["media"]
owner = media["user"]
- post["items"] = (media,)
data = {
"post_id" : media["pk"],
"post_shortcode": self._shortcode_from_id(media["pk"]),
}
+
+ if "carousel_media" in media:
+ post["items"] = media["carousel_media"]
+ data["sidecar_media_id"] = data["post_id"]
+ data["sidecar_shortcode"] = data["post_shortcode"]
+ else:
+ post["items"] = (media,)
+
else:
reel_id = str(post["id"]).rpartition(":")[2]
owner = post["user"]
@@ -279,9 +286,11 @@ class InstagramExtractor(Extractor):
files.append({
"num" : num,
- "date" : text.parse_timestamp(item["taken_at"]),
+ "date" : text.parse_timestamp(item.get("taken_at") or
+ media.get("taken_at")),
"media_id" : item["pk"],
- "shortcode" : item["code"],
+ "shortcode" : (item.get("code") or
+ self._shortcode_from_id(item["pk"])),
"display_url": image["url"],
"video_url" : video["url"] if video else None,
"width" : media["width"],
@@ -485,18 +494,42 @@ class InstagramTagExtractor(InstagramExtractor):
})
def metadata(self):
- return {"tag": self.item}
+ return {"tag": text.unquote(self.item)}
def posts(self):
url = "{}/explore/tags/{}/".format(self.root, self.item)
- data = self._extract_shared_data(url)
- hashtag = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"]
+ page = self._extract_shared_data(url)["entry_data"]["TagPage"][0]
+ if "data" in page:
+ return self._pagination_sections(page["data"]["recent"])
+
+ hashtag = page["graphql"]["hashtag"]
query_hash = "9b498c08113f1e09617a1703c22b2f32"
variables = {"tag_name": hashtag["name"], "first": 50}
edge = self._get_edge_data(hashtag, "edge_hashtag_to_media")
return self._pagination_graphql(query_hash, variables, edge)
+ def _pagination_sections(self, info):
+ endpoint = "/v1/tags/instagram/sections/"
+ data = {
+ "include_persistent": "0",
+ "max_id" : None,
+ "page" : None,
+ "surface": "grid",
+ "tab" : "recent",
+ }
+
+ while True:
+ for section in info["sections"]:
+ yield from section["layout_content"]["medias"]
+
+ if not info.get("more_available"):
+ return
+
+ data["max_id"] = info["next_max_id"]
+ data["page"] = info["next_page"]
+ info = self._request_api(endpoint, method="POST", data=data)
+
def _pagination_graphql(self, query_hash, variables, data):
while True:
for edge in data["edges"]:
@@ -619,7 +652,7 @@ class InstagramPostExtractor(InstagramExtractor):
)
def posts(self):
- query_hash = "971f52b26328008c768b7d8e4ac9ce3c"
+ query_hash = "1f950d414a6e11c98c556aa007b3157d"
variables = {
"shortcode" : self.item,
"child_comment_count" : 3,
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 8c51d5d..7218488 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text
+import itertools
import re
BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)"
@@ -22,17 +23,32 @@ class KemonopartyExtractor(Extractor):
directory_fmt = ("{category}", "{service}", "{user}")
filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
archive_fmt = "{service}_{user}_{id}_{num}"
+ cookiedomain = ".kemono.party"
+ _warning = True
def items(self):
+ if self._warning:
+ if not self._check_cookies(("__ddg1", "__ddg2")):
+ self.log.warning("no DDoS-GUARD cookies set (__ddg1, __ddg2)")
+ KemonopartyExtractor._warning = False
+
find_inline = re.compile(r'src="(/inline/[^"]+)').findall
+ skip_service = \
+ "patreon" if self.config("patreon-skip-file", True) else None
if self.config("metadata"):
username = text.unescape(text.extract(
- self.request(self.user_url).text, "<title>", " | Kemono<")[0])
+ self.request(self.user_url).text, "<title>", " | Kemono"
+ )[0]).lstrip()
else:
username = None
- for post in self.posts():
+ posts = self.posts()
+ max_posts = self.config("max-posts")
+ if max_posts:
+ posts = itertools.islice(posts, max_posts)
+
+ for post in posts:
files = []
append = files.append
@@ -40,7 +56,8 @@ class KemonopartyExtractor(Extractor):
if file:
file["type"] = "file"
- append(file)
+ if post["service"] != skip_service or not post["attachments"]:
+ append(file)
for attachment in post["attachments"]:
attachment["type"] = "attachment"
append(attachment)
@@ -68,24 +85,30 @@ class KemonopartyExtractor(Extractor):
class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/?(?:$|[?#])"
+ pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
"count": 25,
}),
+ # 'max-posts' option, 'o' query parameter (#1674)
+ ("https://kemono.party/patreon/user/881792?o=150", {
+ "options": (("max-posts", 25),),
+ "count": "< 100",
+ }),
("https://kemono.party/subscribestar/user/alcorart"),
)
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
- service, user_id = match.groups()
+ service, user_id, offset = match.groups()
self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
self.user_url = "{}/{}/user/{}".format(self.root, service, user_id)
+ self.offset = text.parse_int(offset)
def posts(self):
url = self.api_url
- params = {"o": 0}
+ params = {"o": self.offset}
while True:
posts = self.request(url, params=params).json()
@@ -133,6 +156,16 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"pattern": r"https://data\.kemono\.party/(file|attachment)s"
r"/gumroad/trylsc/IURjT/",
}),
+ # username (#1548, #1652)
+ ("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
+ "options": (("metadata", True),),
+ "keyword": {"username": "Kudalyn's Creations"},
+ }),
+ # skip patreon main file (#1667, #1689)
+ ("https://kemono.party/patreon/user/4158582/post/32099982", {
+ "count": 2,
+ "keyword": {"type": "attachment"},
+ }),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
)
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 0fe46b1..a8241dc 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -11,6 +11,7 @@
from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
+from ..version import __version__
from collections import defaultdict
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
@@ -27,6 +28,7 @@ class MangadexExtractor(Extractor):
archive_fmt = "{chapter_id}_{page}"
root = "https://mangadex.org"
_cache = {}
+ _headers = {"User-Agent": "gallery-dl/" + __version__}
def __init__(self, match):
Extractor.__init__(self, match)
@@ -116,6 +118,7 @@ class MangadexChapterExtractor(MangadexExtractor):
yield Message.Directory, data
cattributes = chapter["data"]["attributes"]
+ data["_http_headers"] = self._headers
base = "{}/data/{}/".format(
self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
for data["page"], page in enumerate(cattributes["data"], 1):
@@ -170,7 +173,7 @@ class MangadexAPI():
def __init__(self, extr):
self.extractor = extr
- self.headers = {}
+ self.headers = extr._headers.copy()
self.username, self.password = self.extractor._get_auth_info()
if not self.username:
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index a9d504e..f6514ca 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -6,17 +6,21 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for from https://fanfox.net/"""
+"""Extractors for https://fanfox.net/"""
-from .common import ChapterExtractor
+from .common import ChapterExtractor, MangaExtractor
from .. import text
+import re
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
class MangafoxChapterExtractor(ChapterExtractor):
- """Extractor for manga-chapters from fanfox.net"""
+ """Extractor for manga chapters from fanfox.net"""
category = "mangafox"
- pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
- r"(/manga/[^/]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))")
+ root = "https://m.fanfox.net"
+ pattern = BASE_PATTERN + \
+ r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))"
test = (
("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
"keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
@@ -25,7 +29,6 @@ class MangafoxChapterExtractor(ChapterExtractor):
("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"),
)
- root = "https://m.fanfox.net"
def __init__(self, match):
base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
@@ -60,3 +63,55 @@ class MangafoxChapterExtractor(ChapterExtractor):
pnum += 2
page = self.request("{}/{}.html".format(self.urlbase, pnum)).text
+
+
+class MangafoxMangaExtractor(MangaExtractor):
+ """Extractor for manga from fanfox.net"""
+ category = "mangafox"
+ root = "https://m.fanfox.net"
+ chapterclass = MangafoxChapterExtractor
+ pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$"
+ test = (
+ ("https://fanfox.net/manga/kanojo_mo_kanojo", {
+ "pattern": MangafoxChapterExtractor.pattern,
+ "count": ">=60",
+ }),
+ ("https://mangafox.me/manga/shangri_la_frontier", {
+ "pattern": MangafoxChapterExtractor.pattern,
+ "count": ">=45",
+ }),
+ ("https://m.fanfox.net/manga/sentai_daishikkaku"),
+ )
+
+ def chapters(self, page):
+ match_info = re.compile(r"Ch (\d+)(\S*)(?: (.*))?").match
+ manga, pos = text.extract(page, '<p class="title">', '</p>')
+ author, pos = text.extract(page, '<p>Author(s):', '</p>', pos)
+ data = {
+ "manga" : text.unescape(manga),
+ "author" : text.remove_html(author),
+ "lang" : "en",
+ "language": "English",
+ }
+
+ results = []
+ pos = page.index('<dd class="chlist">')
+ while True:
+ url, pos = text.extract(page, '<a href="//', '"', pos)
+ if url == 'mangafox.la?f=mobile':
+ return results
+ info, pos = text.extract(page, '>', '<span', pos)
+ date, pos = text.extract(page, 'right">', '</span>', pos)
+
+ match = match_info(text.unescape(info))
+ if match:
+ chapter, minor, title = match.groups()
+ chapter_minor = minor
+ else:
+ chapter, _, minor = url[:-7].rpartition("/c")[2].partition(".")
+ chapter_minor = "." + minor
+
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = chapter_minor if minor else ""
+ data["date"] = date
+ results.append(("https://" + url, data.copy()))
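Note: chapters() parses listing entries like "Ch 006.2 Title" with the
precompiled regex above and falls back to the chapter URL when the text
does not match. A small standalone check of that regex:

    import re

    match_info = re.compile(r"Ch (\d+)(\S*)(?: (.*))?").match
    m = match_info("Ch 006.2 The Long Patrol")
    assert m.groups() == ("006", ".2", "The Long Patrol")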
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 9b6d4ba..4bd5572 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -56,7 +56,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
test = (
("https://mangapark.net/manga/gosu/i811653/c055/1", {
"count": 50,
- "keyword": "8344bdda8cd8414e7729a4e148379f147e3437da",
+ "keyword": "db1ed9af4f972756a25dbfa5af69a8f155b043ff",
}),
(("https://mangapark.net/manga"
"/ad-astra-per-aspera-hata-kenjirou/i662051/c001.2/1"), {
@@ -121,7 +121,7 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
r"(/manga/[^/?#]+)/?$")
test = (
("https://mangapark.net/manga/aria", {
- "url": "f07caf0bc5097c9b32c8c0d6f446bce1bf4bd329",
+ "url": "b8f7db2f581404753c4af37af66c049a41273b94",
"keyword": "2c0d28efaf84fcfe62932b6931ef3c3987cd48c0",
}),
("https://mangapark.me/manga/aria"),
diff --git a/gallery_dl/extractor/mangasee.py b/gallery_dl/extractor/mangasee.py
new file mode 100644
index 0000000..1b3dd18
--- /dev/null
+++ b/gallery_dl/extractor/mangasee.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://mangasee123.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+import json
+
+
+class MangaseeBase():
+ category = "mangasee"
+ browser = "firefox"
+ root = "https://mangasee123.com"
+
+ @staticmethod
+ def _transform_chapter(data):
+ chapter = data["Chapter"]
+ return {
+ "title" : data["ChapterName"] or "",
+ "index" : chapter[0],
+ "chapter" : int(chapter[1:-1]),
+ "chapter_minor": "" if chapter[-1] == "0" else "." + chapter[-1],
+ "chapter_string": chapter,
+ "lang" : "en",
+ "language": "English",
+ "date" : text.parse_datetime(
+ data["Date"], "%Y-%m-%d %H:%M:%S"),
+ }
+
+
+class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
+ pattern = r"(?:https?://)?mangasee123\.com(/read-online/[^/?#]+\.html)"
+ test = (("https://mangasee123.com/read-online"
+ "/Tokyo-Innocent-chapter-4.5-page-1.html"), {
+ "pattern": r"https://[^/]+/manga/Tokyo-Innocent/0004\.5-00\d\.png",
+ "count": 8,
+ "keyword": {
+ "chapter": 4,
+ "chapter_minor": ".5",
+ "chapter_string": "100045",
+ "count": 8,
+ "date": "dt:2020-01-20 21:52:53",
+ "extension": "png",
+ "filename": r"re:0004\.5-00\d",
+ "index": "1",
+ "lang": "en",
+ "language": "English",
+ "manga": "Tokyo Innocent",
+ "page": int,
+ "title": "",
+ },
+ })
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ self.chapter = data = json.loads(extr("vm.CurChapter =", ";\r\n"))
+ self.domain = extr('vm.CurPathName = "', '"')
+ self.slug = extr('vm.IndexName = "', '"')
+
+ data = self._transform_chapter(data)
+ data["manga"] = extr('vm.SeriesName = "', '"')
+ return data
+
+ def images(self, page):
+ chapter = self.chapter["Chapter"][1:]
+ if chapter[-1] == "0":
+ chapter = chapter[:-1]
+ else:
+ chapter = chapter[:-1] + "." + chapter[-1]
+
+ base = "https://{}/manga/{}/".format(self.domain, self.slug)
+ if self.chapter["Directory"]:
+ base += self.chapter["Directory"] + "/"
+ base += chapter + "-"
+
+ return [
+ ("{}{:>03}.png".format(base, i), None)
+ for i in range(1, int(self.chapter["Page"]) + 1)
+ ]
+
+
+class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
+ chapterclass = MangaseeChapterExtractor
+ pattern = r"(?:https?://)?mangasee123\.com(/manga/[^/?#]+)"
+ test = (("https://mangasee123.com/manga"
+ "/Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai"), {
+ "pattern": MangaseeChapterExtractor.pattern,
+ "count": ">= 17",
+ })
+
+ def chapters(self, page):
+ slug, pos = text.extract(page, 'vm.IndexName = "', '"')
+ chapters = json.loads(text.extract(
+ page, "vm.Chapters = ", ";\r\n", pos)[0])
+
+ result = []
+ for data in map(self._transform_chapter, chapters):
+ url = "{}/read-online/{}-chapter-{}{}".format(
+ self.root, slug, data["chapter"], data["chapter_minor"])
+ if data["index"] != "1":
+ url += "-index-" + data["index"]
+ url += "-page-1.html"
+
+ data["manga"] = slug
+ result.append((url, data))
+ return result
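Note: mangasee image URLs are generated rather than scraped; page
numbers are zero-padded to three digits by the "{:>03}" format spec.
For example (host and slug illustrative):

    base = "https://cdn.example/manga/Tokyo-Innocent/0004.5-"
    urls = ["{}{:>03}.png".format(base, i) for i in range(1, 9)]
    # -> .../0004.5-001.png through .../0004.5-008.png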
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index daa3d65..ff0bfc3 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for mastodon instances"""
+"""Extractors for Mastodon instances"""
from .common import BaseExtractor, Message
from .. import text, exception
@@ -25,30 +25,37 @@ class MastodonExtractor(BaseExtractor):
BaseExtractor.__init__(self, match)
self.instance = self.root.partition("://")[2]
self.item = match.group(match.lastindex)
+ self.reblogs = self.config("reblogs", False)
+ self.replies = self.config("replies", True)
def items(self):
for status in self.statuses():
+
+ if not self.reblogs and status["reblog"]:
+ self.log.debug("Skipping %s (reblog)", status["id"])
+ continue
+ if not self.replies and status["in_reply_to_id"]:
+ self.log.debug("Skipping %s (reply)", status["id"])
+ continue
+
attachments = status["media_attachments"]
- if attachments:
- self.prepare(status)
- yield Message.Directory, status
- for media in attachments:
- status["media"] = media
- url = media["url"]
- yield Message.Url, url, text.nameext_from_url(url, status)
+ del status["media_attachments"]
+
+ status["instance"] = self.instance
+ status["tags"] = [tag["name"] for tag in status["tags"]]
+ status["date"] = text.parse_datetime(
+ status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
+
+ yield Message.Directory, status
+ for media in attachments:
+ status["media"] = media
+ url = media["url"]
+ yield Message.Url, url, text.nameext_from_url(url, status)
def statuses(self):
- """Return an iterable containing all relevant Status-objects"""
+ """Return an iterable containing all relevant Status objects"""
return ()
- def prepare(self, status):
- """Prepare a status object"""
- del status["media_attachments"]
- status["instance"] = self.instance
- status["tags"] = [tag["name"] for tag in status["tags"]]
- status["date"] = text.parse_datetime(
- status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
-
INSTANCES = {
"mastodon.social": {
@@ -97,6 +104,7 @@ class MastodonUserExtractor(MastodonExtractor):
def statuses(self):
api = MastodonAPI(self)
+
username = self.item
handle = "@{}@{}".format(username, self.instance)
for account in api.account_search(handle, 1):
@@ -104,7 +112,12 @@ class MastodonUserExtractor(MastodonExtractor):
break
else:
raise exception.NotFoundError("account")
- return api.account_statuses(account["id"])
+
+ return api.account_statuses(
+ account["id"],
+ only_media=not self.config("text-posts", False),
+ exclude_replies=not self.replies,
+ )
class MastodonStatusExtractor(MastodonExtractor):
@@ -130,8 +143,8 @@ class MastodonStatusExtractor(MastodonExtractor):
class MastodonAPI():
"""Minimal interface for the Mastodon API
+ https://docs.joinmastodon.org/
https://github.com/tootsuite/mastodon
- https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
"""
def __init__(self, extractor):
@@ -153,15 +166,17 @@ class MastodonAPI():
self.headers = {"Authorization": "Bearer " + access_token}
def account_search(self, query, limit=40):
- """Search for content"""
+ """Search for accounts"""
endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit}
return self._call(endpoint, params).json()
- def account_statuses(self, account_id):
- """Get an account's statuses"""
+ def account_statuses(self, account_id, only_media=True,
+ exclude_replies=False):
+ """Fetch an account's statuses"""
endpoint = "/v1/accounts/{}/statuses".format(account_id)
- params = {"only_media": "1"}
+ params = {"only_media" : "1" if only_media else "0",
+ "exclude_replies": "1" if exclude_replies else "0"}
return self._pagination(endpoint, params)
def status(self, status_id):
@@ -202,6 +217,7 @@ class MastodonAPI():
if not url:
return
url = url["url"]
+ params = None
@cache(maxage=100*365*24*3600, keyarg=0)
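Note: "reblogs" and "replies" are ordinary extractor options, checked
once per status in items(). They can be set programmatically with the
same config.set() helper the test suite uses:

    from gallery_dl import config

    config.set(("extractor", "mastodon"), "reblogs", False)  # skip boosts (default)
    config.set(("extractor", "mastodon"), "replies", True)   # keep replies (default)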
diff --git a/gallery_dl/extractor/moebooru.py b/gallery_dl/extractor/moebooru.py
index df77110..604d65c 100644
--- a/gallery_dl/extractor/moebooru.py
+++ b/gallery_dl/extractor/moebooru.py
@@ -32,7 +32,7 @@ class MoebooruExtractor(BooruExtractor):
html = text.extract(page, '<ul id="tag-', '</ul>')[0]
if html:
tags = collections.defaultdict(list)
- pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"']+)")
+ pattern = re.compile(r"tag-type-([^\"' ]+).*?[?;]tags=([^\"'+]+)")
for tag_type, tag_name in pattern.findall(html):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():
diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py
index 1da3e49..348f6a1 100644
--- a/gallery_dl/extractor/naverwebtoon.py
+++ b/gallery_dl/extractor/naverwebtoon.py
@@ -105,5 +105,5 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
return [
self.root + "/webtoon/detail.nhn?" + query
for query in text.extract_iter(
- page, '<a href="/webtoon/detail.nhn?', '"')
+ page, '<a href="/webtoon/detail?', '"')
][::2]
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 64fc938..c6c885c 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -107,11 +107,11 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
"source_url": "https://www.deviantart.com/speccysy/art"
"/Afternoon-Flight-215193985",
"spoilered": False,
- "tag_count": 38,
+ "tag_count": 39,
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2021-05-28T17:39:38Z",
+ "updated_at": "2021-07-13T14:22:40Z",
"uploader": "Clover the Clever",
"uploader_id": 211188,
"upvotes": int,
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index ff07a57..aefe644 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -6,10 +6,10 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images and ugoira from https://www.pixiv.net/"""
+"""Extractors for https://www.pixiv.net/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
from datetime import datetime, timedelta
import itertools
@@ -517,6 +517,48 @@ class PixivFollowExtractor(PixivExtractor):
return {"user_follow": self.api.user}
+class PixivPixivisionExtractor(PixivExtractor):
+ """Extractor for illustrations from a pixivision article"""
+ subcategory = "pixivision"
+ directory_fmt = ("{category}", "pixivision",
+ "{pixivision_id} {pixivision_title}")
+ archive_fmt = "V{pixivision_id}_{id}{suffix}.{extension}"
+ pattern = r"(?:https?://)?(?:www\.)?pixivision\.net/(?:en/)?a/(\d+)"
+ test = (
+ ("https://www.pixivision.net/en/a/2791"),
+ ("https://pixivision.net/a/2791", {
+ "count": 7,
+ "keyword": {
+ "pixivision_id": "2791",
+ "pixivision_title": "What's your favorite music? Editor’s "
+ "picks featuring: “CD Covers”!",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ PixivExtractor.__init__(self, match)
+ self.pixivision_id = match.group(1)
+
+ def works(self):
+ return (
+ self.api.illust_detail(illust_id)
+ for illust_id in util.unique_sequence(text.extract_iter(
+ self.page, '<a href="https://www.pixiv.net/en/artworks/', '"'))
+ )
+
+ def metadata(self):
+ url = "https://www.pixivision.net/en/a/" + self.pixivision_id
+ headers = {"User-Agent": "Mozilla/5.0"}
+ self.page = self.request(url, headers=headers).text
+
+ title = text.extract(self.page, '<title>', ' - pixivision<')[0]
+ return {
+ "pixivision_id" : self.pixivision_id,
+ "pixivision_title": text.unescape(title),
+ }
+
+
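Note: works() deduplicates the artwork IDs extracted from the article
HTML. A sketch, assuming util.unique_sequence() skips consecutive
repeats while preserving order:

    from gallery_dl import util

    ids = list(util.unique_sequence(["123", "123", "456", "456"]))
    # -> ["123", "456"]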
class PixivAppAPI():
"""Minimal interface for the Pixiv App API for mobile devices
diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py
index 49c24bc..dee7bd4 100644
--- a/gallery_dl/extractor/pururin.py
+++ b/gallery_dl/extractor/pururin.py
@@ -6,20 +6,22 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://pururin.io/"""
+"""Extractors for https://pururin.to/"""
from .common import GalleryExtractor
from .. import text, util
+import binascii
import json
class PururinGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries on pururin.io"""
category = "pururin"
- pattern = r"(?:https?://)?(?:www\.)?pururin\.io/(?:gallery|read)/(\d+)"
+ pattern = r"(?:https?://)?(?:www\.)?pururin\.[ti]o/(?:gallery|read)/(\d+)"
test = (
- ("https://pururin.io/gallery/38661/iowant-2", {
- "pattern": r"https://cdn.pururin.io/\w+/images/data/\d+/\d+\.jpg",
+ ("https://pururin.to/gallery/38661/iowant-2", {
+ "pattern": r"https://cdn.pururin.[ti]o/\w+"
+ r"/images/data/\d+/\d+\.jpg",
"keyword": {
"title" : "re:I ?owant 2!!",
"title_en" : "re:I ?owant 2!!",
@@ -41,11 +43,12 @@ class PururinGalleryExtractor(GalleryExtractor):
"language" : "English",
}
}),
- ("https://pururin.io/gallery/7661/unisis-team-vanilla", {
+ ("https://pururin.to/gallery/7661/unisis-team-vanilla", {
"count": 17,
}),
+ ("https://pururin.io/gallery/38661/iowant-2"),
)
- root = "https://pururin.io"
+ root = "https://pururin.to"
def __init__(self, match):
self.gallery_id = match.group(1)
@@ -70,8 +73,8 @@ class PururinGalleryExtractor(GalleryExtractor):
url = "{}/read/{}/01/x".format(self.root, self.gallery_id)
page = self.request(url).text
- info = json.loads(text.unescape(text.extract(
- page, ':gallery="', '"')[0]))
+ info = json.loads(binascii.a2b_base64(text.extract(
+ page, '<gallery-read encoded="', '"')[0]).decode())
self._ext = info["image_extension"]
self._cnt = info["total_pages"]
@@ -97,6 +100,6 @@ class PururinGalleryExtractor(GalleryExtractor):
return data
def images(self, _):
- ufmt = "https://cdn.pururin.io/assets/images/data/{}/{{}}.{}".format(
+ ufmt = "https://cdn.pururin.to/assets/images/data/{}/{{}}.{}".format(
self.gallery_id, self._ext)
return [(ufmt.format(num), None) for num in range(1, self._cnt + 1)]
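Note: pururin now embeds the gallery info as base64-encoded JSON in a
<gallery-read encoded="..."> attribute, hence the binascii.a2b_base64()
call above. A standalone round trip of that decoding step:

    import base64
    import binascii
    import json

    payload = json.dumps({"image_extension": "jpg", "total_pages": 17})
    encoded = base64.b64encode(payload.encode())   # what the page embeds
    info = json.loads(binascii.a2b_base64(encoded).decode())
    assert info["total_pages"] == 17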
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index c62a942..104dc23 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -215,7 +215,7 @@ class JoyreactorTagExtractor(ReactorTagExtractor):
"count": ">= 15",
}),
("http://joyreactor.com/tag/Cirno", {
- "url": "de1e60c15bfb07a0e9603b00dc3d05f60edc7914",
+ "url": "aa59090590b26f4654881301fe8fe748a51625a8",
}),
)
@@ -243,7 +243,7 @@ class JoyreactorUserExtractor(ReactorUserExtractor):
test = (
("http://joyreactor.cc/user/hemantic"),
("http://joyreactor.com/user/Tacoman123", {
- "url": "452cd0fa23e2ad0e122c296ba75aa7f0b29329f6",
+ "url": "60ce9a3e3db791a0899f7fb7643b5b87d09ae3b5",
}),
)
@@ -254,23 +254,27 @@ class JoyreactorPostExtractor(ReactorPostExtractor):
pattern = JR_BASE_PATTERN + r"/post/(\d+)"
test = (
("http://joyreactor.com/post/3721876", { # single image
- "url": "6ce09f239d8b7fdf6dd1664c2afc39618cc87663",
- "keyword": "147ed5b9799ba43cbd16168450afcfae5ddedbf3",
+ "pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
+ r"/cartoon-painting-monster-lake-4841316.jpeg",
+ "count": 1,
+ "keyword": "2207a7dfed55def2042b6c2554894c8d7fda386e",
}),
("http://joyreactor.com/post/3713804", { # 4 images
- "url": "f08ac8493ca0619a3e3c6bedb8d8374af3eec304",
- "keyword": "f12c6f3c2f298fed9b12bd3e70fb823870aa9b93",
+ "pattern": r"http://img\d\.joyreactor\.com/pics/post/full"
+ r"/movie-tv-godzilla-monsters-\d+\.jpeg",
+ "count": 4,
+ "keyword": "d7da9ba7809004c809eedcf6f1c06ad0fbb3df21",
}),
("http://joyreactor.com/post/3726210", { # gif / video
- "url": "33a48e1eca6cb2d298fbbb6536b3283799d6515b",
- "keyword": "d173cc6e88f02a63904e475eacd7050304eb1967",
+ "url": "60f3b9a0a3918b269bea9b4f8f1a5ab3c2c550f8",
+ "keyword": "8949d9d5fc469dab264752432efbaa499561664a",
}),
("http://joyreactor.com/post/3668724", { # youtube embed
"url": "bf1666eddcff10c9b58f6be63fa94e4e13074214",
"keyword": "e18b1ffbd79d76f9a0e90b6d474cc2499e343f0b",
}),
("http://joyreactor.cc/post/1299", { # "malformed" JSON
- "url": "ac900743ed7cf1baf3db3b531c3bc414bf1ffcde",
+ "url": "ab02c6eb7b4035ad961b29ee0770ee41be2fcc39",
}),
)
@@ -311,7 +315,7 @@ class PornreactorUserExtractor(ReactorUserExtractor):
test = (
("http://pornreactor.cc/user/Disillusion", {
"range": "1-25",
- "count": ">= 25",
+ "count": ">= 20",
}),
("http://fapreactor.com/user/Disillusion"),
)
@@ -324,10 +328,10 @@ class PornreactorPostExtractor(ReactorPostExtractor):
pattern = PR_BASE_PATTERN + r"/post/(\d+)"
test = (
("http://pornreactor.cc/post/863166", {
- "url": "680db1e33ca92ff70b2c0e1708c471cbe2201324",
+ "url": "a09fb0577489e1f9564c25d0ad576f81b19c2ef3",
"content": "ec6b0568bfb1803648744077da082d14de844340",
}),
("http://fapreactor.com/post/863166", {
- "url": "864ecd5785e4898301aa8d054dd653b1165be158",
+ "url": "2a956ce0c90e8bc47b4392db4fa25ad1342f3e54",
}),
)
diff --git a/gallery_dl/extractor/seisoparty.py b/gallery_dl/extractor/seisoparty.py
new file mode 100644
index 0000000..b736b4b
--- /dev/null
+++ b/gallery_dl/extractor/seisoparty.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://seiso.party/"""
+
+from .common import Extractor, Message
+from .. import text
+import re
+
+
+class SeisopartyExtractor(Extractor):
+ """Base class for seisoparty extractors"""
+ category = "seisoparty"
+ root = "https://seiso.party"
+ directory_fmt = ("{category}", "{service}", "{username}")
+ filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "{service}_{user}_{id}_{num}"
+ cookiedomain = ".seiso.party"
+ _warning = True
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user_name = None
+ self._find_files = re.compile(
+ r'href="(https://cdn(?:-\d)?\.seiso\.party/files/[^"]+)').findall
+
+ def items(self):
+ if self._warning:
+ if not self._check_cookies(("__ddg1", "__ddg2")):
+ self.log.warning("no DDoS-GUARD cookies set (__ddg1, __ddg2)")
+ SeisopartyExtractor._warning = False
+
+ for post in self.posts():
+ files = post.pop("files")
+ yield Message.Directory, post
+ for post["num"], url in enumerate(files, 1):
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ def _parse_post(self, page, post_id):
+ extr = text.extract_from(page)
+ return {
+ "service" : self.service,
+ "user" : self.user_id,
+ "username": self.user_name,
+ "id" : post_id,
+ "date" : text.parse_datetime(extr(
+ '<div class="margin-bottom-15 minor-text">', '<'),
+ "%Y-%m-%d %H:%M:%S %Z"),
+ "title" : text.unescape(extr('class="post-title">', '<')),
+ "content" : text.unescape(extr("\n<p>\n", "\n</p>\n").strip()),
+ "files" : self._find_files(page),
+ }
+
+
+class SeisopartyUserExtractor(SeisopartyExtractor):
+ """Extractor for all posts from a seiso.party user listing"""
+ subcategory = "user"
+ pattern = r"(?:https?://)?seiso\.party/artists/([^/?#]+)/([^/?#]+)"
+ test = (
+ ("https://seiso.party/artists/fanbox/21", {
+ "pattern": r"https://cdn\.seiso\.party/files/fanbox/\d+/",
+ "count": ">=15",
+ "keyword": {
+ "content": str,
+ "date": "type:datetime",
+ "id": r"re:\d+",
+ "num": int,
+ "service": "fanbox",
+ "title": str,
+ "user": "21",
+ "username": "雨",
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ SeisopartyExtractor.__init__(self, match)
+ self.service, self.user_id = match.groups()
+
+ def posts(self):
+ url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id)
+ page = self.request(url).text
+ self.user_name, pos = text.extract(page, '<span class="title">', '<')
+
+ url = self.root + text.extract(
+ page, 'href="', '"', page.index('id="content"', pos))[0]
+ response = self.request(url)
+ headers = {"Referer": url}
+
+ while True:
+ yield self._parse_post(response.text, url.rpartition("/")[2])
+ response = self.request(url + "/next", headers=headers)
+ if url == response.url:
+ return
+ url = headers["Referer"] = response.url
+
+
+class SeisopartyPostExtractor(SeisopartyExtractor):
+ """Extractor for a single seiso.party post"""
+ subcategory = "post"
+ pattern = r"(?:https?://)?seiso\.party/post/([^/?#]+)/([^/?#]+)/([^/?#]+)"
+ test = (
+ ("https://seiso.party/post/fanbox/21/371", {
+ "url": "75f13b92de0ce399b6163c3de18f1f36011c2366",
+ "count": 2,
+ "keyword": {
+ "content": "この前描いためぐるちゃんのPSDファイルです。\n"
+ "どうぞよろしくお願いします。",
+ "date": "dt:2021-05-06 12:38:31",
+ "extension": "re:psd|jpg",
+ "filename": "re:backcourt|ffb2ccb7a3586d05f9a4620329dd131e",
+ "id": "371",
+ "num": int,
+ "service": "fanbox",
+ "title": "MEGURU.PSD",
+ "user": "21",
+ "username": "雨",
+ },
+ }),
+ ("https://seiso.party/post/patreon/429/95949", {
+ "pattern": r"https://cdn-2\.seiso\.party/files/patreon/95949/",
+ "count": 2,
+ }),
+ )
+
+ def __init__(self, match):
+ SeisopartyExtractor.__init__(self, match)
+ self.service, self.user_id, self.post_id = match.groups()
+
+ def posts(self):
+ url = "{}/artists/{}/{}".format(self.root, self.service, self.user_id)
+ page = self.request(url).text
+ self.user_name, pos = text.extract(page, '<span class="title">', '<')
+
+ url = "{}/post/{}/{}/{}".format(
+ self.root, self.service, self.user_id, self.post_id)
+ return (self._parse_post(self.request(url).text, self.post_id),)
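Note: like kemonoparty, this extractor warns when the DDoS-GUARD
cookies are missing; they can be supplied as a plain name/value
mapping, as the test suite does (values below are placeholders):

    from gallery_dl import config

    config.set(("extractor", "seisoparty"), "cookies",
               {"__ddg1": "...", "__ddg2": "..."})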
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 83836e5..ae8b58d 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -140,8 +140,8 @@ class SubscribestarUserExtractor(SubscribestarExtractor):
test = (
("https://www.subscribestar.com/subscribestar", {
"count": ">= 20",
- "pattern": r"https://star-uploads.s\d+-us-west-\d+.amazonaws.com"
- r"/uploads/users/11/",
+ "pattern": r"https://(star-uploads|ss-uploads-prod)\.s\d+-us-west-"
+ r"\d+\.amazonaws\.com/uploads(_v2)?/users/11/",
"keyword": {
"author_id": 11,
"author_name": "subscribestar",
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 5550f96..fd0140d 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -113,18 +113,16 @@ class TwitterExtractor(Extractor):
"url" : base + "orig",
"width" : width,
"height" : height,
- "_fallback": self._image_fallback(base, url + ":"),
+ "_fallback": self._image_fallback(base),
}))
else:
files.append({"url": media["media_url"]})
@staticmethod
- def _image_fallback(new, old):
- yield old + "orig"
-
- for size in ("large", "medium", "small"):
- yield new + size
- yield old + size
+ def _image_fallback(base):
+ yield base + "large"
+ yield base + "medium"
+ yield base + "small"
def _extract_card(self, tweet, files):
card = tweet["card"]
@@ -486,8 +484,9 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("retweets", "original"),),
"count": 2,
"keyword": {
- "tweet_id": 1296296016002547713,
- "date" : "dt:2020-08-20 04:00:28",
+ "tweet_id" : 1296296016002547713,
+ "retweet_id": 1296296016002547713,
+ "date" : "dt:2020-08-20 04:00:28",
},
}),
# all Tweets from a conversation (#1319)
@@ -526,18 +525,17 @@ class TwitterImageExtractor(Extractor):
self.id, self.fmt = match.groups()
def items(self):
- base = "https://pbs.twimg.com/media/" + self.id
- new = base + "?format=" + self.fmt + "&name="
- old = base + "." + self.fmt + ":"
+ base = "https://pbs.twimg.com/media/{}?format={}&name=".format(
+ self.id, self.fmt)
data = {
"filename": self.id,
"extension": self.fmt,
- "_fallback": TwitterExtractor._image_fallback(new, old),
+ "_fallback": TwitterExtractor._image_fallback(base),
}
yield Message.Directory, data
- yield Message.Url, new + "orig", data
+ yield Message.Url, base + "orig", data
class TwitterAPI():
@@ -712,7 +710,7 @@ class TwitterAPI():
def _guest_token(self):
root = "https://api.twitter.com"
endpoint = "/1.1/guest/activate.json"
- return self._call(endpoint, None, root, "POST")["guest_token"]
+ return str(self._call(endpoint, None, root, "POST")["guest_token"])
def _call(self, endpoint, params, root=None, method="GET"):
if root is None:
@@ -809,6 +807,7 @@ class TwitterAPI():
if original_retweets:
if not retweet:
continue
+ retweet["retweeted_status_id_str"] = retweet["id_str"]
retweet["_retweet_id_str"] = tweet["id_str"]
tweet = retweet
elif retweet:
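Note: image fallbacks are now derived from a single
"?format=...&name=" base URL; the legacy ":size" suffix variants were
dropped. Illustration of the generator above (media ID is a placeholder):

    def image_fallback(base):
        # smaller variants to try if the "orig" download fails
        yield base + "large"
        yield base + "medium"
        yield base + "small"

    base = "https://pbs.twimg.com/media/XXXXXXXX?format=jpg&name="
    urls = [base + "orig"] + list(image_fallback(base))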
diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py
index 1ce1140..2178641 100644
--- a/gallery_dl/extractor/vk.py
+++ b/gallery_dl/extractor/vk.py
@@ -17,43 +17,60 @@ class VkPhotosExtractor(Extractor):
"""Extractor for photos from a vk user"""
category = "vk"
subcategory = "photos"
- directory_fmt = ("{category}", "{user[id]}")
+ directory_fmt = ("{category}", "{user[name]|user[id]}")
filename_fmt = "{id}.{extension}"
archive_fmt = "{id}"
root = "https://vk.com"
request_interval = 1.0
- pattern = r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:albums|photos|id)(\d+)"
+ pattern = (r"(?:https://)?(?:www\.|m\.)?vk\.com/(?:"
+ r"(?:albums|photos|id)(-?\d+)|([^/?#]+))")
test = (
("https://vk.com/id398982326", {
"pattern": r"https://sun\d+-\d+\.userapi\.com/c\d+/v\d+"
r"/[0-9a-f]+/[\w-]+\.jpg",
"count": ">= 35",
+ "keyword": {
+ "id": r"re:\d+",
+ "user": {
+ "id": "398982326",
+ "info": "Мы за Движуху! – m1ni SounD #4 [EROmusic]",
+ "name": "",
+ "nick": "Dobrov Kurva",
+ },
+ },
+ }),
+ ("https://vk.com/cosplayinrussia", {
+ "range": "75-100",
+ "keyword": {
+ "id": r"re:\d+",
+ "user": {
+ "id" : "-165740836",
+ "info": "Предложка открыта, кидайте ваши косплейчики. При "
+ "правильном оформлении они будут опубликованы",
+ "name": "cosplayinrussia",
+ "nick": "Косплей | Cosplay 18+",
+ },
+ },
}),
("https://m.vk.com/albums398982326"),
("https://www.vk.com/id398982326?profile=1"),
+ ("https://vk.com/albums-165740836"),
)
def __init__(self, match):
Extractor.__init__(self, match)
- self.user_id = match.group(1)
+ self.user_id, self.user_name = match.groups()
def items(self):
- user_id = self.user_id
-
- if self.config("metadata"):
- url = "{}/id{}".format(self.root, user_id)
- extr = text.extract_from(self.request(url).text)
- data = {"user": {
- "id" : user_id,
- "nick": text.unescape(extr(
- "<title>", " | VK<")),
- "name": text.unescape(extr(
- '<h1 class="page_name">', "<")).replace("  ", " "),
- "info": text.unescape(text.remove_html(extr(
- '<span class="current_text">', '</span')))
- }}
+ if self.user_id:
+ user_id = self.user_id
+ prefix = "public" if user_id[0] == "-" else "id"
+ url = "{}/{}{}".format(self.root, prefix, user_id.lstrip("-"))
+ data = self._extract_profile(url)
else:
- data = {"user": {"id": user_id}}
+ url = "{}/{}".format(self.root, self.user_name)
+ data = self._extract_profile(url)
+ user_id = data["user"]["id"]
photos_url = "{}/photos{}".format(self.root, user_id)
headers = {
@@ -86,3 +103,15 @@ class VkPhotosExtractor(Extractor):
if cnt <= 40 or offset == params["offset"]:
return
params["offset"] = offset
+
+ def _extract_profile(self, url):
+ extr = text.extract_from(self.request(url).text)
+ return {"user": {
+ "name": text.unescape(extr(
+ 'rel="canonical" href="https://vk.com/', '"')),
+ "nick": text.unescape(extr(
+ '<h1 class="page_name">', "<")).replace("  ", " "),
+ "info": text.unescape(text.remove_html(extr(
+ '<span class="current_text">', '</span'))),
+ "id" : extr('<a href="/albums', '"'),
+ }}
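Note: user IDs may now be negative (communities); the sign selects
between /id... and /public... profile URLs, as in items() above:

    user_id = "-165740836"
    prefix = "public" if user_id[0] == "-" else "id"
    url = "https://vk.com/{}{}".format(prefix, user_id.lstrip("-"))
    # -> https://vk.com/public165740836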
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index cebb421..e2474c9 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -11,7 +11,7 @@
from .common import GalleryExtractor, Extractor, Message
from .. import exception, text, util
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/((en|fr)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)"
class WebtoonsBase():
@@ -118,6 +118,9 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
"list?title_no=210827&page=9"), {
"count": ">= 18",
}),
+ # (#1643)
+ ("https://www.webtoons.com/es/romance/lore-olympus/"
+ "list?title_no=1725"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py
index 511a609..9f95e14 100644
--- a/gallery_dl/extractor/wikiart.py
+++ b/gallery_dl/extractor/wikiart.py
@@ -71,8 +71,8 @@ class WikiartArtistExtractor(WikiartExtractor):
directory_fmt = ("{category}", "{artist[artistName]}")
pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$"
test = ("https://www.wikiart.org/en/thomas-cole", {
- "url": "5140343730331786117fa5f4c013a6153393e28e",
- "keyword": "4d9cbc50ebddfcb186f31ff70b08833578dd0070",
+ "url": "deabec0ed7efa97e2a729ff9d08b539143106bac",
+ "keyword": "751a5457b71c8704982d3bb6485a214cd3d07bf9",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py
new file mode 100644
index 0000000..d380dab
--- /dev/null
+++ b/gallery_dl/extractor/ytdl.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for sites supported by youtube-dl"""
+
+from .common import Extractor, Message
+from .. import text, config, exception
+
+
+class YoutubeDLExtractor(Extractor):
+ """Generic extractor for youtube-dl supported URLs"""
+ category = "ytdl"
+ directory_fmt = ("{category}", "{subcategory}")
+ filename_fmt = "{title}-{id}.{extension}"
+ archive_fmt = "{extractor_key} {id}"
+ pattern = r"ytdl:(.*)"
+ test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",)
+
+ def __init__(self, match):
+ # import main youtube_dl module
+ module_name = self.ytdl_module_name = config.get(
+ ("extractor", "ytdl"), "module") or "youtube_dl"
+ module = __import__(module_name)
+
+ # find suitable youtube_dl extractor
+ self.ytdl_url = url = match.group(1)
+ generic = config.interpolate(("extractor", "ytdl"), "generic", True)
+ if generic == "force":
+ self.ytdl_ie_key = "Generic"
+ self.force_generic_extractor = True
+ else:
+ for ie in module.extractor.gen_extractor_classes():
+ if ie.suitable(url):
+ self.ytdl_ie_key = ie.ie_key()
+ break
+ if not generic and self.ytdl_ie_key == "Generic":
+ raise exception.NoExtractorError()
+ self.force_generic_extractor = False
+
+ # set subcategory to youtube_dl extractor's key
+ self.subcategory = self.ytdl_ie_key
+ Extractor.__init__(self, match)
+
+ def items(self):
+ # import subcategory module
+ ytdl_module = __import__(
+ config.get(("extractor", "ytdl", self.subcategory), "module") or
+ self.ytdl_module_name)
+ self.log.debug("Using %s", ytdl_module)
+
+ # construct YoutubeDL object
+ options = {
+ "format" : self.config("format"),
+ "retries" : self._retries,
+ "socket_timeout" : self._timeout,
+ "nocheckcertificate" : not self._verify,
+ "proxy" : self.session.proxies.get("http"),
+ "force_generic_extractor": self.force_generic_extractor,
+ "nopart" : not self.config("part", True),
+ "updatetime" : self.config("mtime", True),
+ "ratelimit" : text.parse_bytes(
+ self.config("rate"), None),
+ "min_filesize" : text.parse_bytes(
+ self.config("filesize-min"), None),
+ "max_filesize" : text.parse_bytes(
+ self.config("filesize-max"), None),
+ }
+
+ raw_options = self.config("raw-options")
+ if raw_options:
+ options.update(raw_options)
+ if self.config("logging", True):
+ options["logger"] = self.log
+ options["extract_flat"] = "in_playlist"
+
+ username, password = self._get_auth_info()
+ if username:
+ options["username"], options["password"] = username, password
+ del username, password
+
+ ytdl = ytdl_module.YoutubeDL(options)
+
+ # transfer cookies to ytdl
+ cookies = self.session.cookies
+ if cookies:
+ set_cookie = ytdl.cookiejar.set_cookie
+ for cookie in cookies:
+ set_cookie(cookie)
+
+ # extract youtube_dl info_dict
+ info_dict = ytdl._YoutubeDL__extract_info(
+ self.ytdl_url,
+ ytdl.get_info_extractor(self.ytdl_ie_key),
+ False, {}, True)
+
+ if "entries" in info_dict:
+ results = self._process_entries(ytdl, info_dict["entries"])
+ else:
+ results = (info_dict,)
+
+ # yield results
+ for info_dict in results:
+ info_dict["extension"] = None
+ info_dict["_ytdl_info_dict"] = info_dict
+ info_dict["_ytdl_instance"] = ytdl
+
+ url = "ytdl:" + (info_dict.get("url") or
+ info_dict.get("webpage_url") or
+ self.ytdl_url)
+
+ yield Message.Directory, info_dict
+ yield Message.Url, url, info_dict
+
+ def _process_entries(self, ytdl, entries):
+ for entry in entries:
+ if entry.get("_type") in ("url", "url_transparent"):
+ info_dict = ytdl.extract_info(
+ entry["url"], False,
+ ie_key=entry.get("ie_key"))
+ if "entries" in info_dict:
+ yield from self._process_entries(
+ ytdl, info_dict["entries"])
+ else:
+ yield info_dict
+ else:
+ yield entry
+
+
+if config.get(("extractor", "ytdl"), "enabled"):
+ # make 'ytdl:' prefix optional
+ YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)"
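Note: the ytdl extractor is opt-in; when extractor.ytdl.enabled is set
before the extractor modules are loaded, the "ytdl:" prefix becomes
optional (see the pattern override above). A sketch with config.set();
"yt_dlp" is only an assumed example value for the "module" option:

    from gallery_dl import config

    config.set(("extractor", "ytdl"), "enabled", True)
    config.set(("extractor", "ytdl"), "module", "yt_dlp")
    # URLs then match with or without the prefix:
    #   ytdl:https://www.youtube.com/watch?v=...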
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index dddc03a..953d9c3 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -33,6 +33,7 @@ class Job():
self.pathfmt = None
self.kwdict = {}
self.status = 0
+ self.url_key = extr.config("url-metadata")
self._logger_extra = {
"job" : self,
@@ -57,7 +58,7 @@ class Job():
extr.session.adapters = pextr.session.adapters
# user-supplied metadata
- kwdict = self.extractor.config("keywords")
+ kwdict = extr.config("keywords")
if kwdict:
self.kwdict.update(kwdict)
@@ -106,19 +107,23 @@ class Job():
def dispatch(self, msg):
"""Call the appropriate message handler"""
if msg[0] == Message.Url:
- _, url, kwds = msg
- if self.pred_url(url, kwds):
- self.update_kwdict(kwds)
- self.handle_url(url, kwds)
+ _, url, kwdict = msg
+ if self.url_key:
+ kwdict[self.url_key] = url
+ if self.pred_url(url, kwdict):
+ self.update_kwdict(kwdict)
+ self.handle_url(url, kwdict)
elif msg[0] == Message.Directory:
self.update_kwdict(msg[1])
self.handle_directory(msg[1])
elif msg[0] == Message.Queue:
- _, url, kwds = msg
- if self.pred_queue(url, kwds):
- self.handle_queue(url, kwds)
+ _, url, kwdict = msg
+ if self.url_key:
+ kwdict[self.url_key] = url
+ if self.pred_queue(url, kwdict):
+ self.handle_queue(url, kwdict)
elif msg[0] == Message.Version:
if msg[1] != 1:
@@ -302,11 +307,18 @@ class DownloadJob(Job):
else:
extr._parentdir = pextr._parentdir
- if pextr.config("parent-metadata"):
- if self.kwdict:
- job.kwdict.update(self.kwdict)
- if kwdict:
- job.kwdict.update(kwdict)
+ pmeta = pextr.config("parent-metadata")
+ if pmeta:
+ if isinstance(pmeta, str):
+ data = self.kwdict.copy()
+ if kwdict:
+ data.update(kwdict)
+ job.kwdict[pmeta] = data
+ else:
+ if self.kwdict:
+ job.kwdict.update(self.kwdict)
+ if kwdict:
+ job.kwdict.update(kwdict)
if pextr.config("parent-skip"):
job._skipcnt = self._skipcnt
@@ -626,7 +638,7 @@ class UrlJob(Job):
extr = extractor.find(url)
if extr:
- self.status |= self.__class__(extr, self).run()
+ self.status |= self.__class__(extr, self, self.depth + 1).run()
else:
self._write_unsupported(url)
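Note: "url-metadata" names a kwdict key that receives each file's URL
before predicates and handlers run, making the URL available to format
strings and post-processors. For example:

    from gallery_dl import config

    config.set(("extractor",), "url-metadata", "gdl_file_url")
    # "{gdl_file_url}" can then be used in filename format strings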
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index ef1d304..c721612 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -89,7 +89,7 @@ class MetadataPP(PostProcessor):
ext = kwdict.get("extension")
kwdict["extension"] = pathfmt.extension
kwdict["extension"] = pathfmt.prefix + self._extension_fmt(kwdict)
- filename = pathfmt.build_filename()
+ filename = pathfmt.build_filename(kwdict)
kwdict["extension"] = ext
return filename
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index fbede3e..3462138 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -145,6 +145,14 @@ def to_string(value):
return str(value)
+def to_timestamp(dt):
+ """Convert naive datetime to UTC timestamp string"""
+ try:
+ return str((dt - EPOCH) // SECOND)
+ except Exception:
+ return ""
+
+
def dump_json(obj, fp=sys.stdout, ensure_ascii=True, indent=4):
"""Serialize 'obj' as JSON and write it to 'fp'"""
json.dump(
@@ -370,6 +378,8 @@ class UniversalNone():
NONE = UniversalNone()
+EPOCH = datetime.datetime(1970, 1, 1)
+SECOND = datetime.timedelta(0, 1)
WINDOWS = (os.name == "nt")
SENTINEL = object()
SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
@@ -390,11 +400,17 @@ def compile_expression(expr, name="<expr>", globals=GLOBALS):
def build_predicate(predicates):
if not predicates:
- return lambda url, kwds: True
+ return lambda url, kwdict: True
elif len(predicates) == 1:
return predicates[0]
- else:
- return ChainPredicate(predicates)
+ return functools.partial(chain_predicates, predicates)
+
+
+def chain_predicates(predicates, url, kwdict):
+ for pred in predicates:
+ if not pred(url, kwdict):
+ return False
+ return True
class RangePredicate():
@@ -408,7 +424,7 @@ class RangePredicate():
else:
self.lower, self.upper = 0, 0
- def __call__(self, url, kwds):
+ def __call__(self, url, _):
self.index += 1
if self.index > self.upper:
@@ -473,7 +489,7 @@ class UniquePredicate():
def __init__(self):
self.urls = set()
- def __call__(self, url, kwds):
+ def __call__(self, url, _):
if url.startswith("text:"):
return True
if url not in self.urls:
@@ -498,18 +514,6 @@ class FilterPredicate():
raise exception.FilterError(exc)
-class ChainPredicate():
- """Predicate; True if all of its predicates return True"""
- def __init__(self, predicates):
- self.predicates = predicates
-
- def __call__(self, url, kwds):
- for pred in self.predicates:
- if not pred(url, kwds):
- return False
- return True
-
-
class ExtendedUrl():
"""URL with attached config key-value pairs"""
def __init__(self, url, gconf, lconf):
@@ -536,6 +540,7 @@ class Formatter():
- "d": calls text.parse_timestamp
- "U": calls urllib.parse.unquote
- "S": calls util.to_string()
+ - "T": calls util.to_timestamp()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
Extra Format Specifiers:
@@ -559,12 +564,14 @@ class Formatter():
Replaces all occurrences of <old> with <new>
Example: {f:R /_/} -> "f_o_o_b_a_r" (if "f" is "f o o b a r")
"""
+ CACHE = {}
CONVERSIONS = {
"l": str.lower,
"u": str.upper,
"c": str.capitalize,
"C": string.capwords,
"t": str.strip,
+ "T": to_timestamp,
"d": text.parse_timestamp,
"U": urllib.parse.unquote,
"S": to_string,
@@ -575,19 +582,26 @@ class Formatter():
def __init__(self, format_string, default=None):
self.default = default
- self.result = []
- self.fields = []
-
- for literal_text, field_name, format_spec, conversion in \
- _string.formatter_parser(format_string):
- if literal_text:
- self.result.append(literal_text)
- if field_name:
- self.fields.append((
- len(self.result),
- self._field_access(field_name, format_spec, conversion),
- ))
- self.result.append("")
+ key = (format_string, default)
+
+ try:
+ self.result, self.fields = self.CACHE[key]
+ except KeyError:
+ self.result = []
+ self.fields = []
+
+ for literal_text, field_name, format_spec, conv in \
+ _string.formatter_parser(format_string):
+ if literal_text:
+ self.result.append(literal_text)
+ if field_name:
+ self.fields.append((
+ len(self.result),
+ self._field_access(field_name, format_spec, conv),
+ ))
+ self.result.append("")
+
+ self.CACHE[key] = (self.result, self.fields)
if len(self.result) == 1:
if self.fields:
@@ -777,9 +791,20 @@ class PathFormat():
raise exception.FilenameFormatError(exc)
directory_fmt = config("directory")
- if directory_fmt is None:
- directory_fmt = extractor.directory_fmt
try:
+ if directory_fmt is None:
+ directory_fmt = extractor.directory_fmt
+ elif isinstance(directory_fmt, dict):
+ self.directory_conditions = [
+ (compile_expression(expr), [
+ Formatter(fmt, kwdefault).format_map
+ for fmt in fmts
+ ])
+ for expr, fmts in directory_fmt.items() if expr
+ ]
+ self.build_directory = self.build_directory_conditional
+ directory_fmt = directory_fmt.get("", extractor.directory_fmt)
+
self.directory_formatters = [
Formatter(dirfmt, kwdefault).format_map
for dirfmt in directory_fmt
@@ -793,19 +818,6 @@ class PathFormat():
self.path = self.realpath = self.temppath = ""
self.delete = self._create_directory = False
- basedir = extractor._parentdir
- if not basedir:
- basedir = config("base-directory")
- if basedir is None:
- basedir = "." + os.sep + "gallery-dl" + os.sep
- elif basedir:
- basedir = expand_path(basedir)
- if os.altsep and os.altsep in basedir:
- basedir = basedir.replace(os.altsep, os.sep)
- if basedir[-1] != os.sep:
- basedir += os.sep
- self.basedirectory = basedir
-
extension_map = config("extension-map")
if extension_map is None:
extension_map = self.EXTENSION_MAP
@@ -826,6 +838,22 @@ class PathFormat():
remove = config("path-remove", "\x00-\x1f\x7f")
self.clean_path = self._build_cleanfunc(remove, "")
+ basedir = extractor._parentdir
+ if not basedir:
+ basedir = config("base-directory")
+ sep = os.sep
+ if basedir is None:
+ basedir = "." + sep + "gallery-dl" + sep
+ elif basedir:
+ basedir = expand_path(basedir)
+ altsep = os.altsep
+ if altsep and altsep in basedir:
+ basedir = basedir.replace(altsep, sep)
+ if basedir[-1] != sep:
+ basedir += sep
+ basedir = self.clean_path(basedir)
+ self.basedirectory = basedir
+
@staticmethod
def _build_cleanfunc(chars, repl):
if not chars:
@@ -837,8 +865,8 @@ class PathFormat():
def func(x, c=chars, r=repl):
return x.replace(c, r)
else:
- def func(x, sub=re.compile("[" + chars + "]").sub, r=repl):
- return sub(r, x)
+ return functools.partial(
+ re.compile("[" + chars + "]").sub, repl)
return func
def open(self, mode="wb"):
@@ -870,29 +898,14 @@ class PathFormat():
def set_directory(self, kwdict):
"""Build directory path and create it if necessary"""
self.kwdict = kwdict
-
- # Build path segments by applying 'kwdict' to directory format strings
- segments = []
- append = segments.append
- try:
- for formatter in self.directory_formatters:
- segment = formatter(kwdict).strip()
- if WINDOWS:
- # remove trailing dots and spaces (#647)
- segment = segment.rstrip(". ")
- if segment:
- append(self.clean_segment(segment))
- except Exception as exc:
- raise exception.DirectoryFormatError(exc)
-
- # Join path segments
sep = os.sep
- directory = self.clean_path(self.basedirectory + sep.join(segments))
- # Ensure 'directory' ends with a path separator
+ segments = self.build_directory(kwdict)
if segments:
- directory += sep
- self.directory = directory
+ self.directory = directory = self.basedirectory + self.clean_path(
+ sep.join(segments) + sep)
+ else:
+ self.directory = directory = self.basedirectory
if WINDOWS:
# Enable longer-than-260-character paths on Windows
@@ -935,17 +948,15 @@ class PathFormat():
self.temppath = self.realpath = self.realpath[:-1]
return True
- def build_filename(self):
+ def build_filename(self, kwdict):
"""Apply 'kwdict' to filename format string"""
try:
return self.clean_path(self.clean_segment(
- self.filename_formatter(self.kwdict)))
+ self.filename_formatter(kwdict)))
except Exception as exc:
raise exception.FilenameFormatError(exc)
- def build_filename_conditional(self):
- kwdict = self.kwdict
-
+ def build_filename_conditional(self, kwdict):
try:
for condition, formatter in self.filename_conditions:
if condition(kwdict):
@@ -956,12 +967,49 @@ class PathFormat():
except Exception as exc:
raise exception.FilenameFormatError(exc)
+ def build_directory(self, kwdict):
+ """Apply 'kwdict' to directory format strings"""
+ segments = []
+ append = segments.append
+
+ try:
+ for formatter in self.directory_formatters:
+ segment = formatter(kwdict).strip()
+ if WINDOWS:
+ # remove trailing dots and spaces (#647)
+ segment = segment.rstrip(". ")
+ if segment:
+ append(self.clean_segment(segment))
+ return segments
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
+ def build_directory_conditional(self, kwdict):
+ segments = []
+ append = segments.append
+
+ try:
+ for condition, formatters in self.directory_conditions:
+ if condition(kwdict):
+ break
+ else:
+ formatters = self.directory_formatters
+ for formatter in formatters:
+ segment = formatter(kwdict).strip()
+ if WINDOWS:
+ segment = segment.rstrip(". ")
+ if segment:
+ append(self.clean_segment(segment))
+ return segments
+ except Exception as exc:
+ raise exception.DirectoryFormatError(exc)
+
def build_path(self):
"""Combine directory and filename to full paths"""
if self._create_directory:
os.makedirs(self.realdirectory, exist_ok=True)
self._create_directory = False
- self.filename = filename = self.build_filename()
+ self.filename = filename = self.build_filename(self.kwdict)
self.path = self.directory + filename
self.realpath = self.realdirectory + filename
if not self.temppath:
@@ -1028,9 +1076,9 @@ class DownloadArchive():
# fallback for missing WITHOUT ROWID support (#553)
self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry PRIMARY KEY)")
-
- self.keygen = (extractor.category + extractor.config(
- "archive-format", extractor.archive_fmt)
+ self.keygen = (
+ extractor.config("archive-prefix", extractor.category) +
+ extractor.config("archive-format", extractor.archive_fmt)
).format_map
def check(self, kwdict):
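Note: util.py gains a "T" conversion backed by to_timestamp() and a
per-format-string parser cache (Formatter.CACHE). A quick check of the
conversion, matching the tests below:

    import datetime
    from gallery_dl import util

    fmt = util.Formatter("{date!T}")
    assert fmt.format_map({"date": datetime.datetime(2010, 1, 1)}) == "1262304000"
    assert util.to_timestamp(None) == ""   # invalid input yields ""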
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 1a3e0e4..fbb4e5b 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.18.0"
+__version__ = "1.18.2"
diff --git a/test/test_extractor.py b/test/test_extractor.py
index f04e1c7..de43ff7 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -147,7 +147,7 @@ class TestExtractorModule(unittest.TestCase):
return c.capitalize()
for extr in extractor.extractors():
- if extr.category not in ("", "oauth"):
+ if extr.category not in ("", "oauth", "ytdl"):
expected = "{}{}Extractor".format(
capitalize(extr.category),
capitalize(extr.subcategory),
diff --git a/test/test_results.py b/test/test_results.py
index 5b22ecd..c36b6dd 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -22,7 +22,6 @@ from gallery_dl import extractor, util, job, config, exception # noqa E402
# temporary issues, etc.
BROKEN = {
- "imagevenue",
"photobucket",
}
@@ -315,6 +314,11 @@ def setup_test_config():
"e621", "inkbunny", "tapas", "pillowfort", "mangadex"):
config.set(("extractor", category), "username", None)
+ config.set(("extractor", "kemonoparty"), "cookies", {
+ "__ddg1": "0gBDGpJ3KZQmA4B9QH25", "__ddg2": "lmj5s1jnJOvhPXCX"})
+ config.set(("extractor", "seisoparty"), "cookies", {
+ "__ddg1": "Y8rBxSDHO5UCEtQvzyI9", "__ddg2": "lmj5s1jnJOvhPXCX"})
+
config.set(("extractor", "mastodon.social"), "access-token",
"Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")
diff --git a/test/test_util.py b/test/test_util.py
index d90d5ad..2d574da 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -124,7 +124,7 @@ class TestPredicate(unittest.TestCase):
pred = util.build_predicate([util.UniquePredicate(),
util.UniquePredicate()])
- self.assertIsInstance(pred, util.ChainPredicate)
+ self.assertIs(pred.func, util.chain_predicates)
class TestISO639_1(unittest.TestCase):
@@ -271,6 +271,7 @@ class TestFormatter(unittest.TestCase):
"s": " \n\r\tSPACE ",
"u": "%27%3C%20/%20%3E%27",
"t": 1262304000,
+ "dt": datetime.datetime(2010, 1, 1),
"name": "Name",
"title1": "Title",
"title2": "",
@@ -295,6 +296,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{n!S}", "")
self._run_test("{t!d}", datetime.datetime(2010, 1, 1))
self._run_test("{t!d:%Y-%m-%d}", "2010-01-01")
+ self._run_test("{dt!T}", "1262304000")
with self.assertRaises(KeyError):
self._run_test("{a!q}", "hello world")
@@ -601,6 +603,11 @@ class TestOther(unittest.TestCase):
self.assertEqual(f(["a", "b", "c"]), "a, b, c")
self.assertEqual(f([1, 2, 3]), "1, 2, 3")
+ def test_to_timestamp(self, f=util.to_timestamp):
+ self.assertEqual(f(util.EPOCH), "0")
+ self.assertEqual(f(datetime.datetime(2010, 1, 1)), "1262304000")
+ self.assertEqual(f(None), "")
+
def test_universal_none(self):
obj = util.NONE