author    Unit 193 <unit193@unit193.net>  2021-06-22 22:30:44 -0400
committer Unit 193 <unit193@unit193.net>  2021-06-22 22:30:44 -0400
commit    29b8ce4676815053724f96769fa09d42428a79af (patch)
tree      fe96f96b15332550c500a55ec92731117424ce8e
parent    df933b07457921cd21eb95a87bd74375b76613ab (diff)
parent    32de2b06db501c7de81678bce8e3e0c3e63d340c (diff)
download  gallery-dl-29b8ce4676815053724f96769fa09d42428a79af.tar.bz2
          gallery-dl-29b8ce4676815053724f96769fa09d42428a79af.tar.xz
          gallery-dl-29b8ce4676815053724f96769fa09d42428a79af.tar.zst
Update upstream source from tag 'upstream/1.18.0'
Update to upstream version '1.18.0' with Debian dir ef07ea3c642369ef40d1f6eadc566c8be0eea8a9
-rw-r--r--  CHANGELOG.md  33
-rw-r--r--  PKG-INFO  7
-rw-r--r--  README.rst  5
-rw-r--r--  data/completion/_gallery-dl  10
-rw-r--r--  data/completion/gallery-dl  2
-rw-r--r--  data/man/gallery-dl.1  18
-rw-r--r--  data/man/gallery-dl.conf.5  43
-rw-r--r--  docs/gallery-dl.conf  4
-rw-r--r--  gallery_dl.egg-info/PKG-INFO  7
-rw-r--r--  gallery_dl/__init__.py  17
-rw-r--r--  gallery_dl/cache.py  9
-rw-r--r--  gallery_dl/extractor/500px.py  409
-rw-r--r--  gallery_dl/extractor/cyberdrop.py  4
-rw-r--r--  gallery_dl/extractor/deviantart.py  2
-rw-r--r--  gallery_dl/extractor/foolfuuka.py  9
-rw-r--r--  gallery_dl/extractor/furaffinity.py  23
-rw-r--r--  gallery_dl/extractor/gfycat.py  5
-rw-r--r--  gallery_dl/extractor/hiperdex.py  15
-rw-r--r--  gallery_dl/extractor/hitomi.py  8
-rw-r--r--  gallery_dl/extractor/idolcomplex.py  31
-rw-r--r--  gallery_dl/extractor/instagram.py  44
-rw-r--r--  gallery_dl/extractor/kemonoparty.py  4
-rw-r--r--  gallery_dl/extractor/mangadex.py  354
-rw-r--r--  gallery_dl/extractor/mangafox.py  9
-rw-r--r--  gallery_dl/extractor/oauth.py  9
-rw-r--r--  gallery_dl/extractor/philomena.py  21
-rw-r--r--  gallery_dl/extractor/pixiv.py  34
-rw-r--r--  gallery_dl/extractor/redgifs.py  2
-rw-r--r--  gallery_dl/extractor/sankaku.py  15
-rw-r--r--  gallery_dl/extractor/subscribestar.py  9
-rw-r--r--  gallery_dl/extractor/twitter.py  13
-rw-r--r--  gallery_dl/extractor/unsplash.py  17
-rw-r--r--  gallery_dl/job.py  31
-rw-r--r--  gallery_dl/option.py  37
-rw-r--r--  gallery_dl/postprocessor/classify.py  7
-rw-r--r--  gallery_dl/postprocessor/compare.py  7
-rw-r--r--  gallery_dl/postprocessor/exec.py  3
-rw-r--r--  gallery_dl/postprocessor/metadata.py  12
-rw-r--r--  gallery_dl/postprocessor/mtime.py  4
-rw-r--r--  gallery_dl/postprocessor/ugoira.py  4
-rw-r--r--  gallery_dl/postprocessor/zip.py  10
-rw-r--r--  gallery_dl/util.py  89
-rw-r--r--  gallery_dl/version.py  2
-rw-r--r--  setup.cfg  2
-rw-r--r--  setup.py  3
-rw-r--r--  test/test_postprocessor.py  19
-rw-r--r--  test/test_results.py  2
-rw-r--r--  test/test_util.py  24
48 files changed, 1056 insertions, 392 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index dcc1299..0a4c90c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,38 @@
# Changelog
+## 1.18.0 - 2021-06-19
+### Additions
+- [foolfuuka] support `archive.wakarimasen.moe` ([#1595](https://github.com/mikf/gallery-dl/issues/1595))
+- [mangadex] implement login with username & password ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+- [mangadex] add extractor for a user's followed feed ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+- [pixiv] support fetching privately followed users ([#1628](https://github.com/mikf/gallery-dl/issues/1628))
+- implement conditional filenames ([#1394](https://github.com/mikf/gallery-dl/issues/1394))
+- implement `filter` option for post processors ([#1460](https://github.com/mikf/gallery-dl/issues/1460))
+- add `-T/--terminate` command-line option ([#1399](https://github.com/mikf/gallery-dl/issues/1399))
+- add `-P/--postprocessor` command-line option ([#1583](https://github.com/mikf/gallery-dl/issues/1583))
+### Changes
+- [kemonoparty] update default filenames and archive IDs ([#1514](https://github.com/mikf/gallery-dl/issues/1514))
+- [twitter] update default settings
+ - change `retweets` and `quoted` options from `true` to `false`
+ - change directory format for search results to the same as other extractors
+- require an argument for `--clear-cache`
+### Fixes
+- [500px] update GraphQL queries
+- [furaffinity] improve metadata extraction ([#1630](https://github.com/mikf/gallery-dl/issues/1630))
+- [hitomi] update image URL generation ([#1637](https://github.com/mikf/gallery-dl/issues/1637))
+- [idolcomplex] improve and fix pagination ([#1594](https://github.com/mikf/gallery-dl/issues/1594), [#1601](https://github.com/mikf/gallery-dl/issues/1601))
+- [instagram] fix login ([#1631](https://github.com/mikf/gallery-dl/issues/1631))
+- [instagram] update query hashes
+- [mangadex] update to API v5 ([#1535](https://github.com/mikf/gallery-dl/issues/1535))
+- [mangafox] improve URL pattern ([#1608](https://github.com/mikf/gallery-dl/issues/1608))
+- [oauth] prevent exceptions when reporting errors ([#1603](https://github.com/mikf/gallery-dl/issues/1603))
+- [philomena] fix tag escapes handling ([#1629](https://github.com/mikf/gallery-dl/issues/1629))
+- [redgifs] update API server address ([#1632](https://github.com/mikf/gallery-dl/issues/1632))
+- [sankaku] handle empty tags ([#1617](https://github.com/mikf/gallery-dl/issues/1617))
+- [subscribestar] improve attachment filenames ([#1609](https://github.com/mikf/gallery-dl/issues/1609))
+- [unsplash] update collections URL pattern ([#1627](https://github.com/mikf/gallery-dl/issues/1627))
+- [postprocessor:metadata] handle dicts in `mode:tags` ([#1598](https://github.com/mikf/gallery-dl/issues/1598))
+
## 1.17.5 - 2021-05-30
### Additions
- [kemonoparty] add `metadata` option ([#1548](https://github.com/mikf/gallery-dl/issues/1548))
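
The new -A/--abort and -T/--terminate options listed in the 1.18.0 changelog above both map onto the existing "skip" setting. A rough standalone sketch of that mapping, mirroring the gallery_dl/__init__.py hunk later in this commit (it assumes gallery_dl.config.set(path, key, value) as used there):

from gallery_dl import config

# -A 5: stop the current extractor run after 5 consecutive skipped downloads
config.set((), "skip", "abort:5")

# -T 5: additionally stop parent extractor runs (the new --terminate behaviour)
config.set((), "skip", "terminate:5")
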
diff --git a/PKG-INFO b/PKG-INFO
index 14d8ed3..ef2b047 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.17.5
+Version: 1.18.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -220,6 +220,7 @@ Description: ==========
``imgbb``,
``inkbunny``,
``instagram``,
+ ``mangadex``,
``mangoxo``,
``pillowfort``,
``pinterest``,
diff --git a/README.rst b/README.rst
index 66e71e7..f3a42fc 100644
--- a/README.rst
+++ b/README.rst
@@ -64,8 +64,8 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -209,6 +209,7 @@ and optional for
``imgbb``,
``inkbunny``,
``instagram``,
+``mangadex``,
``mangoxo``,
``pillowfort``,
``pinterest``,
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
index 436260b..15806e8 100644
--- a/data/completion/_gallery-dl
+++ b/data/completion/_gallery-dl
@@ -11,7 +11,7 @@ _arguments -C -S \
{-i,--input-file}'[Download URLs found in FILE ("-" for stdin). More than one --input-file can be specified]':'<file>':_files \
--cookies'[File to load additional cookies from]':'<file>':_files \
--proxy'[Use the specified proxy]':'<url>' \
---clear-cache'[Delete all cached login sessions, cookies, etc.]':'<module>' \
+--clear-cache'[Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)]':'<module>' \
{-q,--quiet}'[Activate quiet mode]' \
{-v,--verbose}'[Print various debugging information]' \
{-g,--get-urls}'[Print URLs instead of downloading]' \
@@ -27,7 +27,6 @@ _arguments -C -S \
--write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \
{-r,--limit-rate}'[Maximum download rate (e.g. 500k or 2.5M)]':'<rate>' \
{-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'<n>' \
-{-A,--abort}'[Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist]':'<n>' \
--http-timeout'[Timeout for HTTP connections (default: 30.0)]':'<seconds>' \
--sleep'[Number of seconds to sleep before each download]':'<seconds>' \
--filesize-min'[Do not download files smaller than SIZE (e.g. 500k or 2.5M)]':'<size>' \
@@ -44,7 +43,9 @@ _arguments -C -S \
{-u,--username}'[Username to login with]':'<user>' \
{-p,--password}'[Password belonging to the given username]':'<pass>' \
--netrc'[Enable .netrc authentication data]' \
---download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it.]':'<file>':_files \
+--download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it]':'<file>':_files \
+{-A,--abort}'[Stop current extractor run after N consecutive file downloads were skipped]':'<n>' \
+{-T,--terminate}'[Stop current and parent extractor runs after N consecutive file downloads were skipped]':'<n>' \
--range'[Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"]':'<range>' \
--chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'<range>' \
--filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
@@ -56,6 +57,7 @@ _arguments -C -S \
--write-tags'[Write image tags to separate text files]' \
--mtime-from-date'[Set file modification times according to "date" metadata]' \
--exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
---exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' && rc=0
+--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' \
+{-P,--postprocessor}'[Activate the specified post processor]':'<name>' && rc=0
return rc
diff --git a/data/completion/gallery-dl b/data/completion/gallery-dl
index 9a3a63e..f3d1100 100644
--- a/data/completion/gallery-dl
+++ b/data/completion/gallery-dl
@@ -10,7 +10,7 @@ _gallery_dl()
elif [[ "${prev}" =~ ^(-d|--dest)$ ]]; then
COMPREPLY=( $(compgen -d -- "${cur}") )
else
- COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --abort --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "--help --version --dest --input-file --cookies --proxy --clear-cache --quiet --verbose --get-urls --resolve-urls --dump-json --simulate --extractor-info --list-keywords --list-modules --list-extractors --write-log --write-unsupported --write-pages --limit-rate --retries --http-timeout --sleep --filesize-min --filesize-max --no-part --no-skip --no-mtime --no-download --no-check-certificate --config --config-yaml --option --ignore-config --username --password --netrc --download-archive --abort --terminate --range --chapter-range --filter --chapter-filter --zip --ugoira-conv --ugoira-conv-lossless --write-metadata --write-tags --mtime-from-date --exec --exec-after --postprocessor" -- "${cur}") )
fi
}
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 719b8b4..25da021 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-05-30" "1.17.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-06-19" "1.18.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
@@ -36,7 +36,7 @@ File to load additional cookies from
Use the specified proxy
.TP
.B "\-\-clear\-cache" \f[I]MODULE\f[]
-Delete all cached login sessions, cookies, etc.
+Delete cached login sessions, cookies, etc. for MODULE (ALL to delete everything)
.TP
.B "\-q, \-\-quiet"
Activate quiet mode
@@ -83,9 +83,6 @@ Maximum download rate (e.g. 500k or 2.5M)
.B "\-R, \-\-retries" \f[I]N\f[]
Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)
.TP
-.B "\-A, \-\-abort" \f[I]N\f[]
-Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist
-.TP
.B "\-\-http\-timeout" \f[I]SECONDS\f[]
Timeout for HTTP connections (default: 30.0)
.TP
@@ -132,7 +129,13 @@ Password belonging to the given username
Enable .netrc authentication data
.TP
.B "\-\-download\-archive" \f[I]FILE\f[]
-Record all downloaded files in the archive file and skip downloading any file already in it.
+Record all downloaded files in the archive file and skip downloading any file already in it
+.TP
+.B "\-A, \-\-abort" \f[I]N\f[]
+Stop current extractor run after N consecutive file downloads were skipped
+.TP
+.B "\-T, \-\-terminate" \f[I]N\f[]
+Stop current and parent extractor runs after N consecutive file downloads were skipped
.TP
.B "\-\-range" \f[I]RANGE\f[]
Index-range(s) specifying which images to download. For example '5-10' or '1,3-5,10-'
@@ -169,6 +172,9 @@ Execute CMD for each downloaded file. Example: --exec 'convert {} {}.png && rm {
.TP
.B "\-\-exec\-after" \f[I]CMD\f[]
Execute CMD after all files were downloaded successfully. Example: --exec-after 'cd {} && convert * ../doc.pdf'
+.TP
+.B "\-P, \-\-postprocessor" \f[I]NAME\f[]
+Activate the specified post processor
.SH EXAMPLES
.TP
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index f35f218..84e8e0e 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-05-30" "1.17.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-06-19" "1.18.0" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -75,14 +75,31 @@ those as makeshift comments by settings their values to arbitrary strings.
.SH EXTRACTOR OPTIONS
.SS extractor.*.filename
.IP "Type:" 6
-\f[I]string\f[]
+\f[I]string\f[] or \f[I]object\f[]
.IP "Example:" 4
+.br
+* .. code::
+
"{manga}_c{chapter}_{page:>03}.{extension}"
+.br
+* .. code:: json
+
+{
+"extension == 'mp4'": "{id}_video.{extension}",
+"'nature' in title" : "{id}_{title}.{extension}",
+"" : "{id}_default.{extension}"
+}
+
+
.IP "Description:" 4
-A \f[I]format string\f[] to build the resulting filename
-for a downloaded file.
+A \f[I]format string\f[] to build filenames for downloaded files with.
+
+If this is an \f[I]object\f[], it must contain Python expressions mapping to the
+filename format strings to use.
+These expressions are evaluated in the order as specified in Python 3.6+
+and in an undetermined order in Python 3.4 and 3.5.
The available replacement keys depend on the extractor used. A list
of keys for a specific one can be acquired by calling *gallery-dl*
@@ -358,9 +375,9 @@ and optional for
.br
* \f[I]aryion\f[]
.br
-* \f[I]danbooru\f[]
+* \f[I]danbooru\f[] (*)
.br
-* \f[I]e621\f[]
+* \f[I]e621\f[] (*)
.br
* \f[I]exhentai\f[]
.br
@@ -372,6 +389,8 @@ and optional for
.br
* \f[I]instagram\f[]
.br
+* \f[I]mangadex\f[]
+.br
* \f[I]mangoxo\f[]
.br
* \f[I]pillowfort\f[]
@@ -392,7 +411,7 @@ These values can also be specified via the
\f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or
by using a \f[I].netrc\f[] file. (see Authentication_)
-Note: The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be
+(*) The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be
the API key found in your user profile, not the actual account password.
@@ -1900,7 +1919,7 @@ Fetch media from all Tweets and replies in a \f[I]conversation
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
.IP "Description:" 4
Fetch media from quoted Tweets.
@@ -1922,7 +1941,7 @@ Fetch media from replies to other Tweets.
\f[I]bool\f[]
.IP "Default:" 9
-\f[I]true\f[]
+\f[I]false\f[]
.IP "Description:" 4
Fetch media from Retweets.
@@ -3206,12 +3225,18 @@ logging output to a file.
"name" : "zip",
"compression": "store",
"extension" : "cbz",
+"filter" : "extension not in ('zip', 'rar')",
"whitelist" : ["mangadex", "exhentai", "nhentai"]
}
.IP "Description:" 4
An \f[I]object\f[] containing a \f[I]"name"\f[] attribute specifying the
post-processor type, as well as any of its \f[I]options\f[].
+
+It is possible to set a \f[I]"filter"\f[] expression similar to
+\f[I]image-filter\f[] to only run a post-processor
+conditionally.
+
It is also possible set a \f[I]"whitelist"\f[] or \f[I]"blacklist"\f[] to
only enable or disable a post-processor for the specified
extractor categories.
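
The two man-page additions above introduce a conditional "filename" object and a post-processor "filter" expression. As an illustrative sketch only, this is the Python-dict equivalent of a gallery-dl.conf using both; the expressions are copied from the man-page examples, while the surrounding nesting and concrete values are assumptions:

config = {
    "extractor": {
        # conditional filenames: expressions are evaluated in order and the
        # first one that is true selects the format string; "" is the fallback
        "filename": {
            "extension == 'mp4'": "{id}_video.{extension}",
            "'nature' in title" : "{id}_{title}.{extension}",
            ""                  : "{id}_default.{extension}",
        },
    },
    "postprocessors": [
        {
            # only run this post-processor for files matching the filter
            "name"  : "zip",
            "filter": "extension not in ('zip', 'rar')",
        },
    ],
}
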
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 7497cd6..9514c7a 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -256,9 +256,9 @@
"password": null,
"cards": false,
"conversations": false,
- "quoted": true,
+ "quoted": false,
"replies": true,
- "retweets": true,
+ "retweets": false,
"text-tweets": false,
"twitpic": false,
"users": "timeline",
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 7fe851f..b53c326 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.17.5
+Version: 1.18.0
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -75,8 +75,8 @@ Description: ==========
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.17.5/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.18.0/gallery-dl.bin>`__
| Executables build from the latest commit can be found at
| https://github.com/mikf/gallery-dl/actions/workflows/executables.yml
@@ -220,6 +220,7 @@ Description: ==========
``imgbb``,
``inkbunny``,
``instagram``,
+ ``mangadex``,
``mangoxo``,
``pillowfort``,
``pinterest``,
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 8154afc..d5893b7 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -6,23 +6,16 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-from __future__ import unicode_literals, print_function
+import sys
+import json
+import logging
+from . import version, config, option, output, extractor, job, util, exception
__author__ = "Mike Fährmann"
__copyright__ = "Copyright 2014-2021 Mike Fährmann"
__license__ = "GPLv2"
__maintainer__ = "Mike Fährmann"
__email__ = "mike_faehrmann@web.de"
-
-import sys
-
-if sys.hexversion < 0x3040000:
- sys.exit("Python 3.4+ required")
-
-import json
-import logging
-from . import version, config, option, output, extractor, job, util, exception
-
__version__ = version.__version__
@@ -126,6 +119,8 @@ def main():
config.set((), "postprocessors", args.postprocessors)
if args.abort:
config.set((), "skip", "abort:" + str(args.abort))
+ if args.terminate:
+ config.set((), "skip", "terminate:" + str(args.terminate))
for opts in args.options:
config.set(*opts)
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index 5ab68bf..7a49b61 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -168,7 +168,7 @@ def cache(maxage=3600, keyarg=None):
return wrap
-def clear(module="all"):
+def clear(module):
"""Delete database entries for 'module'"""
db = DatabaseCacheDecorator.db
if not db:
@@ -176,19 +176,18 @@ def clear(module="all"):
rowcount = 0
cursor = db.cursor()
- module = module.lower()
try:
- if module == "all":
+ if module == "ALL":
cursor.execute("DELETE FROM data")
else:
cursor.execute(
"DELETE FROM data "
"WHERE key LIKE 'gallery_dl.extractor.' || ? || '.%'",
- (module,)
+ (module.lower(),)
)
except sqlite3.OperationalError:
- pass # database is not initialized, can't be modified, etc.
+ pass # database not initialized, cannot be modified, etc.
else:
rowcount = cursor.rowcount
db.commit()
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index c2c5a66..4cf5e48 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -11,7 +11,6 @@
from .common import Extractor, Message
import json
-
BASE_PATTERN = r"(?:https?://)?(?:web\.)?500px\.com"
@@ -78,15 +77,14 @@ class _500pxExtractor(Extractor):
headers = {"Origin": self.root, "X-CSRF-Token": csrf_token}
return self.request(url, headers=headers, params=params).json()
- def _request_graphql(self, opname, variables, query_hash):
+ def _request_graphql(self, opname, variables):
url = "https://api.500px.com/graphql"
- params = {
+ data = {
"operationName": opname,
"variables" : json.dumps(variables),
- "extensions" : '{"persistedQuery":{"version":1'
- ',"sha256Hash":"' + query_hash + '"}}',
+ "query" : QUERIES[opname],
}
- return self.request(url, params=params).json()["data"]
+ return self.request(url, method="POST", json=data).json()["data"]
class _500pxUserExtractor(_500pxExtractor):
@@ -111,8 +109,6 @@ class _500pxUserExtractor(_500pxExtractor):
variables = {"username": self.user, "pageSize": 20}
photos = self._request_graphql(
"OtherPhotosQuery", variables,
- "018a5e5117bd72bdf28066aad02c4f2d"
- "8acdf7f6127215d231da60e24080eb1b",
)["user"]["photos"]
while True:
@@ -124,8 +120,6 @@ class _500pxUserExtractor(_500pxExtractor):
variables["cursor"] = photos["pageInfo"]["endCursor"]
photos = self._request_graphql(
"OtherPhotosPaginationContainerQuery", variables,
- "b4af70d42c71a5e43f0be36ce60dc81e"
- "9742ebc117cde197350f2b86b5977d98",
)["userByUsername"]["photos"]
@@ -159,7 +153,6 @@ class _500pxGalleryExtractor(_500pxExtractor):
def metadata(self):
user = self._request_graphql(
"ProfileRendererQuery", {"username": self.user_name},
- "fcecc7028c308115b0defebc63acec3fe3c12df86a602c3e1785ba5cfb8fff47",
)["profile"]
self.user_id = str(user["legacyId"])
@@ -172,7 +165,6 @@ class _500pxGalleryExtractor(_500pxExtractor):
}
gallery = self._request_graphql(
"GalleriesDetailQueryRendererQuery", variables,
- "eda3c77ca4efe4b3347ec9c08befe3bd2c58099ebfb1f680d829fcd26d34f12d",
)["gallery"]
self._photos = gallery["photos"]
@@ -200,8 +192,6 @@ class _500pxGalleryExtractor(_500pxExtractor):
variables["cursor"] = photos["pageInfo"]["endCursor"]
photos = self._request_graphql(
"GalleriesDetailPaginationContainerQuery", variables,
- "466cf6661a07e7fdca465edb39118efb"
- "80fb157c6d3f620c7f518cdae0832c78",
)["galleryByOwnerIdAndSlugOrToken"]["photos"]
@@ -261,3 +251,394 @@ class _500pxImageExtractor(_500pxExtractor):
def photos(self):
edges = ({"node": {"legacyId": self.photo_id}},)
return self._extend(edges)
+
+
+QUERIES = {
+
+ "OtherPhotosQuery": """\
+query OtherPhotosQuery($username: String!, $pageSize: Int) {
+ user: userByUsername(username: $username) {
+ ...OtherPhotosPaginationContainer_user_RlXb8
+ id
+ }
+}
+
+fragment OtherPhotosPaginationContainer_user_RlXb8 on User {
+ photos(first: $pageSize, privacy: PROFILE, sort: ID_DESC) {
+ edges {
+ node {
+ id
+ legacyId
+ canonicalPath
+ width
+ height
+ name
+ isLikedByMe
+ notSafeForWork
+ photographer: uploader {
+ id
+ legacyId
+ username
+ displayName
+ canonicalPath
+ followedByUsers {
+ isFollowedByMe
+ }
+ }
+ images(sizes: [33, 35]) {
+ size
+ url
+ jpegUrl
+ webpUrl
+ id
+ }
+ __typename
+ }
+ cursor
+ }
+ totalCount
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }
+}
+""",
+
+ "OtherPhotosPaginationContainerQuery": """\
+query OtherPhotosPaginationContainerQuery($username: String!, $pageSize: Int, $cursor: String) {
+ userByUsername(username: $username) {
+ ...OtherPhotosPaginationContainer_user_3e6UuE
+ id
+ }
+}
+
+fragment OtherPhotosPaginationContainer_user_3e6UuE on User {
+ photos(first: $pageSize, after: $cursor, privacy: PROFILE, sort: ID_DESC) {
+ edges {
+ node {
+ id
+ legacyId
+ canonicalPath
+ width
+ height
+ name
+ isLikedByMe
+ notSafeForWork
+ photographer: uploader {
+ id
+ legacyId
+ username
+ displayName
+ canonicalPath
+ followedByUsers {
+ isFollowedByMe
+ }
+ }
+ images(sizes: [33, 35]) {
+ size
+ url
+ jpegUrl
+ webpUrl
+ id
+ }
+ __typename
+ }
+ cursor
+ }
+ totalCount
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }
+}
+""",
+
+ "ProfileRendererQuery": """\
+query ProfileRendererQuery($username: String!) {
+ profile: userByUsername(username: $username) {
+ id
+ legacyId
+ userType: type
+ username
+ firstName
+ displayName
+ registeredAt
+ canonicalPath
+ avatar {
+ ...ProfileAvatar_avatar
+ id
+ }
+ userProfile {
+ firstname
+ lastname
+ state
+ country
+ city
+ about
+ id
+ }
+ socialMedia {
+ website
+ twitter
+ instagram
+ facebook
+ id
+ }
+ coverPhotoUrl
+ followedByUsers {
+ totalCount
+ isFollowedByMe
+ }
+ followingUsers {
+ totalCount
+ }
+ membership {
+ expiryDate
+ membershipTier: tier
+ photoUploadQuota
+ refreshPhotoUploadQuotaAt
+ paymentStatus
+ id
+ }
+ profileTabs {
+ tabs {
+ name
+ visible
+ }
+ }
+ ...EditCover_cover
+ photoStats {
+ likeCount
+ viewCount
+ }
+ photos(privacy: PROFILE) {
+ totalCount
+ }
+ licensingPhotos(status: ACCEPTED) {
+ totalCount
+ }
+ portfolio {
+ id
+ status
+ userDisabled
+ }
+ }
+}
+
+fragment EditCover_cover on User {
+ coverPhotoUrl
+}
+
+fragment ProfileAvatar_avatar on UserAvatar {
+ images(sizes: [MEDIUM, LARGE]) {
+ size
+ url
+ id
+ }
+}
+""",
+
+ "GalleriesDetailQueryRendererQuery": """\
+query GalleriesDetailQueryRendererQuery($galleryOwnerLegacyId: ID!, $ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $gallerySize: Int) {
+ galleries(galleryOwnerLegacyId: $galleryOwnerLegacyId, first: $gallerySize) {
+ edges {
+ node {
+ legacyId
+ description
+ name
+ privacy
+ canonicalPath
+ notSafeForWork
+ buttonName
+ externalUrl
+ cover {
+ images(sizes: [35, 33]) {
+ size
+ webpUrl
+ jpegUrl
+ id
+ }
+ id
+ }
+ photos {
+ totalCount
+ }
+ id
+ }
+ }
+ }
+ gallery: galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) {
+ ...GalleriesDetailPaginationContainer_gallery_RlXb8
+ id
+ }
+}
+
+fragment GalleriesDetailPaginationContainer_gallery_RlXb8 on Gallery {
+ id
+ legacyId
+ name
+ privacy
+ notSafeForWork
+ ownPhotosOnly
+ canonicalPath
+ publicSlug
+ lastPublishedAt
+ photosAddedSinceLastPublished
+ reportStatus
+ creator {
+ legacyId
+ id
+ }
+ cover {
+ images(sizes: [33, 32, 36, 2048]) {
+ url
+ size
+ webpUrl
+ id
+ }
+ id
+ }
+ description
+ externalUrl
+ buttonName
+ photos(first: $pageSize) {
+ totalCount
+ edges {
+ cursor
+ node {
+ id
+ legacyId
+ canonicalPath
+ name
+ description
+ category
+ uploadedAt
+ location
+ width
+ height
+ isLikedByMe
+ photographer: uploader {
+ id
+ legacyId
+ username
+ displayName
+ canonicalPath
+ avatar {
+ images(sizes: SMALL) {
+ url
+ id
+ }
+ id
+ }
+ followedByUsers {
+ totalCount
+ isFollowedByMe
+ }
+ }
+ images(sizes: [33, 32]) {
+ size
+ url
+ webpUrl
+ id
+ }
+ __typename
+ }
+ }
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }
+}
+""",
+
+ "GalleriesDetailPaginationContainerQuery": """\
+query GalleriesDetailPaginationContainerQuery($ownerLegacyId: String, $slug: String, $token: String, $pageSize: Int, $cursor: String) {
+ galleryByOwnerIdAndSlugOrToken(ownerLegacyId: $ownerLegacyId, slug: $slug, token: $token) {
+ ...GalleriesDetailPaginationContainer_gallery_3e6UuE
+ id
+ }
+}
+
+fragment GalleriesDetailPaginationContainer_gallery_3e6UuE on Gallery {
+ id
+ legacyId
+ name
+ privacy
+ notSafeForWork
+ ownPhotosOnly
+ canonicalPath
+ publicSlug
+ lastPublishedAt
+ photosAddedSinceLastPublished
+ reportStatus
+ creator {
+ legacyId
+ id
+ }
+ cover {
+ images(sizes: [33, 32, 36, 2048]) {
+ url
+ size
+ webpUrl
+ id
+ }
+ id
+ }
+ description
+ externalUrl
+ buttonName
+ photos(first: $pageSize, after: $cursor) {
+ totalCount
+ edges {
+ cursor
+ node {
+ id
+ legacyId
+ canonicalPath
+ name
+ description
+ category
+ uploadedAt
+ location
+ width
+ height
+ isLikedByMe
+ photographer: uploader {
+ id
+ legacyId
+ username
+ displayName
+ canonicalPath
+ avatar {
+ images(sizes: SMALL) {
+ url
+ id
+ }
+ id
+ }
+ followedByUsers {
+ totalCount
+ isFollowedByMe
+ }
+ }
+ images(sizes: [33, 32]) {
+ size
+ url
+ webpUrl
+ id
+ }
+ __typename
+ }
+ }
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }
+}
+""",
+
+}
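
With this change the 500px extractor no longer sends persisted-query sha256 hashes as GET parameters; the full GraphQL query text is POSTed instead. A minimal standalone sketch of the new request shape, using requests directly rather than the extractor's own request helper:

import json
import requests

def request_graphql(opname, variables, query):
    # POST the operation name, JSON-encoded variables, and the full query text
    data = {
        "operationName": opname,
        "variables"    : json.dumps(variables),
        "query"        : query,
    }
    response = requests.post("https://api.500px.com/graphql", json=data)
    return response.json()["data"]
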
diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py
index a057b84..e354cb7 100644
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -8,7 +8,7 @@
from .common import Extractor, Message
from .. import text
-import base64
+import binascii
class CyberdropAlbumExtractor(Extractor):
@@ -52,7 +52,7 @@ class CyberdropAlbumExtractor(Extractor):
yield Message.Directory, data
for file_b64 in files:
- file = base64.b64decode(file_b64.encode()).decode()
+ file = binascii.a2b_base64(file_b64).decode()
text.nameext_from_url(file, data)
data["filename"], _, data["id"] = data["filename"].rpartition("-")
yield Message.Url, "https://f.cyberdrop.cc/" + file, data
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 9a461a4..70e268d 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -918,7 +918,7 @@ class DeviantartOAuthAPI():
def __init__(self, extractor):
self.extractor = extractor
self.log = extractor.log
- self.headers = {}
+ self.headers = {"dA-minor-version": "20200519"}
self.delay = extractor.config("wait-min", 0)
self.delay_min = max(2, self.delay)
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 5962b9e..5ea3adb 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -90,7 +90,9 @@ BASE_PATTERN = FoolfuukaExtractor.update({
},
"thebarchive": {
"root": "https://thebarchive.com",
- "pattern": r"thebarchive\.com",
+ },
+ "wakarimasen": {
+ "root": "https://archive.wakarimasen.moe",
},
})
@@ -137,6 +139,9 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
("https://thebarchive.com/b/thread/739772332/", {
"url": "07d39d2cb48f40fb337dc992993d965b0cd5f7cd",
}),
+ ("https://archive.wakarimasen.moe/a/thread/223157648/", {
+ "url": "fef0758d2eb81b1ba783051fd5ec491d70107a78",
+ }),
)
def __init__(self, match):
@@ -175,6 +180,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
("https://archive.nyafuu.org/c/"),
("https://rbt.asia/g/"),
("https://thebarchive.com/b/"),
+ ("https://archive.wakarimasen.moe/a/"),
)
def __init__(self, match):
@@ -218,6 +224,7 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
("https://archive.nyafuu.org/_/search/text/test/"),
("https://rbt.asia/_/search/text/test/"),
("https://thebarchive.com/_/search/text/test/"),
+ ("https://archive.wakarimasen.moe/a/search/text/test/"),
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index a7b0356..86e1678 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, util
-
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"
@@ -19,7 +18,7 @@ class FuraffinityExtractor(Extractor):
"""Base class for furaffinity extractors"""
category = "furaffinity"
directory_fmt = ("{category}", "{user!l}")
- filename_fmt = "{id} {title}.{extension}"
+ filename_fmt = "{id}{title:? //}.{extension}"
archive_fmt = "{id}"
cookiedomain = ".furaffinity.net"
root = "https://www.furaffinity.net"
@@ -55,9 +54,6 @@ class FuraffinityExtractor(Extractor):
def _parse_post(self, post_id):
url = "{}/view/{}/".format(self.root, post_id)
extr = text.extract_from(self.request(url).text)
- title, _, artist = text.unescape(extr(
- 'property="og:title" content="', '"')).rpartition(" by ")
- artist_url = artist.replace("_", "").lower()
path = extr('href="//d', '"')
if not path:
@@ -74,18 +70,16 @@ class FuraffinityExtractor(Extractor):
rh = text.remove_html
data = text.nameext_from_url(path, {
- "id" : pi(post_id),
- "title" : title,
- "artist" : artist,
- "artist_url": artist_url,
- "user" : self.user or artist_url,
- "url" : "https://d" + path
+ "id" : pi(post_id),
+ "url": "https://d" + path,
})
tags = extr('class="tags-row">', '</section>')
if tags:
# new site layout
data["tags"] = text.split_html(tags)
+ data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
+ data["artist"] = extr("<strong>", "<")
data["description"] = self._process_description(extr(
'class="section-body">', '</div>'))
data["views"] = pi(rh(extr('class="views">', '</span>')))
@@ -100,6 +94,8 @@ class FuraffinityExtractor(Extractor):
data["height"] = pi(extr("", "p"))
else:
# old site layout
+ data["title"] = text.unescape(extr("<h2>", "</h2>"))
+ data["artist"] = extr(">", "<")
data["fa_category"] = extr("<b>Category:</b>", "<").strip()
data["theme"] = extr("<b>Theme:</b>", "<").strip()
data["species"] = extr("<b>Species:</b>", "<").strip()
@@ -114,6 +110,9 @@ class FuraffinityExtractor(Extractor):
data["rating"] = extr('<img alt="', ' ')
data["description"] = self._process_description(extr(
"</table>", "</table>"))
+
+ data["artist_url"] = data["artist"].replace("_", "").lower()
+ data["user"] = self.user or data["artist_url"]
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
return data
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 4e62165..5732816 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -177,7 +177,10 @@ class GfycatAPI():
@cache(keyarg=1, maxage=3600)
def _authenticate_impl(self, category):
- url = "https://weblogin." + category + ".com/oauth/webtoken"
+ if category == "redgifs":
+ url = "https://api.redgifs.com/v1/oauth/webtoken"
+ else:
+ url = "https://weblogin." + category + ".com/oauth/webtoken"
data = {"access_key": self.ACCESS_KEY}
headers = {"Referer": self.extractor.root + "/",
"Origin" : self.extractor.root}
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 93ef6f1..7ad06c9 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,14 +13,13 @@ from .. import text
from ..cache import memcache
import re
-
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\.(?:com|net|info)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?hiperdex\d?\.(?:com|net|info)"
class HiperdexBase():
"""Base class for hiperdex extractors"""
category = "hiperdex"
- root = "https://hiperdex.com"
+ root = "https://hiperdex2.com"
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
@@ -66,8 +65,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
test = (
- ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
- "pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads"
+ ("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/", {
+ "pattern": r"https://hiperdex\d?.(com|net|info)/wp-content/uploads"
r"/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp",
"count": 9,
"keyword": {
@@ -107,7 +106,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
test = (
- ("https://hiperdex.com/manga/youre-not-that-special/", {
+ ("https://hiperdex2.com/manga/youre-not-that-special/", {
"count": 51,
"pattern": HiperdexChapterExtractor.pattern,
"keyword": {
@@ -159,7 +158,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?#]+))"
test = (
- ("https://hiperdex.com/manga-artist/beck-ho-an/"),
+ ("https://hiperdex2.com/manga-artist/beck-ho-an/"),
("https://hiperdex.net/manga-artist/beck-ho-an/"),
("https://hiperdex.info/manga-artist/beck-ho-an/"),
("https://hiperdex.com/manga-author/viagra/", {
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index 994e1b7..497509d 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "ec3fe9b708ee376ec579b90d053ad485c0777552",
+ "url": "8dfbcb1e51cec43a7112d58b7e92153155ada3b9",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "bf4ed4e726204da5bc37a236ca476a2a96081388",
+ "url": "a5af7fdca1f5c93c289af128914a8488ea345036",
"count": 1413,
}),
# gallery with "broken" redirect
@@ -140,8 +140,8 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/common.js
inum = int(ihash[-3:-1], 16)
- frontends = 2 if inum < 0x30 else 3
- inum = 1 if inum < 0x09 else inum
+ frontends = 2 if inum < 0x70 else 3
+ inum = 1 if inum < 0x49 else inum
url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
chr(97 + (inum % frontends)),
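
The hitomi fix above only adjusts the two threshold constants used when picking an image frontend. A rough standalone sketch of the updated selection logic (the helper name is made up):

def image_subdomain(ihash):
    # see https://ltn.hitomi.la/common.js
    inum = int(ihash[-3:-1], 16)
    frontends = 2 if inum < 0x70 else 3
    inum = 1 if inum < 0x49 else inum
    return chr(97 + (inum % frontends)) + "b.hitomi.la"
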
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index 3d4bcfb..9701f1e 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -132,11 +132,16 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
archive_fmt = "t_{search_tags}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
test = (
- ("https://idol.sankakucomplex.com/?tags=lyumos+wreath", {
- "count": ">= 6",
+ ("https://idol.sankakucomplex.com/?tags=lyumos", {
+ "count": 5,
+ "range": "18-22",
"pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
+ ("https://idol.sankakucomplex.com/?tags=order:favcount", {
+ "count": 5,
+ "range": "18-22",
+ }),
("https://idol.sankakucomplex.com"
"/?tags=lyumos+wreath&page=3&next=694215"),
)
@@ -184,21 +189,21 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
while True:
page = self.request(self.root, params=params, retries=10).text
pos = page.find("<div id=more-popular-posts-link>") + 1
+ yield from text.extract_iter(page, '" id=p', '>', pos)
- ids = list(text.extract_iter(page, '" id=p', '>', pos))
- if not ids:
+ next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
+ if not next_url:
return
- yield from ids
- next_qs = text.extract(page, 'next-page-url="/?', '"', pos)[0]
- next_id = text.parse_query(next_qs).get("next")
-
- # stop if the same "next" parameter occurs twice in a row (#265)
- if "next" in params and params["next"] == next_id:
- return
+ next_params = text.parse_query(text.unescape(
+ next_url).lstrip("?/"))
- params["next"] = next_id or (text.parse_int(ids[-1]) - 1)
- params["page"] = "2"
+ if "next" in next_params:
+ # stop if the same "next" value occurs twice in a row (#265)
+ if "next" in params and params["next"] == next_params["next"]:
+ return
+ next_params["page"] = "2"
+ params = next_params
class IdolcomplexPoolExtractor(IdolcomplexExtractor):
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index e3db789..b015556 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -120,10 +120,7 @@ class InstagramExtractor(Extractor):
if not self._check_cookies(self.cookienames):
username, password = self._get_auth_info()
if username:
- self.session.cookies.set(
- "ig_cb", "2", domain="www.instagram.com")
self._update_cookies(self._login_impl(username, password))
-
self.session.cookies.set(
"csrftoken", self.csrf_token, domain=self.cookiedomain)
@@ -131,33 +128,42 @@ class InstagramExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- page = self.request(self.root + "/accounts/login/").text
+ url = self.root + "/accounts/login/"
+ page = self.request(url).text
+
headers = {
- "Referer" : self.root + "/accounts/login/",
+ "X-Web-Device-Id" : text.extract(page, '"device_id":"', '"')[0],
"X-IG-App-ID" : "936619743392459",
+ "X-ASBD-ID" : "437806",
+ "X-IG-WWW-Claim" : "0",
"X-Requested-With": "XMLHttpRequest",
+ "Referer" : url,
}
+ url = self.root + "/data/shared_data/"
+ data = self.request(url, headers=headers).json()
- response = self.request(self.root + "/web/__mid/", headers=headers)
- headers["X-CSRFToken"] = response.cookies["csrftoken"]
- headers["X-Instagram-AJAX"] = text.extract(
- page, '"rollout_hash":"', '"')[0]
-
- url = self.root + "/accounts/login/ajax/"
+ headers["X-CSRFToken"] = data["config"]["csrf_token"]
+ headers["X-Instagram-AJAX"] = data["rollout_hash"]
+ headers["Origin"] = self.root
data = {
"username" : username,
"enc_password" : "#PWD_INSTAGRAM_BROWSER:0:{}:{}".format(
int(time.time()), password),
- "queryParams" : "{}",
- "optIntoOneTap": "false",
+ "queryParams" : "{}",
+ "optIntoOneTap" : "false",
+ "stopDeletionNonce" : "",
+ "trustedDeviceRecords": "{}",
}
+ url = self.root + "/accounts/login/ajax/"
response = self.request(url, method="POST", headers=headers, data=data)
if not response.json().get("authenticated"):
raise exception.AuthenticationError()
+
+ cget = self.session.cookies.get
return {
- key: self.session.cookies.get(key)
- for key in ("sessionid", "mid", "csrftoken")
+ name: cget(name)
+ for name in ("sessionid", "mid", "ig_did")
}
def _parse_post_graphql(self, post):
@@ -408,7 +414,7 @@ class InstagramPostsExtractor(InstagramExtractor):
url = "{}/{}/".format(self.root, self.item)
user = self._extract_profile_page(url)
- query_hash = "32b14723a678bd4628d70c1f877b94c9"
+ query_hash = "7ea6ae3cf6fb05e73fcbe1732b1d2a42"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, "edge_owner_to_timeline_media")
return self._pagination_graphql(query_hash, variables, edge)
@@ -427,7 +433,7 @@ class InstagramTaggedExtractor(InstagramExtractor):
url = "{}/{}/".format(self.root, self.item)
user = self._extract_profile_page(url)
- query_hash = "31fe64d9463cbbe58319dced405c6206"
+ query_hash = "be13233562af2d229b008d2976b998b5"
variables = {"id": user["id"], "first": 50}
edge = self._get_edge_data(user, None)
return self._pagination_graphql(query_hash, variables, edge)
@@ -613,13 +619,13 @@ class InstagramPostExtractor(InstagramExtractor):
)
def posts(self):
- query_hash = "d4e8ae69cb68f66329dcebe82fb69f6d"
+ query_hash = "971f52b26328008c768b7d8e4ac9ce3c"
variables = {
"shortcode" : self.item,
"child_comment_count" : 3,
"fetch_comment_count" : 40,
"parent_comment_count" : 24,
- "has_threaded_comments": True
+ "has_threaded_comments": True,
}
data = self._request_graphql(query_hash, variables)
media = data.get("shortcode_media")
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 1b5e5e9..8c51d5d 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -20,8 +20,8 @@ class KemonopartyExtractor(Extractor):
category = "kemonoparty"
root = "https://kemono.party"
directory_fmt = ("{category}", "{service}", "{user}")
- filename_fmt = "{id}_{title}_{filename}.{extension}"
- archive_fmt = "{service}_{user}_{id}_{filename}.{extension}"
+ filename_fmt = "{id}_{title}_{num:>02}_{filename}.{extension}"
+ archive_fmt = "{service}_{user}_{id}_{num}"
def items(self):
find_inline = re.compile(r'src="(/inline/[^"]+)').findall
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 6a88d58..0fe46b1 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -10,202 +10,270 @@
from .common import Extractor, Message
from .. import text, util, exception
-from ..cache import memcache
+from ..cache import cache, memcache
+from collections import defaultdict
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
class MangadexExtractor(Extractor):
"""Base class for mangadex extractors"""
category = "mangadex"
+ directory_fmt = (
+ "{category}", "{manga}",
+ "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
+ filename_fmt = (
+ "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
+ archive_fmt = "{chapter_id}_{page}"
root = "https://mangadex.org"
- api_root = "https://api.mangadex.org"
-
- # mangadex-to-iso639-1 codes
- iso639_map = {
- "br": "pt",
- "ct": "ca",
- "gb": "en",
- "vn": "vi",
- }
+ _cache = {}
def __init__(self, match):
Extractor.__init__(self, match)
+ self.api = MangadexAPI(self)
+ self.uuid = match.group(1)
+
+ def items(self):
+ for chapter in self.chapters():
+ uuid = chapter["data"]["id"]
+ data = self._transform(chapter)
+ data["_extractor"] = MangadexChapterExtractor
+ self._cache[uuid] = (chapter, data)
+ yield Message.Queue, self.root + "/chapter/" + uuid, data
- server = self.config("api-server")
- if server is not None:
- self.api_root = server.rstrip("/")
+ def _transform(self, chapter):
+ relationships = defaultdict(list)
+ for item in chapter["relationships"]:
+ relationships[item["type"]].append(item["id"])
+ manga = self.api.manga(relationships["manga"][0])
+ for item in manga["relationships"]:
+ relationships[item["type"]].append(item["id"])
- def chapter_data(self, chapter_id):
- """Request API results for 'chapter_id'"""
- url = "{}/v2/chapter/{}".format(self.api_root, chapter_id)
- return self.request(url).json()["data"]
+ cattributes = chapter["data"]["attributes"]
+ mattributes = manga["data"]["attributes"]
+ lang = cattributes["translatedLanguage"].partition("-")[0]
- @memcache(keyarg=1)
- def manga_data(self, manga_id):
- """Request API results for 'manga_id'"""
- url = "{}/v2/manga/{}".format(self.api_root, manga_id)
- return self.request(url).json()["data"]
-
- def manga_chapters(self, manga_id):
- """Request chapter list for 'manga_id'"""
- url = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id)
- data = self.request(url).json()["data"]
-
- groups = {
- group["id"]: group["name"]
- for group in data["groups"]
+ if cattributes["chapter"]:
+ chnum, sep, minor = cattributes["chapter"].partition(".")
+ else:
+ chnum, sep, minor = 0, "", ""
+
+ data = {
+ "manga" : mattributes["title"]["en"],
+ "manga_id": manga["data"]["id"],
+ "title" : cattributes["title"],
+ "volume" : text.parse_int(cattributes["volume"]),
+ "chapter" : text.parse_int(chnum),
+ "chapter_minor": sep + minor,
+ "chapter_id": chapter["data"]["id"],
+ "date" : text.parse_datetime(cattributes["publishAt"]),
+ "lang" : lang,
+ "language": util.code_to_language(lang),
+ "count" : len(cattributes["data"]),
}
- for chapter in data["chapters"]:
- cgroups = chapter["groups"]
- for idx, group_id in enumerate(cgroups):
- cgroups[idx] = groups[group_id]
- yield chapter
+ if self.config("metadata"):
+ data["artist"] = [
+ self.api.author(uuid)["data"]["attributes"]["name"]
+ for uuid in relationships["artist"]]
+ data["author"] = [
+ self.api.author(uuid)["data"]["attributes"]["name"]
+ for uuid in relationships["author"]]
+ data["group"] = [
+ self.api.group(uuid)["data"]["attributes"]["name"]
+ for uuid in relationships["scanlation_group"]]
+
+ return data
class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter"
- directory_fmt = (
- "{category}", "{manga}",
- "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}")
- filename_fmt = (
- "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
- archive_fmt = "{chapter_id}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)/chapter/(\d+)"
+ pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
test = (
- ("https://mangadex.org/chapter/122094", {
- "keyword": "89d1b24b4baa1fb737d32711d9f2ade6ea426987",
+ ("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
+ "keyword": "f6c2b908df06eb834d56193dfe1fa1f7c2c4dccd",
# "content": "50383a4c15124682057b197d40261641a98db514",
}),
# oneshot
- ("https://mangadex.cc/chapter/138086", {
+ ("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
+ "options": (("metadata", True),),
"count": 64,
- "keyword": "c53a0e4c12250578a4e630281085875e59532c03",
+ "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb",
}),
# MANGA Plus (#1154)
- ("https://mangadex.org/chapter/1122815", {
- "exception": exception.HttpError,
+ ("https://mangadex.org/chapter/8d50ed68-8298-4ac9-b63d-cb2aea143dd0", {
+ "exception": exception.StopExtraction,
}),
)
- def __init__(self, match):
- MangadexExtractor.__init__(self, match)
- self.chapter_id = match.group(1)
-
def items(self):
- cdata = self.chapter_data(self.chapter_id)
- if "server" not in cdata:
- if cdata["status"] == "external":
- raise exception.StopExtraction(
- "Chapter is not available on MangaDex and can be read on "
- "the official publisher's website at %s.", cdata["pages"])
- raise exception.StopExtraction("No download server available.")
- mdata = self.manga_data(cdata["mangaId"])
-
- chapter, sep, minor = cdata["chapter"].partition(".")
- lang = self.iso639_map.get(cdata["language"], cdata["language"])
-
- base = cdata["server"] + cdata["hash"] + "/"
- if base[0] == "/":
- base = text.urljoin(self.root, base)
-
- if "serverFallback" in cdata:
- fallback = cdata["serverFallback"] + cdata["hash"] + "/"
- else:
- fallback = None
-
- data = {
- "manga" : text.unescape(mdata["title"]),
- "manga_id": mdata["id"],
- "artist" : mdata["artist"],
- "author" : mdata["author"],
- "title" : text.unescape(cdata["title"]),
- "volume" : text.parse_int(cdata["volume"]),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": sep + minor,
- "chapter_id": cdata["id"],
- "group" : [group["name"] for group in cdata["groups"]],
- "date" : text.parse_timestamp(cdata["timestamp"]),
- "lang" : lang,
- "language": util.code_to_language(lang),
- "count" : len(cdata["pages"]),
- }
-
+ try:
+ chapter, data = self._cache.pop(self.uuid)
+ except KeyError:
+ chapter = self.api.chapter(self.uuid)
+ data = self._transform(chapter)
yield Message.Directory, data
- for data["page"], page in enumerate(cdata["pages"], 1):
- if fallback:
- data["_fallback"] = (fallback + page,)
- yield Message.Url, base + page, text.nameext_from_url(page, data)
+
+ cattributes = chapter["data"]["attributes"]
+ base = "{}/data/{}/".format(
+ self.api.athome_server(self.uuid)["baseUrl"], cattributes["hash"])
+ for data["page"], page in enumerate(cattributes["data"], 1):
+ text.nameext_from_url(page, data)
+ yield Message.Url, base + page, data
class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org"""
subcategory = "manga"
- categorytransfer = True
- pattern = (r"(?:https?://)?(?:www\.)?mangadex\.(?:org|cc)"
- r"/(?:title|manga)/(\d+)")
+ pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
test = (
- ("https://mangadex.org/manga/2946/souten-no-koumori", {
- "pattern": r"https://mangadex.org/chapter/\d+",
+ ("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
"keyword": {
"manga" : "Souten no Koumori",
- "manga_id": 2946,
+ "manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
"title" : "re:One[Ss]hot",
"volume" : 0,
"chapter" : 0,
"chapter_minor": "",
- "chapter_id": int,
- "group" : list,
+ "chapter_id": str,
"date" : "type:datetime",
"lang" : str,
"language": str,
},
}),
- ("https://mangadex.cc/manga/13318/dagashi-kashi/chapters/2/", {
+ ("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
+ "options": (("lang", "en"),),
"count": ">= 100",
}),
- ("https://mangadex.org/title/13004/yorumori-no-kuni-no-sora-ni", {
- "count": 0,
+ ("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
+ "count": 1,
}),
)
- def __init__(self, match):
- MangadexExtractor.__init__(self, match)
- self.manga_id = match.group(1)
+ def chapters(self):
+ return self.api.manga_feed(self.uuid)
- def items(self):
- yield Message.Version, 1
- for data in self.chapters():
- url = "{}/chapter/{}".format(self.root, data["chapter_id"])
- yield Message.Queue, url, data
+
+class MangadexFeedExtractor(MangadexExtractor):
+ """Extractor for chapters from your Followed Feed"""
+ subcategory = "feed"
+ pattern = BASE_PATTERN + r"/title/feed$()"
+ test = ("https://mangadex.org/title/feed",)
def chapters(self):
- """Return a sorted list of chapter-metadata dicts"""
- manga = self.manga_data(int(self.manga_id))
- results = []
-
- for cdata in self.manga_chapters(self.manga_id):
- chapter, sep, minor = cdata["chapter"].partition(".")
- lang = self.iso639_map.get(cdata["language"], cdata["language"])
- results.append({
- "manga" : text.unescape(manga["title"]),
- "manga_id": text.parse_int(self.manga_id),
- "artist" : manga["artist"],
- "author" : manga["author"],
- "title" : text.unescape(cdata["title"]),
- "volume" : text.parse_int(cdata["volume"]),
- "chapter" : text.parse_int(chapter),
- "chapter_minor": sep + minor,
- "chapter_id": text.parse_int(cdata["id"]),
- "group" : cdata["groups"],
- "date" : text.parse_timestamp(cdata["timestamp"]),
- "lang" : lang,
- "language": util.code_to_language(lang),
- "_extractor": MangadexChapterExtractor,
- })
-
- results.sort(
- key=lambda x: (x["chapter"], x["chapter_minor"]),
- reverse=self.config("chapter-reverse", False),
- )
- return results
+ return self.api.user_follows_manga_feed()
+
+
+class MangadexAPI():
+ """Interface for the MangaDex API v5"""
+
+ def __init__(self, extr):
+ self.extractor = extr
+ self.headers = {}
+
+ self.username, self.password = self.extractor._get_auth_info()
+ if not self.username:
+ self.authenticate = util.noop
+
+ server = extr.config("api-server")
+ self.root = ("https://api.mangadex.org" if server is None
+ else text.ensure_http_scheme(server).rstrip("/"))
+
+ def athome_server(self, uuid):
+ return self._call("/at-home/server/" + uuid)
+
+ @memcache(keyarg=1)
+ def author(self, uuid):
+ return self._call("/author/" + uuid)
+
+ def chapter(self, uuid):
+ return self._call("/chapter/" + uuid)
+
+ @memcache(keyarg=1)
+ def group(self, uuid):
+ return self._call("/group/" + uuid)
+
+ @memcache(keyarg=1)
+ def manga(self, uuid):
+ return self._call("/manga/" + uuid)
+
+ def manga_feed(self, uuid):
+ config = self.extractor.config
+ order = "desc" if config("chapter-reverse") else "asc"
+ params = {
+ "order[volume]" : order,
+ "order[chapter]" : order,
+ "translatedLanguage[]": config("lang"),
+ }
+ return self._pagination("/manga/" + uuid + "/feed", params)
+
+ def user_follows_manga_feed(self):
+ params = {
+ "order[publishAt]" : "desc",
+ "translatedLanguage[]": self.extractor.config("lang"),
+ }
+ return self._pagination("/user/follows/manga/feed", params)
+
+ def authenticate(self):
+ self.headers["Authorization"] = \
+ self._authenticate_impl(self.username, self.password)
+
+ @cache(maxage=900, keyarg=1)
+ def _authenticate_impl(self, username, password):
+ refresh_token = _refresh_token_cache(username)
+ if refresh_token:
+ self.extractor.log.info("Refreshing access token")
+ url = self.root + "/auth/refresh"
+ data = {"token": refresh_token}
+ else:
+ self.extractor.log.info("Logging in as %s", username)
+ url = self.root + "/auth/login"
+ data = {"username": username, "password": password}
+
+ data = self.extractor.request(
+ url, method="POST", json=data, fatal=None).json()
+ if data.get("result") != "ok":
+ raise exception.AuthenticationError()
+
+ if refresh_token != data["token"]["refresh"]:
+ _refresh_token_cache.update(username, data["token"]["refresh"])
+ return "Bearer " + data["token"]["session"]
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+
+ while True:
+ self.authenticate()
+ response = self.extractor.request(
+ url, params=params, headers=self.headers, fatal=None)
+
+ if response.status_code < 400:
+ return response.json()
+ if response.status_code == 429:
+ until = response.headers.get("X-RateLimit-Retry-After")
+ self.extractor.wait(until=until)
+ continue
+
+ msg = ", ".join('{title}: {detail}'.format_map(error)
+ for error in response.json()["errors"])
+ raise exception.StopExtraction(
+ "%s %s (%s)", response.status_code, response.reason, msg)
+
+ def _pagination(self, endpoint, params=None):
+ if params is None:
+ params = {}
+ params["offset"] = 0
+
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["results"]
+
+ params["offset"] = data["offset"] + data["limit"]
+ if params["offset"] >= data["total"]:
+ return
+
+
+@cache(maxage=28*24*3600, keyarg=0)
+def _refresh_token_cache(username):
+ return None
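
The MangadexAPI class above drives every list endpoint through offset-based pagination: each response reports offset, limit, and total, and the next request simply advances the offset until it reaches the total. A minimal stand-alone sketch of that loop, assuming a plain requests call; the API root and the results/offset/limit/total response fields come from the diff, everything else is illustrative and omits the extractor's auth and rate-limit handling.

# hedged sketch of MangadexAPI-style offset pagination (illustrative only)
import requests

def paginate(endpoint, params=None, root="https://api.mangadex.org"):
    params = dict(params or {})
    params["offset"] = 0
    while True:
        data = requests.get(root + endpoint, params=params).json()
        yield from data["results"]
        params["offset"] = data["offset"] + data["limit"]
        if params["offset"] >= data["total"]:
            return

# usage (uuid is a placeholder): iterate a manga's chapter feed
# for item in paginate("/manga/<uuid>/feed", {"order[chapter]": "asc"}):
#     ...  # each item is one chapter object from the feed
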
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index a123783..a9d504e 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract manga-chapters and entire manga from https://fanfox.net/"""
+"""Extractors for from https://fanfox.net/"""
from .common import ChapterExtractor
from .. import text
@@ -15,14 +15,15 @@ from .. import text
class MangafoxChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from fanfox.net"""
category = "mangafox"
- pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:mangafox\.me|fanfox\.net)"
- r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?#]*)))")
+ pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:fanfox\.net|mangafox\.me)"
+ r"(/manga/[^/]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))")
test = (
("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
"keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
}),
("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
+ ("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"),
)
root = "https://m.fanfox.net"
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 483c657..c798ad0 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -73,6 +73,9 @@ class OAuthBase(Extractor):
print(url, end="\n\n", flush=True)
return (recv or self.recv)()
+ def error(self, msg):
+ return self.send("Remote server reported an error:\n\n" + str(msg))
+
def _oauth1_authorization_flow(
self, request_token_url, authorize_url, access_token_url):
"""Perform the OAuth 1.0a authorization flow"""
@@ -135,8 +138,7 @@ class OAuthBase(Extractor):
))
return
if "error" in params:
- self.send(params["error"])
- return
+ return self.error(params)
# exchange the authorization code for a token
data = {
@@ -156,8 +158,7 @@ class OAuthBase(Extractor):
# check token response
if "error" in data:
- self.send(data["error"])
- return
+ return self.error(data)
token = data[key]
token_name = key.replace("_", "-")
diff --git a/gallery_dl/extractor/philomena.py b/gallery_dl/extractor/philomena.py
index 3cfcb0e..64fc938 100644
--- a/gallery_dl/extractor/philomena.py
+++ b/gallery_dl/extractor/philomena.py
@@ -107,11 +107,11 @@ class PhilomenaPostExtractor(PhilomenaExtractor):
"source_url": "https://www.deviantart.com/speccysy/art"
"/Afternoon-Flight-215193985",
"spoilered": False,
- "tag_count": 37,
+ "tag_count": 38,
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2021-04-07T06:01:30Z",
+ "updated_at": "2021-05-28T17:39:38Z",
"uploader": "Clover the Clever",
"uploader_id": 211188,
"upvotes": int,
@@ -149,6 +149,10 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
"range": "40-60",
"count": 21,
}),
+ (("https://derpibooru.org/tags/"
+ "artist-colon--dash-_-fwslash--fwslash-%255Bkorroki%255D_aternak"), {
+ "count": ">= 2",
+ }),
("https://ponybooru.org/search?q=cute", {
"range": "40-60",
"count": 21,
@@ -159,7 +163,18 @@ class PhilomenaSearchExtractor(PhilomenaExtractor):
PhilomenaExtractor.__init__(self, match)
groups = match.groups()
if groups[-1]:
- self.params = {"q": groups[-1]}
+ q = groups[-1]
+ for old, new in (
+ ("-colon-" , ":"),
+ ("-dash-" , "-"),
+ ("-dot-" , "."),
+ ("-plus-" , "+"),
+ ("-fwslash-", "/"),
+ ("-bwslash-", "\\"),
+ ):
+ if old in q:
+ q = q.replace(old, new)
+ self.params = {"q": text.unquote(text.unquote(q))}
else:
self.params = text.parse_query(groups[-2])
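
The PhilomenaSearchExtractor change above maps Derpibooru's tag-URL slugs back to literal tag names before building the search query. A small isolated sketch of that decoding, reusing the replacement pairs from the diff; urllib.parse.unquote stands in for gallery-dl's own text.unquote, and the sample slug is made up for illustration.

from urllib.parse import unquote

def decode_tag_slug(q):
    # replacement pairs as listed in the extractor above
    for old, new in (
        ("-colon-"  , ":"),
        ("-dash-"   , "-"),
        ("-dot-"    , "."),
        ("-plus-"   , "+"),
        ("-fwslash-", "/"),
        ("-bwslash-", "\\"),
    ):
        if old in q:
            q = q.replace(old, new)
    # the test URL is double-percent-encoded (%255B -> %5B -> [), hence two passes
    return unquote(unquote(q))

# decode_tag_slug("artist-colon-foo-dash-bar") -> "artist:foo-bar"
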
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8076fff..ff07a57 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -321,34 +321,30 @@ class PixivFavoriteExtractor(PixivExtractor):
def __init__(self, match):
uid, kind, self.tag, query = match.groups()
+ query = text.parse_query(query)
- if query:
- self.query = text.parse_query(query)
- uid = self.query.get("id")
+ if not uid:
+ uid = query.get("id")
if not uid:
self.subcategory = "bookmark"
- elif self.query.get("type") == "user":
- self.subcategory = "following"
- self.items = self._items_following
- else:
- self.query = {}
- if kind == "following":
- self.subcategory = "following"
- self.items = self._items_following
+
+ if kind == "following" or query.get("type") == "user":
+ self.subcategory = "following"
+ self.items = self._items_following
PixivExtractor.__init__(self, match)
+ self.query = query
self.user_id = uid
def works(self):
tag = None
- restrict = "public"
-
if "tag" in self.query:
tag = text.unquote(self.query["tag"])
elif self.tag:
tag = text.unquote(self.tag)
- if "rest" in self.query and self.query["rest"] == "hide":
+ restrict = "public"
+ if self.query.get("rest") == "hide":
restrict = "private"
return self.api.user_bookmarks_illust(self.user_id, tag, restrict)
@@ -364,9 +360,11 @@ class PixivFavoriteExtractor(PixivExtractor):
return {"user_bookmark": user}
def _items_following(self):
- yield Message.Version, 1
+ restrict = "public"
+ if self.query.get("rest") == "hide":
+ restrict = "private"
- for preview in self.api.user_following(self.user_id):
+ for preview in self.api.user_following(self.user_id, restrict):
user = preview["user"]
user["_extractor"] = PixivUserExtractor
url = "https://www.pixiv.net/users/{}".format(user["id"])
@@ -622,8 +620,8 @@ class PixivAppAPI():
params = {"user_id": user_id}
return self._call("v1/user/detail", params)["user"]
- def user_following(self, user_id):
- params = {"user_id": user_id}
+ def user_following(self, user_id, restrict="public"):
+ params = {"user_id": user_id, "restrict": restrict}
return self._pagination("v1/user/following", params, "user_previews")
def user_illusts(self, user_id):
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 8611dcb..576564c 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -71,6 +71,6 @@ class RedgifsImageExtractor(RedgifsExtractor):
class RedgifsAPI(GfycatAPI):
- API_ROOT = "https://napi.redgifs.com"
+ API_ROOT = "https://api.redgifs.com"
ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
"AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 9808cb8..2ea6f57 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -54,14 +54,16 @@ class SankakuExtractor(BooruExtractor):
def _prepare(post):
post["created_at"] = post["created_at"]["s"]
post["date"] = text.parse_timestamp(post["created_at"])
- post["tags"] = [tag["name"] for tag in post["tags"]]
+ post["tags"] = [tag["name"] for tag in post["tags"] if tag["name"]]
post["tag_string"] = " ".join(post["tags"])
def _extended_tags(self, post):
tags = collections.defaultdict(list)
types = self.TAG_TYPES
for tag in post["tags"]:
- tags[types[tag["type"]]].append(tag["name"])
+ name = tag["name"]
+ if name:
+ tags[types[tag["type"]]].append(name)
for key, value in tags.items():
post["tags_" + key] = value
post["tag_string_" + key] = " ".join(value)
@@ -160,6 +162,15 @@ class SankakuPostExtractor(SankakuExtractor):
"pattern": r"https://s\.sankakucomplex\.com"
r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
}),
+ # empty tags (#1617)
+ ("https://sankaku.app/post/show/20758561", {
+ "options": (("tags", True),),
+ "count": 1,
+ "keyword": {
+ "tags": list,
+ "tags_general": ["key(mangaka)", "key(mangaka)"],
+ },
+ }),
("https://beta.sankakucomplex.com/post/show/360451"),
("https://chan.sankakucomplex.com/post/show/360451"),
)
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 753f266..83836e5 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -13,7 +13,6 @@ from .. import text, exception
from ..cache import cache
import json
-
BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
@@ -45,8 +44,8 @@ class SubscribestarExtractor(Extractor):
yield Message.Directory, data
for item in media:
item.update(data)
- url = item["url"]
- yield Message.Url, url, text.nameext_from_url(url, item)
+ text.nameext_from_url(item.get("name") or item["url"], item)
+ yield Message.Url, item["url"], item
def posts(self):
"""Yield HTML content of all relevant posts"""
@@ -105,6 +104,8 @@ class SubscribestarExtractor(Extractor):
media.append({
"id" : text.parse_int(text.extract(
att, 'data-upload-id="', '"')[0]),
+ "name": text.unescape(text.extract(
+ att, 'doc_preview-title">', '<')[0] or ""),
"url" : text.extract(att, 'href="', '"')[0],
"type": "attachment",
})
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index afeebb0..5550f96 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -33,10 +33,10 @@ class TwitterExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.textonly = self.config("text-tweets", False)
- self.retweets = self.config("retweets", True)
+ self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
- self.quoted = self.config("quoted", True)
+ self.quoted = self.config("quoted", False)
self.videos = self.config("videos", True)
self.cards = self.config("cards", False)
self._user_cache = {}
@@ -44,7 +44,6 @@ class TwitterExtractor(Extractor):
def items(self):
self.login()
metadata = self.metadata()
- yield Message.Version, 1
for tweet in self.tweets():
@@ -406,7 +405,6 @@ class TwitterFollowingExtractor(TwitterExtractor):
class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for all images from a search timeline"""
subcategory = "search"
- directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
test = ("https://twitter.com/search?q=nature", {
"range": "1-40",
@@ -456,14 +454,14 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("replies", False),),
"count": 0,
}),
- # quoted tweet (#526, #854)
+ # "quoted" option (#854)
("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
+ "options": (("quoted", True),),
"pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg",
"count": 8,
}),
- # "quoted" option (#854)
+ # quoted tweet (#526, #854)
("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
- "options": (("quoted", False),),
"pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg",
"count": 4,
}),
@@ -499,6 +497,7 @@ class TwitterTweetExtractor(TwitterExtractor):
}),
# retweet with missing media entities (#1555)
("https://twitter.com/morino_ya/status/1392763691599237121", {
+ "options": (("retweets", True),),
"count": 4,
}),
)
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
index e89a5b7..6cfc69e 100644
--- a/gallery_dl/extractor/unsplash.py
+++ b/gallery_dl/extractor/unsplash.py
@@ -172,13 +172,16 @@ class UnsplashFavoriteExtractor(UnsplashExtractor):
class UnsplashCollectionExtractor(UnsplashExtractor):
"""Extractor for an unsplash collection"""
subcategory = "collection"
- pattern = BASE_PATTERN + r"/collections/(\d+)"
- test = ("https://unsplash.com/collections/3178572/winter", {
- "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
- r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
- "range": "1-30",
- "count": 30,
- })
+ pattern = BASE_PATTERN + r"/collections/([^/?#]+)"
+ test = (
+ ("https://unsplash.com/collections/3178572/winter", {
+ "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "range": "1-30",
+ "count": 30,
+ }),
+ ("https://unsplash.com/collections/_8qJQ2bCMWE/2021.05"),
+ )
def photos(self):
url = "{}/napi/collections/{}/photos".format(self.root, self.item)
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 164c2a9..dddc03a 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -12,6 +12,7 @@ import time
import errno
import logging
import operator
+import functools
import collections
from . import extractor, downloader, postprocessor
from . import config, text, util, output, exception
@@ -375,17 +376,17 @@ class DownloadJob(Job):
def initialize(self, kwdict=None):
"""Delayed initialization of PathFormat, etc."""
- config = self.extractor.config
+ cfg = self.extractor.config
pathfmt = self.pathfmt = util.PathFormat(self.extractor)
if kwdict:
pathfmt.set_directory(kwdict)
- self.sleep = config("sleep")
- if not config("download", True):
+ self.sleep = cfg("sleep")
+ if not cfg("download", True):
# monkey-patch method to do nothing and always return True
self.download = pathfmt.fix_extension
- archive = config("archive")
+ archive = cfg("archive")
if archive:
path = util.expand_path(archive)
try:
@@ -399,7 +400,7 @@ class DownloadJob(Job):
else:
self.extractor.log.debug("Using download archive '%s'", path)
- skip = config("skip", True)
+ skip = cfg("skip", True)
if skip:
self._skipexc = None
if skip == "enumerate":
@@ -427,7 +428,10 @@ class DownloadJob(Job):
category = self.extractor.category
basecategory = self.extractor.basecategory
+ pp_conf = config.get((), "postprocessor") or {}
for pp_dict in postprocessors:
+ if isinstance(pp_dict, str):
+ pp_dict = pp_conf.get(pp_dict) or {"name": pp_dict}
whitelist = pp_dict.get("whitelist")
if whitelist and category not in whitelist and \
@@ -459,6 +463,23 @@ class DownloadJob(Job):
for callback in self.hooks["init"]:
callback(pathfmt)
+ def register_hooks(self, hooks, options=None):
+ expr = options.get("filter") if options else None
+
+ if expr:
+ condition = util.compile_expression(expr)
+ for hook, callback in hooks.items():
+ self.hooks[hook].append(functools.partial(
+ self._call_hook, callback, condition))
+ else:
+ for hook, callback in hooks.items():
+ self.hooks[hook].append(callback)
+
+ @staticmethod
+ def _call_hook(callback, condition, pathfmt):
+ if condition(pathfmt.kwdict):
+ callback(pathfmt)
+
def _build_blacklist(self):
wlist = self.extractor.config("whitelist")
if wlist is not None:
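
register_hooks above is what implements the new per-post-processor "filter" option: when a filter expression is configured, each hook is wrapped so its callback only runs if the expression evaluates to true against the file's metadata. A hedged illustration of what such a configuration entry might look like, written here as a Python literal; the "name" and "filter" keys match the diff, while the expression and the extension field are assumptions that depend on the extractor.

# hypothetical "postprocessors" config entry using the new "filter" option
postprocessors = [
    {
        "name"  : "zip",
        "filter": "extension in ('jpg', 'png')",
    },
]
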
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 6018542..a046a27 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -114,8 +114,9 @@ def build_parser():
)
general.add_argument(
"--clear-cache",
- dest="clear_cache", metavar="MODULE", nargs="?", const="all",
- help="Delete all cached login sessions, cookies, etc.",
+ dest="clear_cache", metavar="MODULE",
+ help="Delete cached login sessions, cookies, etc. for MODULE "
+ "(ALL to delete everything)",
)
output = parser.add_argument_group("Output Options")
@@ -204,13 +205,6 @@ def build_parser():
"or -1 for infinite retries (default: 4)"),
)
downloader.add_argument(
- "-A", "--abort",
- dest="abort", metavar="N", type=int,
- help=("Abort extractor run after N consecutive file downloads have "
- "been skipped, e.g. if files with the same filename already "
- "exist"),
- )
- downloader.add_argument(
"--http-timeout",
dest="timeout", metavar="SECONDS", type=float, action=ConfigAction,
help="Timeout for HTTP connections (default: 30.0)",
@@ -301,7 +295,19 @@ def build_parser():
"--download-archive",
dest="archive", metavar="FILE", action=ConfigAction,
help=("Record all downloaded files in the archive file and "
- "skip downloading any file already in it."),
+ "skip downloading any file already in it"),
+ )
+ selection.add_argument(
+ "-A", "--abort",
+ dest="abort", metavar="N", type=int,
+ help=("Stop current extractor run "
+ "after N consecutive file downloads were skipped"),
+ )
+ selection.add_argument(
+ "-T", "--terminate",
+ dest="terminate", metavar="N", type=int,
+ help=("Stop current and parent extractor runs "
+ "after N consecutive file downloads were skipped"),
)
selection.add_argument(
"--range",
@@ -335,7 +341,7 @@ def build_parser():
postprocessor.add_argument(
"--zip",
dest="postprocessors",
- action="append_const", const={"name": "zip"},
+ action="append_const", const="zip",
help="Store downloaded files in a ZIP archive",
)
postprocessor.add_argument(
@@ -362,7 +368,7 @@ def build_parser():
postprocessor.add_argument(
"--write-metadata",
dest="postprocessors",
- action="append_const", const={"name": "metadata"},
+ action="append_const", const="metadata",
help="Write metadata to separate JSON files",
)
postprocessor.add_argument(
@@ -374,7 +380,7 @@ def build_parser():
postprocessor.add_argument(
"--mtime-from-date",
dest="postprocessors",
- action="append_const", const={"name": "mtime"},
+ action="append_const", const="mtime",
help="Set file modification times according to 'date' metadata",
)
postprocessor.add_argument(
@@ -392,6 +398,11 @@ def build_parser():
help=("Execute CMD after all files were downloaded successfully. "
"Example: --exec-after 'cd {} && convert * ../doc.pdf'"),
)
+ postprocessor.add_argument(
+ "-P", "--postprocessor",
+ dest="postprocessors", metavar="NAME", action="append",
+ help="Activate the specified post processor",
+ )
parser.add_argument(
"urls",
diff --git a/gallery_dl/postprocessor/classify.py b/gallery_dl/postprocessor/classify.py
index eda092d..34af1d9 100644
--- a/gallery_dl/postprocessor/classify.py
+++ b/gallery_dl/postprocessor/classify.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -31,9 +31,8 @@ class ClassifyPP(PostProcessor):
for directory, exts in mapping.items()
for ext in exts
}
-
- job.hooks["prepare"].append(self.prepare)
- job.hooks["file"].append(self.move)
+ job.register_hooks(
+ {"prepare": self.prepare, "file": self.move}, options)
def prepare(self, pathfmt):
ext = pathfmt.extension
diff --git a/gallery_dl/postprocessor/compare.py b/gallery_dl/postprocessor/compare.py
index ca416c9..1bca593 100644
--- a/gallery_dl/postprocessor/compare.py
+++ b/gallery_dl/postprocessor/compare.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2020 Mike Fährmann
+# Copyright 2020-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -18,11 +18,12 @@ class ComparePP(PostProcessor):
PostProcessor.__init__(self, job)
if options.get("shallow"):
self._compare = self._compare_size
- job.hooks["file"].append(
+
+ job.register_hooks({"file": (
self.enumerate
if options.get("action") == "enumerate" else
self.compare
- )
+ )}, options)
def compare(self, pathfmt):
try:
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index 2514219..8fed723 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -41,8 +41,7 @@ class ExecPP(PostProcessor):
events = ("after",)
elif isinstance(events, str):
events = events.split(",")
- for event in events:
- job.hooks[event].append(execute)
+ job.register_hooks({event: execute for event in events}, options)
def exec_list(self, pathfmt, status=None):
if status:
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 49696a0..ef1d304 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -57,8 +57,7 @@ class MetadataPP(PostProcessor):
events = ("file",)
elif isinstance(events, str):
events = events.split(",")
- for event in events:
- job.hooks[event].append(self.run)
+ job.register_hooks({event: self.run for event in events}, options)
def run(self, pathfmt):
directory = self._directory(pathfmt)
@@ -103,11 +102,18 @@ class MetadataPP(PostProcessor):
if not tags:
return
- if not isinstance(tags, list):
+ if isinstance(tags, str):
taglist = tags.split(", ")
if len(taglist) < len(tags) / 16:
taglist = tags.split(" ")
tags = taglist
+ elif isinstance(tags, dict):
+ taglists = tags.values()
+ tags = []
+ extend = tags.extend
+ for taglist in taglists:
+ extend(taglist)
+ tags.sort()
fp.write("\n".join(tags) + "\n")
diff --git a/gallery_dl/postprocessor/mtime.py b/gallery_dl/postprocessor/mtime.py
index e4c28ea..d2f1915 100644
--- a/gallery_dl/postprocessor/mtime.py
+++ b/gallery_dl/postprocessor/mtime.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,7 +17,7 @@ class MtimePP(PostProcessor):
def __init__(self, job, options):
PostProcessor.__init__(self, job)
self.key = options.get("key", "date")
- job.hooks["file"].append(self.run)
+ job.register_hooks({"file": self.run}, options)
def run(self, pathfmt):
mtime = pathfmt.kwdict.get(self.key)
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index ac094b7..e5bdebc 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -55,8 +55,8 @@ class UgoiraPP(PostProcessor):
else:
self.prevent_odd = False
- job.hooks["prepare"].append(self.prepare)
- job.hooks["file"].append(self.convert)
+ job.register_hooks(
+ {"prepare": self.prepare, "file": self.convert}, options)
def prepare(self, pathfmt):
self._frames = None
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index e820280..1c4bd03 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -38,8 +38,10 @@ class ZipPP(PostProcessor):
self.args = (self.path[:-1] + ext, "a",
self.COMPRESSION_ALGORITHMS[algorithm], True)
- job.hooks["file"].append(
- self.write_safe if options.get("mode") == "safe" else self.write)
+ job.register_hooks({
+ "file":
+ self.write_safe if options.get("mode") == "safe" else self.write,
+ }, options)
job.hooks["finalize"].append(self.finalize)
def write(self, pathfmt, zfile=None):
@@ -56,7 +58,7 @@ class ZipPP(PostProcessor):
def write_safe(self, pathfmt):
with zipfile.ZipFile(*self.args) as zfile:
- self._write(pathfmt, zfile)
+ self.write(pathfmt, zfile)
def finalize(self, pathfmt, status):
if self.zfile:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 78663a0..fbede3e 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -21,6 +21,7 @@ import sqlite3
import binascii
import datetime
import operator
+import functools
import itertools
import urllib.parse
from http.cookiejar import Cookie
@@ -346,8 +347,6 @@ CODES = {
"zh": "Chinese",
}
-SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
-
class UniversalNone():
"""None-style object that supports more operations than None itself"""
@@ -373,6 +372,20 @@ class UniversalNone():
NONE = UniversalNone()
WINDOWS = (os.name == "nt")
SENTINEL = object()
+SPECIAL_EXTRACTORS = {"oauth", "recursive", "test"}
+GLOBALS = {
+ "parse_int": text.parse_int,
+ "urlsplit" : urllib.parse.urlsplit,
+ "datetime" : datetime.datetime,
+ "abort" : raises(exception.StopExtraction),
+ "terminate": raises(exception.TerminateExtraction),
+ "re" : re,
+}
+
+
+def compile_expression(expr, name="<expr>", globals=GLOBALS):
+ code_object = compile(expr, name, "eval")
+ return functools.partial(eval, code_object, globals)
def build_predicate(predicates):
@@ -472,20 +485,13 @@ class UniquePredicate():
class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True"""
- def __init__(self, filterexpr, target="image"):
+ def __init__(self, expr, target="image"):
name = "<{} filter>".format(target)
- self.codeobj = compile(filterexpr, name, "eval")
- self.globals = {
- "parse_int": text.parse_int,
- "urlsplit" : urllib.parse.urlsplit,
- "datetime" : datetime.datetime,
- "abort" : raises(exception.StopExtraction),
- "re" : re,
- }
+ self.expr = compile_expression(expr, name)
- def __call__(self, url, kwds):
+ def __call__(self, _, kwdict):
try:
- return eval(self.codeobj, self.globals, kwds)
+ return self.expr(kwdict)
except exception.GalleryDLException:
raise
except Exception as exc:
@@ -749,25 +755,30 @@ class PathFormat():
}
def __init__(self, extractor):
- filename_fmt = extractor.config("filename")
- if filename_fmt is None:
- filename_fmt = extractor.filename_fmt
-
- directory_fmt = extractor.config("directory")
- if directory_fmt is None:
- directory_fmt = extractor.directory_fmt
-
- extension_map = extractor.config("extension-map")
- if extension_map is None:
- extension_map = self.EXTENSION_MAP
- self.extension_map = extension_map.get
+ config = extractor.config
+ kwdefault = config("keywords-default")
- kwdefault = extractor.config("keywords-default")
+ filename_fmt = config("filename")
try:
+ if filename_fmt is None:
+ filename_fmt = extractor.filename_fmt
+ elif isinstance(filename_fmt, dict):
+ self.filename_conditions = [
+ (compile_expression(expr),
+ Formatter(fmt, kwdefault).format_map)
+ for expr, fmt in filename_fmt.items() if expr
+ ]
+ self.build_filename = self.build_filename_conditional
+ filename_fmt = filename_fmt.get("", extractor.filename_fmt)
+
self.filename_formatter = Formatter(
filename_fmt, kwdefault).format_map
except Exception as exc:
raise exception.FilenameFormatError(exc)
+
+ directory_fmt = config("directory")
+ if directory_fmt is None:
+ directory_fmt = extractor.directory_fmt
try:
self.directory_formatters = [
Formatter(dirfmt, kwdefault).format_map
@@ -784,7 +795,7 @@ class PathFormat():
basedir = extractor._parentdir
if not basedir:
- basedir = extractor.config("base-directory")
+ basedir = config("base-directory")
if basedir is None:
basedir = "." + os.sep + "gallery-dl" + os.sep
elif basedir:
@@ -795,8 +806,13 @@ class PathFormat():
basedir += os.sep
self.basedirectory = basedir
- restrict = extractor.config("path-restrict", "auto")
- replace = extractor.config("path-replace", "_")
+ extension_map = config("extension-map")
+ if extension_map is None:
+ extension_map = self.EXTENSION_MAP
+ self.extension_map = extension_map.get
+
+ restrict = config("path-restrict", "auto")
+ replace = config("path-replace", "_")
if restrict == "auto":
restrict = "\\\\|/<>:\"?*" if WINDOWS else "/"
elif restrict == "unix":
@@ -807,7 +823,7 @@ class PathFormat():
restrict = "^0-9A-Za-z_."
self.clean_segment = self._build_cleanfunc(restrict, replace)
- remove = extractor.config("path-remove", "\x00-\x1f\x7f")
+ remove = config("path-remove", "\x00-\x1f\x7f")
self.clean_path = self._build_cleanfunc(remove, "")
@staticmethod
@@ -927,6 +943,19 @@ class PathFormat():
except Exception as exc:
raise exception.FilenameFormatError(exc)
+ def build_filename_conditional(self):
+ kwdict = self.kwdict
+
+ try:
+ for condition, formatter in self.filename_conditions:
+ if condition(kwdict):
+ break
+ else:
+ formatter = self.filename_formatter
+ return self.clean_path(self.clean_segment(formatter(kwdict)))
+ except Exception as exc:
+ raise exception.FilenameFormatError(exc)
+
def build_path(self):
"""Combine directory and filename to full paths"""
if self._create_directory:
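
The PathFormat changes above add conditional filenames: the "filename" setting may now be a dict mapping filter expressions to format strings, tried in the order given, with the empty-string key serving as the fallback (see filename_fmt.get("") in the diff). A hedged illustration, again as a Python literal; the {id}, {width}, and {extension} fields are assumptions and vary by extractor.

# hypothetical conditional "filename" setting
filename = {
    "extension == 'mp4'": "{id}_video.{extension}",
    "width >= 2000"     : "{id}_large.{extension}",
    ""                  : "{id}.{extension}",
}
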
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 018554e..1a3e0e4 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.17.5"
+__version__ = "1.18.0"
diff --git a/setup.cfg b/setup.cfg
index ccee68f..68f3711 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,8 @@
[flake8]
exclude = gallery_dl/__init__.py,gallery_dl/__main__.py,setup.py,build,scripts,archive
ignore = E203,E226,W504
+per-file-ignores =
+ gallery_dl/extractor/500px.py: E501
[egg_info]
tag_build =
diff --git a/setup.py b/setup.py
index a82176e..ab708d8 100644
--- a/setup.py
+++ b/setup.py
@@ -7,9 +7,6 @@ import os.path
import warnings
from setuptools import setup
-if sys.hexversion < 0x3040000:
- sys.exit("Python 3.4+ required")
-
def read(fname):
path = os.path.join(os.path.dirname(__file__), fname)
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 6bf887c..00c17b2 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -30,13 +30,17 @@ class MockPostprocessorModule(Mock):
class FakeJob():
- def __init__(self):
- self.extractor = extractor.find("test:")
- self.pathfmt = util.PathFormat(self.extractor)
+ def __init__(self, extr=extractor.find("test:")):
+ self.extractor = extr
+ self.pathfmt = util.PathFormat(extr)
self.out = output.NullOutput()
self.get_logger = logging.getLogger
self.hooks = collections.defaultdict(list)
+ def register_hooks(self, hooks, options):
+ for hook, callback in hooks.items():
+ self.hooks[hook].append(callback)
+
class TestPostprocessorModule(unittest.TestCase):
@@ -239,6 +243,15 @@ class MetadataTest(BasePostprocessorTest):
self._trigger()
self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
+ def test_metadata_tags_dict(self):
+ self._create(
+ {"mode": "tags"},
+ {"tags": {"g": ["foobar1", "foobar2"], "m": ["foobarbaz"]}},
+ )
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+ self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n")
+
def test_metadata_custom(self):
def test(pp_info):
pp = self._create(pp_info, {"foo": "bar"})
diff --git a/test/test_results.py b/test/test_results.py
index bf2496b..5b22ecd 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -312,7 +312,7 @@ def setup_test_config():
config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
for category in ("danbooru", "instagram", "twitter", "subscribestar",
- "e621", "inkbunny", "tapas", "pillowfort"):
+ "e621", "inkbunny", "tapas", "pillowfort", "mangadex"):
config.set(("extractor", category), "username", None)
config.set(("extractor", "mastodon.social"), "access-token",
diff --git a/test/test_util.py b/test/test_util.py
index e2f5084..d90d5ad 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -493,6 +493,30 @@ class TestOther(unittest.TestCase):
def test_noop(self):
self.assertEqual(util.noop(), None)
+ def test_compile_expression(self):
+ expr = util.compile_expression("1 + 2 * 3")
+ self.assertEqual(expr(), 7)
+ self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
+ self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 7)
+
+ expr = util.compile_expression("a + b * c")
+ self.assertEqual(expr({"a": 1, "b": 2, "c": 3}), 7)
+ self.assertEqual(expr({"a": 9, "b": 9, "c": 9}), 90)
+
+ with self.assertRaises(NameError):
+ expr()
+ with self.assertRaises(NameError):
+ expr({"a": 2})
+
+ with self.assertRaises(SyntaxError):
+ util.compile_expression("")
+ with self.assertRaises(SyntaxError):
+ util.compile_expression("x++")
+
+ expr = util.compile_expression("1 and abort()")
+ with self.assertRaises(exception.StopExtraction):
+ expr()
+
def test_generate_token(self):
tokens = set()
for _ in range(100):