author     Unit 193 <unit193@unit193.net>  2020-07-20 01:03:23 -0400
committer  Unit 193 <unit193@unit193.net>  2020-07-20 01:03:23 -0400
commit     ba039cfb2e1ba2522ee0a0fa2a84a1a6579e4877
tree       2e057b8e83968c7ab951b1b60aa16b71682f5414
parent     02dd2886783cd303cff6890a741152d013bb00ce
New upstream version 1.14.3 (tag: upstream/1.14.3)
-rw-r--r--  CHANGELOG.md                           |  23
-rw-r--r--  PKG-INFO                               |  18
-rw-r--r--  README.rst                             |  16
-rw-r--r--  data/completion/_gallery-dl            |  56
-rw-r--r--  data/man/gallery-dl.1                  |   2
-rw-r--r--  data/man/gallery-dl.conf.5             |  36
-rw-r--r--  docs/gallery-dl.conf                   |   2
-rw-r--r--  gallery_dl.egg-info/PKG-INFO           |  18
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt        |   3
-rw-r--r--  gallery_dl/cloudflare.py               |  29
-rw-r--r--  gallery_dl/extractor/8muses.py         |  23
-rw-r--r--  gallery_dl/extractor/__init__.py       |   2
-rw-r--r--  gallery_dl/extractor/artstation.py     |  18
-rw-r--r--  gallery_dl/extractor/exhentai.py       |   9
-rw-r--r--  gallery_dl/extractor/foolslide.py      |  15
-rw-r--r--  gallery_dl/extractor/gfycat.py         | 110
-rw-r--r--  gallery_dl/extractor/hypnohub.py       |   2
-rw-r--r--  gallery_dl/extractor/imagefap.py       |   2
-rw-r--r--  gallery_dl/extractor/imgur.py          |   3
-rw-r--r--  gallery_dl/extractor/khinsider.py      |  21
-rw-r--r--  gallery_dl/extractor/mangakakalot.py   | 118
-rw-r--r--  gallery_dl/extractor/mastodon.py       |   2
-rw-r--r--  gallery_dl/extractor/newgrounds.py     |   2
-rw-r--r--  gallery_dl/extractor/patreon.py        |   5
-rw-r--r--  gallery_dl/extractor/pururin.py        |   2
-rw-r--r--  gallery_dl/extractor/reddit.py         |   4
-rw-r--r--  gallery_dl/extractor/redgifs.py        |  76
-rw-r--r--  gallery_dl/extractor/smugmug.py        |   4
-rw-r--r--  gallery_dl/extractor/subscribestar.py  | 227
-rw-r--r--  gallery_dl/extractor/tumblr.py         |   3
-rw-r--r--  gallery_dl/extractor/twitter.py        |  58
-rw-r--r--  gallery_dl/extractor/wallhaven.py      |   2
-rw-r--r--  gallery_dl/extractor/weibo.py          |   4
-rw-r--r--  gallery_dl/job.py                      |   1
-rw-r--r--  gallery_dl/util.py                     |  15
-rw-r--r--  gallery_dl/version.py                  |   2
-rw-r--r--  setup.py                               |   1
-rw-r--r--  test/test_results.py                   |   8
38 files changed, 770 insertions, 172 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f84e423..ac09ee7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,28 @@
# Changelog
+## 1.14.3 - 2020-07-18
+### Additions
+- [8muses] support `comics.8muses.com` URLs
+- [artstation] add `following` extractor ([#888](https://github.com/mikf/gallery-dl/issues/888))
+- [exhentai] add `domain` option ([#897](https://github.com/mikf/gallery-dl/issues/897))
+- [gfycat] add `user` and `search` extractors
+- [imgur] support all `/t/...` URLs ([#880](https://github.com/mikf/gallery-dl/issues/880))
+- [khinsider] add `format` option ([#840](https://github.com/mikf/gallery-dl/issues/840))
+- [mangakakalot] add `manga` and `chapter` extractors ([#876](https://github.com/mikf/gallery-dl/issues/876))
+- [redgifs] support `gifdeliverynetwork.com` URLs ([#874](https://github.com/mikf/gallery-dl/issues/874))
+- [subscribestar] add `user` and `post` extractors ([#852](https://github.com/mikf/gallery-dl/issues/852))
+- [twitter] add support for nitter.net URLs ([#890](https://github.com/mikf/gallery-dl/issues/890))
+- add Zsh completion script ([#150](https://github.com/mikf/gallery-dl/issues/150))
+### Fixes
+- [gfycat] retry 404'ed videos on redgifs.com ([#874](https://github.com/mikf/gallery-dl/issues/874))
+- [newgrounds] fix favorites extraction
+- [patreon] yield images and attachments before post files ([#871](https://github.com/mikf/gallery-dl/issues/871))
+- [reddit] fix AttributeError when using `recursion` ([#879](https://github.com/mikf/gallery-dl/issues/879))
+- [twitter] raise proper exception if a user doesn't exist ([#891](https://github.com/mikf/gallery-dl/issues/891))
+- defer directory creation ([#722](https://github.com/mikf/gallery-dl/issues/722))
+- set pseudo extension for Metadata messages ([#865](https://github.com/mikf/gallery-dl/issues/865))
+- prevent exception on Cloudflare challenges ([#868](https://github.com/mikf/gallery-dl/issues/868))
+
## 1.14.2 - 2020-06-27
### Additions
- [artstation] add `date` metadata field ([#839](https://github.com/mikf/gallery-dl/issues/839))
diff --git a/PKG-INFO b/PKG-INFO
index 5322ef0..ab22502 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.14.2
+Version: 1.14.3
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -165,6 +165,14 @@ Description: ==========
$ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
+ If a site's address is nonstandard for its extractor, you can prefix the URL with the
+ extractor's name to force the use of a specific extractor:
+
+ .. code:: bash
+
+ $ gallery-dl "tumblr:https://sometumblrblog.example"
+
+
Configuration
=============
@@ -209,7 +217,7 @@ Description: ==========
``pixiv``, ``nijie``, and ``seiga``
and optional for
``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``instagram``,
- ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
+ ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -302,7 +310,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 861d8a7..360c02b 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -154,6 +154,14 @@ Filter manga chapters by language and chapter number:
$ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
+If a site's address is nonstandard for its extractor, you can prefix the URL with the
+extractor's name to force the use of a specific extractor:
+
+.. code:: bash
+
+ $ gallery-dl "tumblr:https://sometumblrblog.example"
+
+
Configuration
=============
@@ -198,7 +206,7 @@ a username & password pair. This is necessary for
``pixiv``, ``nijie``, and ``seiga``
and optional for
``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``instagram``,
-``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
+``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -291,7 +299,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/completion/_gallery-dl b/data/completion/_gallery-dl
new file mode 100644
index 0000000..b5074d2
--- /dev/null
+++ b/data/completion/_gallery-dl
@@ -0,0 +1,56 @@
+#compdef gallery-dl
+
+local curcontext="$curcontext"
+typeset -A opt_args
+
+local rc=1
+_arguments -C -S \
+{-h,--help}'[Print this help message and exit]' \
+--version'[Print program version and exit]' \
+{-d,--dest}'[Destination directory]':'<dest>':_files \
+{-i,--input-file}'[Download URLs found in FILE ("-" for stdin)]':'<file>':_files \
+--cookies'[File to load additional cookies from]':'<file>':_files \
+--proxy'[Use the specified proxy]':'<url>' \
+--clear-cache'[Delete all cached login sessions, cookies, etc.]' \
+{-q,--quiet}'[Activate quiet mode]' \
+{-v,--verbose}'[Print various debugging information]' \
+{-g,--get-urls}'[Print URLs instead of downloading]' \
+{-j,--dump-json}'[Print JSON information]' \
+{-s,--simulate}'[Simulate data extraction; do not download anything]' \
+{-K,--list-keywords}'[Print a list of available keywords and example values for the given URLs]' \
+--list-modules'[Print a list of available extractor modules]' \
+--list-extractors'[Print a list of extractor classes with description, (sub)category and example URL]' \
+--write-log'[Write logging output to FILE]':'<file>':_files \
+--write-unsupported'[Write URLs, which get emitted by other extractors but cannot be handled, to FILE]':'<file>':_files \
+--write-pages'[Write downloaded intermediary pages to files in the current directory to debug problems]' \
+{-r,--limit-rate}'[Maximum download rate (e.g. 500k or 2.5M)]':'<rate>' \
+{-R,--retries}'[Maximum number of retries for failed HTTP requests or -1 for infinite retries (default: 4)]':'<n>' \
+{-A,--abort}'[Abort extractor run after N consecutive file downloads have been skipped, e.g. if files with the same filename already exist]':'<n>' \
+--http-timeout'[Timeout for HTTP connections (default: 30.0)]':'<seconds>' \
+--sleep'[Number of seconds to sleep before each download]':'<seconds>' \
+--no-part'[Do not use .part files]' \
+--no-mtime'[Do not set file modification times according to Last-Modified HTTP response headers]' \
+--no-download'[Do not download any files]' \
+--no-check-certificate'[Disable HTTPS certificate validation]' \
+{-c,--config}'[Additional configuration files]':'<file>':_files \
+--config-yaml'[==SUPPRESS==]':'<file>':_files \
+{-o,--option}'[Additional "<key>=<value>" option values]':'<opt>' \
+--ignore-config'[Do not read the default configuration files]' \
+{-u,--username}'[Username to login with]':'<user>' \
+{-p,--password}'[Password belonging to the given username]':'<pass>' \
+--netrc'[Enable .netrc authentication data]' \
+--download-archive'[Record all downloaded files in the archive file and skip downloading any file already in it.]':'<file>':_files \
+--range'[Index-range(s) specifying which images to download. For example "5-10" or "1,3-5,10-"]':'<range>' \
+--chapter-range'[Like "--range", but applies to manga-chapters and other delegated URLs]':'<range>' \
+--filter'[Python expression controlling which images to download. Files for which the expression evaluates to False are ignored. Available keys are the filename-specific ones listed by "-K". Example: --filter "image_width >= 1000 and rating in ("s", "q")"]':'<expr>' \
+--chapter-filter'[Like "--filter", but applies to manga-chapters and other delegated URLs]':'<expr>' \
+--zip'[Store downloaded files in a ZIP archive]' \
+--ugoira-conv'[Convert Pixiv Ugoira to WebM (requires FFmpeg)]' \
+--ugoira-conv-lossless'[Convert Pixiv Ugoira to WebM in VP9 lossless mode]' \
+--write-metadata'[Write metadata to separate JSON files]' \
+--write-tags'[Write image tags to separate text files]' \
+--mtime-from-date'[Set file modification times according to "date" metadata]' \
+--exec'[Execute CMD for each downloaded file. Example: --exec "convert {} {}.png && rm {}"]':'<cmd>' \
+--exec-after'[Execute CMD after all files were downloaded successfully. Example: --exec-after "cd {} && convert * ../doc.pdf"]':'<cmd>' && rc=0
+
+return rc
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 21055ca..f05f2e8 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-06-27" "1.14.2" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-07-18" "1.14.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 7060751..039e750 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-06-27" "1.14.2" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-07-18" "1.14.3" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -290,6 +290,8 @@ and optional for
.br
* \f[I]sankaku\f[]
.br
+* \f[I]subscribestar\f[]
+.br
* \f[I]tsumino\f[]
.br
* \f[I]twitter\f[]
@@ -836,6 +838,22 @@ Minimum wait time in seconds before API requests.
Note: This value will internally be rounded up
to the next power of 2.
+.SS extractor.exhentai.domain
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Description:" 4
+.br
+* \f[I]"auto"\f[]: Use \f[I]e-hentai.org\f[] or \f[I]exhentai.org\f[]
+depending on the input URL
+.br
+* \f[I]"e-hentai.org"\f[]: Use \f[I]e-hentai.org\f[] for all URLs
+.br
+* \f[I]"exhentai.org"\f[]: Use \f[I]exhentai.org\f[] for all URLs
+
.SS extractor.exhentai.limits
.IP "Type:" 6
\f[I]bool\f[]
@@ -1009,6 +1027,22 @@ Include *Story Highlights* when downloading a user profile.
.IP "Description:" 4
Download video files.
+.SS extractor.khinsider.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"mp3"\f[]
+
+.IP "Description:" 4
+The name of the preferred file format to download.
+
+Use \f[I]"all"\f[] to download all available formats,
+or a (comma-separated) list to select multiple formats.
+
+If the selected format is not available,
+the first in the list gets chosen (usually mp3).
+
.SS extractor.kissmanga.captcha
.IP "Type:" 6
\f[I]string\f[]
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index aa54e1a..2db802d 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -44,6 +44,8 @@
{
"username": null,
"password": null,
+ "domain": "auto",
+ "limits": true,
"original": true,
"wait-min": 3.0,
"wait-max": 6.0
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 0b01abc..0007699 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.14.2
+Version: 1.14.3
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.2/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.14.3/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -165,6 +165,14 @@ Description: ==========
$ gallery-dl "r:https://pastebin.com/raw/FLwrCYsT"
+ If a site's address is nonstandard for its extractor, you can prefix the URL with the
+ extractor's name to force the use of a specific extractor:
+
+ .. code:: bash
+
+ $ gallery-dl "tumblr:https://sometumblrblog.example"
+
+
Configuration
=============
@@ -209,7 +217,7 @@ Description: ==========
``pixiv``, ``nijie``, and ``seiga``
and optional for
``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``instagram``,
- ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
+ ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -302,7 +310,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.2.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.14.3.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index e094fe1..80c9f4f 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -4,6 +4,7 @@ MANIFEST.in
README.rst
setup.cfg
setup.py
+data/completion/_gallery-dl
data/completion/gallery-dl
data/man/gallery-dl.1
data/man/gallery-dl.conf.5
@@ -99,6 +100,7 @@ gallery_dl/extractor/luscious.py
gallery_dl/extractor/mangadex.py
gallery_dl/extractor/mangafox.py
gallery_dl/extractor/mangahere.py
+gallery_dl/extractor/mangakakalot.py
gallery_dl/extractor/mangapanda.py
gallery_dl/extractor/mangapark.py
gallery_dl/extractor/mangareader.py
@@ -144,6 +146,7 @@ gallery_dl/extractor/slickpic.py
gallery_dl/extractor/slideshare.py
gallery_dl/extractor/smugmug.py
gallery_dl/extractor/speakerdeck.py
+gallery_dl/extractor/subscribestar.py
gallery_dl/extractor/test.py
gallery_dl/extractor/tsumino.py
gallery_dl/extractor/tumblr.py
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py
index 88068d5..0f49d61 100644
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -32,23 +32,11 @@ def solve_challenge(session, response, kwargs):
"""Solve Cloudflare challenge and get cfclearance cookie"""
parsed = urllib.parse.urlsplit(response.url)
root = parsed.scheme + "://" + parsed.netloc
-
page = response.text
- try:
- params = {"ray": text.extract(page, '?ray=', '"')[0]}
-
- url = root + "/cdn-cgi/images/trace/jschal/nojs/transparent.gif"
- session.request("GET", url, params=params)
-
- url = root + "/cdn-cgi/images/trace/jschal/js/nocookie/transparent.gif"
- session.request("GET", url, params=params)
- except Exception:
- pass
cf_kwargs = {}
headers = cf_kwargs["headers"] = collections.OrderedDict()
params = cf_kwargs["data"] = collections.OrderedDict()
- url = root + text.unescape(text.extract(page, 'action="', '"')[0])
headers["Referer"] = response.url
form = text.extract(page, 'id="challenge-form"', '</form>')[0]
@@ -58,12 +46,27 @@ def solve_challenge(session, response, kwargs):
if not name:
continue
if name == "jschl_answer":
- value = solve_js_challenge(page, parsed.netloc)
+ try:
+ value = solve_js_challenge(page, parsed.netloc)
+ except Exception:
+ return response, None, None
else:
value = element.attrib.get("value")
params[name] = value
+ try:
+ params = {"ray": text.extract(page, '?ray=', '"')[0]}
+
+ url = root + "/cdn-cgi/images/trace/jschal/nojs/transparent.gif"
+ session.request("GET", url, params=params)
+
+ url = root + "/cdn-cgi/images/trace/jschal/js/nocookie/transparent.gif"
+ session.request("GET", url, params=params)
+ except Exception:
+ pass
+
time.sleep(4)
+ url = root + text.unescape(text.extract(page, 'action="', '"')[0])
cf_response = session.request("POST", url, **cf_kwargs)
if cf_response.history:
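
The reordered flow above also fails soft: if the JavaScript challenge cannot be solved, solve_challenge now returns a sentinel instead of raising (#868). A condensed outline of the resulting sequence, where fire_tracking_pixels and post_challenge_form are hypothetical stand-ins for the inline code above:

    def solve_challenge_outline(page, netloc, response):
        try:
            answer = solve_js_challenge(page, netloc)  # 1. compute jschl_answer
        except Exception:
            return response, None, None                #    fail soft (#868)
        fire_tracking_pixels(page)                     # 2. best-effort tracking GETs
        time.sleep(4)                                  # 3. delay Cloudflare expects
        return post_challenge_form(answer)             # 4. POST the form's action URL
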
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index 42fbe12..fafb785 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://www.8muses.com/"""
+"""Extractors for https://comics.8muses.com/"""
from .common import Extractor, Message
from .. import text
@@ -14,30 +14,31 @@ import json
class _8musesAlbumExtractor(Extractor):
- """Extractor for image albums on www.8muses.com"""
+ """Extractor for image albums on comics.8muses.com"""
category = "8muses"
subcategory = "album"
directory_fmt = ("{category}", "{album[path]}")
filename_fmt = "{page:>03}.{extension}"
archive_fmt = "{hash}"
- root = "https://www.8muses.com"
- pattern = (r"(?:https?://)?(?:www\.)?8muses\.com"
+ root = "https://comics.8muses.com"
+ pattern = (r"(?:https?://)?(?:comics\.|www\.)?8muses\.com"
r"(/comics/album/[^?&#]+)(\?[^#]+)?")
test = (
- ("https://www.8muses.com/comics/album/Fakku-Comics/santa/Im-Sorry", {
- "url": "82449d6a26a29204695cba5d52c3ec60170bc159",
+ ("https://comics.8muses.com/comics/album/Fakku-Comics/mogg/Liar", {
+ "url": "6286ac33087c236c5a7e51f8a9d4e4d5548212d4",
+ "pattern": r"https://comics.8muses.com/image/fl/[\w-]+",
"keyword": {
"url" : str,
"hash" : str,
"page" : int,
- "count": 16,
+ "count": 6,
"album": {
- "id" : 10457,
- "title" : "Im Sorry",
- "path" : "Fakku Comics/santa/Im Sorry",
+ "id" : 10467,
+ "title" : "Liar",
+ "path" : "Fakku Comics/mogg/Liar",
"private": False,
"url" : str,
- "parent" : 10454,
+ "parent" : 10464,
"views" : int,
"likes" : int,
"date" : "dt:2018-07-10 00:00:00",
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 561b484..3184663 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -68,6 +68,7 @@ modules = [
"mangadex",
"mangafox",
"mangahere",
+ "mangakakalot",
"mangapanda",
"mangapark",
"mangareader",
@@ -108,6 +109,7 @@ modules = [
"slideshare",
"smugmug",
"speakerdeck",
+ "subscribestar",
"tsumino",
"tumblr",
"twitter",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 64a4bf4..985ad48 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -380,3 +380,21 @@ class ArtstationImageExtractor(ArtstationExtractor):
def get_project_assets(self, project_id):
return self.assets
+
+
+class ArtstationFollowingExtractor(ArtstationExtractor):
+ """Extractor for a user's followed users"""
+ subcategory = "following"
+ pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
+ r"/(?!artwork|projects|search)([^/?&#]+)/following")
+ test = ("https://www.artstation.com/gaerikim/following", {
+ "pattern": ArtstationUserExtractor.pattern,
+ "count": ">= 50",
+ })
+
+ def items(self):
+ url = "{}/users/{}/following.json".format(self.root, self.user)
+ for user in self._pagination(url):
+ url = "{}/{}".format(self.root, user["username"])
+ user["_extractor"] = ArtstationUserExtractor
+ yield Message.Queue, url, user
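
The new extractor downloads nothing itself: each followed user becomes a Message.Queue item whose `_extractor` key pins the class that should handle the queued URL. Roughly how the job layer consumes such items (a simplified sketch of the dispatch in gallery_dl/job.py; run_child_job is a hypothetical stand-in):

    def handle_queue(url, kwdict):
        cls = kwdict.get("_extractor")
        if cls:
            extr = cls.from_url(url)     # e.g. ArtstationUserExtractor
        else:
            extr = extractor.find(url)   # fall back to pattern matching
        if extr:
            run_child_job(extr)
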
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 6cc3abc..bf310ec 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -35,9 +35,12 @@ class ExhentaiExtractor(Extractor):
def __init__(self, match):
version = match.group(1)
- if version != "ex":
- self.root = "https://e-hentai.org"
- self.cookiedomain = ".e-hentai.org"
+ domain = self.config("domain", "auto")
+ if domain == "auto":
+ domain = ("ex" if version == "ex" else "e-") + "hentai.org"
+ self.root = "https://" + domain
+ self.cookiedomain = "." + domain
+
Extractor.__init__(self, match)
self.limits = self.config("limits", True)
self.original = self.config("original", True)
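
The new `domain` option can be set like any other extractor option, either in gallery-dl.conf (see the docs/gallery-dl.conf hunk above) or programmatically with the same `config.set` call the test suite uses:

    from gallery_dl import config

    # Minimal sketch: force e-hentai.org for every input URL. The default
    # "auto" keeps the old behavior of choosing the domain per input URL.
    config.set(("extractor", "exhentai"), "domain", "e-hentai.org")
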
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 731f54b..715e294 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -155,18 +155,13 @@ EXTRACTORS = {
"root": "https://jaiminisbox.com/reader",
"pattern": r"(?:www\.)?jaiminisbox\.com/reader",
"extra": {"decode": "base64"},
- "test-chapter": (
- ("https://jaiminisbox.com/reader/read/fire-force/en/0/215/", {
- "keyword": "6d2b5c0b34344156b0301ff2733389dfe36a7604",
- }),
- ("https://jaiminisbox.com/reader/read/red-storm/en/0/336/", {
- "keyword": "53c6dddf3e5a61b6002a886ccd7e3354e973299a",
+ "test-chapter":
+ ("https://jaiminisbox.com/reader/read/oshi-no-ko/en/0/1/", {
+ "keyword": "d6435cfc1522293a42517a4aadda95a8631da0b3",
}),
- ),
"test-manga":
- ("https://jaiminisbox.com/reader/series/sora_no_kian/", {
- "url": "66612be177dc3b3fa1d1f537ef02f4f701b163ea",
- "keyword": "0908a4145bb03acc4210f5d01169988969f5acd1",
+ ("https://jaiminisbox.com/reader/series/oshi-no-ko/", {
+ "count": ">= 10",
}),
},
"kireicake": {
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index 9cd3b95..b4b0e49 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -6,9 +6,11 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://gfycat.com/"""
+"""Extractors for https://gfycat.com/"""
from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
class GfycatExtractor(Extractor):
@@ -47,11 +49,45 @@ class GfycatExtractor(Extractor):
return ()
+class GfycatUserExtractor(GfycatExtractor):
+ """Extractor for gfycat user profiles"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "{userName}")
+ pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
+ test = ("https://gfycat.com/@gretta", {
+ "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
+ "count": ">= 100",
+ })
+
+ def gfycats(self):
+ return GfycatAPI(self).user(self.key)
+
+
+class GfycatSearchExtractor(GfycatExtractor):
+ """Extractor for gfycat search results"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "Search", "{search}")
+ pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
+ test = ("https://gfycat.com/gifs/search/funny+animals", {
+ "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
+ "archive": False,
+ "range": "100-300",
+ "count": "> 200",
+ })
+
+ def metadata(self):
+ self.key = text.unquote(self.key).replace("+", " ")
+ return {"search": self.key}
+
+ def gfycats(self):
+ return GfycatAPI(self).search(self.key)
+
+
class GfycatImageExtractor(GfycatExtractor):
"""Extractor for individual images from gfycat.com"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
- r"/(?:gifs/detail/|\w+/)?([A-Za-z]+)")
+ r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
test = (
("https://gfycat.com/GrayGenerousCowrie", {
"url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
@@ -78,11 +114,75 @@ class GfycatImageExtractor(GfycatExtractor):
("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
"url": "e24c9f69897fd223343782425a429c5cab6a768e",
}),
+ # retry 404'ed videos on redgifs (#874)
+ ("https://www.gfycat.com/foolishforkedabyssiniancat", {
+ "pattern": "https://redgifs.com/watch/foolishforkedabyssiniancat",
+ }),
("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
("https://gfycat.com/ifr/UnequaledHastyAnkole"),
("https://gfycat.com/ru/UnequaledHastyAnkole"),
)
- def gfycats(self):
- url = "https://api.gfycat.com/v1/gfycats/" + self.key
- return (self.request(url).json()["gfyItem"],)
+ def items(self):
+ try:
+ gfycat = GfycatAPI(self).gfycat(self.key)
+ except exception.HttpError:
+ from .redgifs import RedgifsImageExtractor
+ url = "https://redgifs.com/watch/" + self.key
+ data = {"_extractor": RedgifsImageExtractor}
+ yield Message.Queue, url, data
+ else:
+ url = self._select_format(gfycat)
+ yield Message.Directory, gfycat
+ yield Message.Url, url, gfycat
+
+
+class GfycatAPI():
+ API_ROOT = "https://api.gfycat.com"
+ ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.headers = {}
+
+ def gfycat(self, gfycat_id):
+ endpoint = "/v1/gfycats/" + gfycat_id
+ return self._call(endpoint)["gfyItem"]
+
+ def user(self, user):
+ endpoint = "/v1/users/{}/gfycats".format(user.lower())
+ params = {"count": 100}
+ return self._pagination(endpoint, params)
+
+ def search(self, query):
+ endpoint = "/v1/gfycats/search"
+ params = {"search_text": query, "count": 150}
+ return self._pagination(endpoint, params)
+
+ @cache(keyarg=1, maxage=3600)
+ def _authenticate_impl(self, category):
+ url = "https://weblogin." + category + ".com/oauth/webtoken"
+ data = {"access_key": self.ACCESS_KEY}
+ headers = {"Referer": self.extractor.root + "/",
+ "Origin" : self.extractor.root}
+ response = self.extractor.request(
+ url, method="POST", headers=headers, json=data)
+ return "Bearer " + response.json()["access_token"]
+
+ def _call(self, endpoint, params=None):
+ url = self.API_ROOT + endpoint
+ self.headers["Authorization"] = self._authenticate_impl(
+ self.extractor.category)
+ return self.extractor.request(
+ url, params=params, headers=self.headers).json()
+
+ def _pagination(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+ gfycats = data["gfycats"]
+ yield from gfycats
+
+ if "found" not in data and len(gfycats) < params["count"] or \
+ not data["gfycats"]:
+ return
+ params["cursor"] = data["cursor"]
diff --git a/gallery_dl/extractor/hypnohub.py b/gallery_dl/extractor/hypnohub.py
index 860cebd..17f9a88 100644
--- a/gallery_dl/extractor/hypnohub.py
+++ b/gallery_dl/extractor/hypnohub.py
@@ -23,7 +23,7 @@ class HypnohubTagExtractor(booru.TagMixin, HypnohubExtractor):
pattern = (r"(?:https?://)?(?:www\.)?hypnohub\.net"
r"/post\?(?:[^&#]*&)*tags=(?P<tags>[^&#]+)")
test = ("https://hypnohub.net/post?tags=gonoike_biwa", {
- "url": "0deaf1a2f832cfc4354c531259b949e850da1e7e",
+ "url": "2848abe3e433ad39bfdf5be5874682faaccea5be",
})
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index fd97605..bf0ac63 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://imagefap.com/"""
+"""Extractors for https://www.imagefap.com/"""
from .common import Extractor, Message
from .. import text
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 20b698b..a617975 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -231,7 +231,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
class ImgurGalleryExtractor(ImgurExtractor):
"""Extractor for imgur galleries"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/(?:gallery|t/unmuted)/(\w{7}|\w{5})"
+ pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(\w{7}|\w{5})"
test = (
("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380)
"pattern": "https://imgur.com/zf2fIms",
@@ -242,6 +242,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
("https://imgur.com/t/unmuted/26sEhNr", { # unmuted URL
"pattern": "https://imgur.com/26sEhNr",
}),
+ ("https://imgur.com/t/cat/qSB8NbN"),
)
def items(self):
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 822a743..2550af2 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -56,12 +56,27 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
}}
def tracks(self, page):
- page = text.extract(page, '<table id="songlist">', '</table>')[0]
+ fmt = self.config("format", ("mp3",))
+ if fmt and isinstance(fmt, str):
+ if fmt == "all":
+ fmt = None
+ else:
+ fmt = fmt.lower().split(",")
+ page = text.extract(page, '<table id="songlist">', '</table>')[0]
for num, url in enumerate(text.extract_iter(
page, '<td class="clickable-row"><a href="', '"'), 1):
url = text.urljoin(self.root, url)
page = self.request(url, encoding="utf-8").text
+ track = first = None
- url = text.extract(page, 'style="color: #21363f;" href="', '"')[0]
- yield text.nameext_from_url(url, {"num": num, "url": url})
+ for url in text.extract_iter(
+ page, 'style="color: #21363f;" href="', '"'):
+ track = text.nameext_from_url(url, {"num": num, "url": url})
+ if first is None:
+ first = track
+ if not fmt or track["extension"] in fmt:
+ first = False
+ yield track
+ if first:
+ yield first
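
The loop above implements a preferred-format selection with fallback: every offered file whose extension is in the configured list is yielded, and if none matches, the first offered file (usually mp3) is yielded instead. The same logic in isolation (hypothetical helper, extensions derived from the URLs):

    def select_tracks(urls, fmt=("mp3",)):
        """Yield URLs matching a preferred format, else the first one offered."""
        first = None
        for url in urls:
            ext = url.rpartition(".")[2].lower()
            if first is None:
                first = url                # remember the fallback candidate
            if not fmt or ext in fmt:
                first = False              # a preferred format was found
                yield url
        if first:                          # nothing matched the preference
            yield first
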
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
new file mode 100644
index 0000000..8686b2d
--- /dev/null
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Jake Mannens
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract manga-chapters and entire manga from https://mangakakalot.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+import re
+
+
+class MangakakalotBase():
+ """Base class for mangakakalot extractors"""
+ category = "mangakakalot"
+ root = "https://mangakakalot.com"
+
+ @staticmethod
+ def parse_page(page, data):
+ """Parse metadata on 'page' and add it to 'data'"""
+ text.extract_all(page, (
+ ("manga" , '<h1>', '</h1>'),
+ ('author' , '<li>Author(s) :\n', '</a>'),
+ ), values=data)
+ data["author"] = text.remove_html(data["author"])
+ return data
+
+
+class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
+ """Extractor for manga-chapters from mangakakalot.com"""
+ pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com"
+ r"(/chapter/\w+/chapter_[^/?&#]+)")
+ test = (
+ ("https://mangakakalot.com/chapter/rx922077/chapter_6", {
+ "pattern": r"https://s\d+\.\w+\.com/mangakakalot/r\d+/rx922077/"
+ r"chapter_6_master_help_me_out/\d+\.jpg",
+ "keyword": "80fde46d2210a6c17f0b2f7c1c89f0f56b65e157",
+ "count": 14,
+ }),
+ (("https://mangakakalot.com/chapter"
+ "/hatarakanai_futari_the_jobless_siblings/chapter_20.1"), {
+ "keyword": "6b24349bb16f41ef1c4350200c1ccda5f09ae136",
+ "content": "7196aed8bb1536806bf55033ed1f2ed172c86f9a",
+ "count": 2,
+ }),
+ )
+
+ def __init__(self, match):
+ self.path = match.group(1)
+ ChapterExtractor.__init__(self, match, self.root + self.path)
+ self.session.headers['Referer'] = self.root
+
+ def metadata(self, page):
+ _ , pos = text.extract(page, '<span itemprop="name">', '<')
+ manga , pos = text.extract(page, '<span itemprop="name">', '<', pos)
+ info , pos = text.extract(page, '<span itemprop="name">', '<', pos)
+ author, pos = text.extract(page, '. Author: ', ' already has ', pos)
+
+ match = re.match(
+ r"(?:[Vv]ol\. *(\d+) )?"
+ r"[Cc]hapter *([^:]*)"
+ r"(?:: *(.+))?", info)
+ volume, chapter, title = match.groups() if match else ("", "", info)
+ chapter, sep, minor = chapter.partition(".")
+
+ return {
+ "manga" : text.unescape(manga),
+ "title" : text.unescape(title) if title else "",
+ "author" : text.unescape(author) if author else "",
+ "volume" : text.parse_int(volume),
+ "chapter" : text.parse_int(chapter),
+ "chapter_minor": sep + minor,
+ "lang" : "en",
+ "language" : "English",
+ }
+
+ def images(self, page):
+ page = text.extract(page, 'id="vungdoc"', '\n</div>')[0]
+ return [
+ (url, None)
+ for url in text.extract_iter(page, '<img src="', '"')
+ ]
+
+
+class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor):
+ """Extractor for manga from mangakakalot.com"""
+ chapterclass = MangakakalotChapterExtractor
+ pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com"
+ r"(/(?:manga/|read-)\w+)")
+ test = (
+ ("https://mangakakalot.com/manga/lk921810", {
+ "url": "d262134b65993b031406f7b9d9442c9afd321a27",
+ }),
+ ("https://mangakakalot.com/read-ry3sw158504884246", {
+ "pattern": MangakakalotChapterExtractor.pattern,
+ "count": ">= 40"
+ }),
+ )
+
+ def chapters(self, page):
+ results = []
+ data = self.parse_page(page, {"lang": "en", "language": "English"})
+
+ needle = '<div class="row">\n<span><a href="'
+ pos = page.index('<div class="chapter-list">')
+ while True:
+ url, pos = text.extract(page, needle, '"', pos)
+ if not url:
+ return results
+ data["title"], pos = text.extract(page, '>', '</a>', pos)
+ data["date"] , pos = text.extract(page, '<span title="', '">', pos)
+ chapter, sep, minor = url.rpartition("/chapter_")[2].partition(".")
+ data["chapter"] = text.parse_int(chapter)
+ data["chapter_minor"] = sep + minor
+ results.append((url, data.copy()))
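
A worked example of how the chapter-info regex in metadata() above splits a typical heading (illustrative input string):

    import re

    info = "Vol. 3 Chapter 20.1: Helping Out"
    match = re.match(
        r"(?:[Vv]ol\. *(\d+) )?"
        r"[Cc]hapter *([^:]*)"
        r"(?:: *(.+))?", info)
    volume, chapter, title = match.groups()    # ("3", "20.1", "Helping Out")
    chapter, sep, minor = chapter.partition(".")
    # -> volume=3, chapter=20, chapter_minor=".1", title="Helping Out"
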
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index fa1fecc..ac17cb0 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -212,7 +212,7 @@ EXTRACTORS = {
"client-secret": "DdrODTHs_XoeOsNVXnILTMabtdpWrWOAtrmw91wU1zI",
"test-user" : ("https://mastodon.social/@jk", {
"pattern": r"https://files.mastodon.social/media_attachments"
- r"/files/\d+/\d+/\d+/original/\w+",
+ r"/files/(\d+/){3,}original/\w+",
"range": "1-60",
"count": 60,
}),
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 84794ad..19a2b92 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -389,7 +389,7 @@ class NewgroundsFavoriteExtractor(NewgroundsExtractor):
return [
self.root + path
for path in text.extract_iter(
- page, 'href="//www.newgrounds.com', '"')
+ page, 'href="https://www.newgrounds.com', '"')
]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index a14ec9c..f1e98d9 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -38,13 +38,12 @@ class PatreonExtractor(Extractor):
hashes = set()
yield Message.Directory, post
- yield Message.Metadata, text.nameext_from_url(
- post["creator"].get("image_url", ""), post)
+ yield Message.Metadata, post
for kind, url, name in itertools.chain(
- self._postfile(post),
self._images(post),
self._attachments(post),
+ self._postfile(post),
self._content(post),
):
fhash = self._filehash(url)
diff --git a/gallery_dl/extractor/pururin.py b/gallery_dl/extractor/pururin.py
index 721fc2f..26a5cd9 100644
--- a/gallery_dl/extractor/pururin.py
+++ b/gallery_dl/extractor/pururin.py
@@ -36,7 +36,7 @@ class PururinGalleryExtractor(GalleryExtractor):
"convention": "C92",
"rating" : float,
"uploader" : "demo",
- "scanlator" : "",
+ "scanlator" : "mrwayne",
"lang" : "en",
"language" : "English",
}
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 6331b77..679059c 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -17,7 +17,7 @@ class RedditExtractor(Extractor):
"""Base class for reddit extractors"""
category = "reddit"
directory_fmt = ("{category}", "{subreddit}")
- filename_fmt = "{id} {title[:242]}.{extension}"
+ filename_fmt = "{id} {title[:220]}.{extension}"
archive_fmt = "{filename}"
cookiedomain = None
@@ -101,7 +101,7 @@ class RedditExtractor(Extractor):
depth += 1
submissions = (
self.api.submission(sid) for sid in extra
- if sid not in self._visited
+ if sid not in visited
)
def submissions(self):
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index dcbbc0d..4477825 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -8,8 +8,8 @@
"""Extractors for https://redgifs.com/"""
-from .gfycat import GfycatExtractor
-from ..cache import cache
+from .gfycat import GfycatExtractor, GfycatAPI
+from .. import text
class RedgifsExtractor(GfycatExtractor):
@@ -44,7 +44,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
})
def metadata(self):
- self.key = self.key.replace("-", " ")
+ self.key = text.unquote(self.key).replace("-", " ")
return {"search": self.key}
def gfycats(self):
@@ -54,65 +54,21 @@ class RedgifsSearchExtractor(RedgifsExtractor):
class RedgifsImageExtractor(RedgifsExtractor):
"""Extractor for individual gifs from redgifs.com"""
subcategory = "image"
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/watch/([A-Za-z]+)"
- test = ("https://redgifs.com/watch/foolishforkedabyssiniancat", {
- "pattern": r"https://\w+.redgifs.com/FoolishForkedAbyssiniancat.mp4",
- "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
- })
+ pattern = (r"(?:https?://)?(?:www\.)?(?:redgifs\.com/watch"
+ r"|gifdeliverynetwork.com)/([A-Za-z]+)")
+ test = (
+ ("https://redgifs.com/watch/foolishforkedabyssiniancat", {
+ "pattern": r"https://\w+.redgifs.com/FoolishForkedAbyss.+.mp4",
+ "content": "f6e03f1df9a2ff2a74092f53ee7580d2fb943533",
+ }),
+ ("https://www.gifdeliverynetwork.com/foolishforkedabyssiniancat"),
+ )
def gfycats(self):
return (RedgifsAPI(self).gfycat(self.key),)
-class RedgifsAPI():
-
- def __init__(self, extractor):
- self.extractor = extractor
- self.headers = {}
-
- def gfycat(self, gfycat_id):
- endpoint = "v1/gfycats/" + gfycat_id
- return self._call(endpoint)["gfyItem"]
-
- def user(self, user):
- endpoint = "v1/users/{}/gfycats".format(user.lower())
- params = {"count": 100}
- return self._pagination(endpoint, params)
-
- def search(self, query):
- endpoint = "v1/gfycats/search"
- params = {"search_text": query, "count": 150}
- return self._pagination(endpoint, params)
-
- @cache(maxage=3600)
- def _authenticate_impl(self):
- url = "https://weblogin.redgifs.com/oauth/webtoken"
- headers = {
- "Referer": "https://www.redgifs.com/",
- "Origin" : "https://www.redgifs.com",
- }
- data = {
- "access_key": "dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
- "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9",
- }
-
- response = self.extractor.request(
- url, method="POST", headers=headers, json=data)
- return "Bearer " + response.json()["access_token"]
-
- def _call(self, endpoint, params=None):
- self.headers["Authorization"] = self._authenticate_impl()
- url = "https://napi.redgifs.com/" + endpoint
- return self.extractor.request(
- url, params=params, headers=self.headers).json()
-
- def _pagination(self, endpoint, params):
- while True:
- data = self._call(endpoint, params)
- gfycats = data["gfycats"]
- yield from gfycats
-
- if "found" not in data and len(gfycats) < params["count"] or \
- not data["gfycats"]:
- return
- params["cursor"] = data["cursor"]
+class RedgifsAPI(GfycatAPI):
+ API_ROOT = "https://napi.redgifs.com/"
+ ACCESS_KEY = ("dBLwVuGn9eq4dtXLs8WSfpjcYFY7bPQe"
+ "AqGPSFgqeW5B9uzj2cMVhF63pTFF4Rg9")
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 69b8cb9..44a0a84 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -112,13 +112,13 @@ class SmugmugImageExtractor(SmugmugExtractor):
test = (
("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
"url": "f624ad7293afd6412a7d34e3950a118596c36c85",
- "keyword": "ea70e93be5067dca988d871dcf9afac491a189a4",
+ "keyword": "085861b5935e3cd96ad15954039bc2419cdf1c27",
"content": "ecbd9d7b4f75a637abc8d35319be9ec065a44eb0",
}),
# video
("https://tstravels.smugmug.com/Dailies/Daily-Dose-2015/i-39JFNzB", {
"url": "04d0ab1ff829ca7d78f5acb5548953df08e9a5ee",
- "keyword": "c708c4b9527a2fb29396c19f7628f9cf4b0b3a39",
+ "keyword": "e0927fda7b1c39c19974625270102ad7e72b9d6f",
}),
)
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
new file mode 100644
index 0000000..08d8850
--- /dev/null
+++ b/gallery_dl/extractor/subscribestar.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.subscribestar.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+import datetime
+import json
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?subscribestar\.(com|adult)"
+
+
+class SubscribestarExtractor(Extractor):
+ """Base class for subscribestar extractors"""
+ category = "subscribestar"
+ root = "https://www.subscribestar.com"
+ directory_fmt = ("{category}", "{author_name}")
+ filename_fmt = "{post_id}_{id}.{extension}"
+ archive_fmt = "{id}"
+ cookiedomain = "www.subscribestar.com"
+ cookienames = ("auth_token",)
+
+ def __init__(self, match):
+ tld, self.item = match.groups()
+ if tld == "adult":
+ self.root = "https://subscribestar.adult"
+ self.cookiedomain = "subscribestar.adult"
+ self.subcategory += "-adult"
+ Extractor.__init__(self, match)
+ self.metadata = self.config("metadata", False)
+ self._year = " " + str(datetime.date.today().year)
+
+ def items(self):
+ self.login()
+ for post_html in self.posts():
+ media = self._media_from_post(post_html)
+ if not media:
+ continue
+ data = self._data_from_post(post_html)
+ yield Message.Directory, data
+ for item in media:
+ item.update(data)
+ url = item["url"]
+ yield Message.Url, url, text.nameext_from_url(url, item)
+
+ def posts(self):
+ """Yield HTML content of all relevant posts"""
+
+ def login(self):
+ if self._check_cookies(self.cookienames):
+ return
+ username, password = self._get_auth_info()
+ if username:
+ cookies = self._login_impl(username, password)
+ self._update_cookies(cookies)
+
+ @cache(maxage=28*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = "https://www.subscribestar.com/session.json"
+ headers = {
+ "Origin" : "https://www.subscribestar.com",
+ "Referer" : "https://www.subscribestar.com/login",
+ "X-Requested-With": "XMLHttpRequest",
+ }
+ data = {
+ "utf8" : "✓",
+ "email" : username,
+ "password": password,
+ }
+
+ response = self.request(
+ url, method="POST", headers=headers, data=data, fatal=False)
+ if response.json().get("errors"):
+ self.log.debug(response.json()["errors"])
+ raise exception.AuthenticationError()
+
+ return {
+ cookie.name: cookie.value
+ for cookie in response.cookies
+ if cookie.name.startswith("auth")
+ }
+
+ @staticmethod
+ def _media_from_post(html):
+ gallery = text.extract(html, 'data-gallery="', '"')[0]
+ if gallery:
+ return [
+ item for item in json.loads(text.unescape(gallery))
+ if "/previews/" not in item["url"]
+ ]
+ return ()
+
+ def _data_from_post(self, html):
+ extr = text.extract_from(html)
+ data = {
+ "post_id" : text.parse_int(extr('data-id="', '"')),
+ "author_id" : text.parse_int(extr('data-user-id="', '"')),
+ "author_name": text.unescape(extr('href="/', '"')),
+ "author_nick": text.unescape(extr('>', '<')),
+ "content" : (extr(
+ '<div class="post-content', '<div class="post-uploads')
+ .partition(">")[2]),
+ }
+
+ if self.metadata:
+ url = "{}/posts/{}".format(self.root, data["post_id"])
+ page = self.request(url).text
+ data["date"] = self._parse_datetime(text.extract(
+ page, 'class="section-subtitle">', '<')[0])
+
+ return data
+
+ def _parse_datetime(self, dt):
+ date = text.parse_datetime(dt, "%B %d, %Y %H:%M")
+ if date is dt:
+ date = text.parse_datetime(dt + self._year, "%d %b %H:%M %Y")
+ return date
+
+
+class SubscribestarUserExtractor(SubscribestarExtractor):
+ """Extractor for media from a subscribestar user"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/(?!posts/)([^/?&#]+)"
+ test = (
+ ("https://www.subscribestar.com/subscribestar", {
+ "count": ">= 20",
+ "pattern": r"https://star-uploads.s\d+-us-west-\d+.amazonaws.com"
+ r"/uploads/users/11/",
+ "keyword": {
+ "author_id": 11,
+ "author_name": "subscribestar",
+ "author_nick": "SubscribeStar",
+ "content": str,
+ "height" : int,
+ "id" : int,
+ "pinned" : bool,
+ "post_id": int,
+ "type" : "re:image|video",
+ "url" : str,
+ "width" : int,
+ },
+ }),
+ ("https://www.subscribestar.com/subscribestar", {
+ "options": (("metadata", True),),
+ "keyword": {"date": "type:datetime"},
+ "range": "1",
+ }),
+ ("https://subscribestar.adult/kanashiipanda", {
+ "range": "21-40",
+ "count": 20,
+ }),
+ )
+
+ def posts(self):
+ needle_next_page = 'data-role="infinite_scroll-next_page" href="'
+ page = self.request("{}/{}".format(self.root, self.item)).text
+
+ while True:
+ posts = page.split('<div class="post ')[1:]
+ if not posts:
+ return
+ yield from posts
+
+ url = text.extract(posts[-1], needle_next_page, '"')[0]
+ if not url:
+ return
+ page = self.request(self.root + text.unescape(url)).json()["html"]
+
+
+class SubscribestarPostExtractor(SubscribestarExtractor):
+ """Extractor for media from a single subscribestar post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/posts/(\d+)"
+ test = (
+ ("https://www.subscribestar.com/posts/102468", {
+ "url": "612da5a98af056dd78dc846fbcfa705e721f6675",
+ "keyword": {
+ "author_id": 11,
+ "author_name": "subscribestar",
+ "author_nick": "SubscribeStar",
+ "content": "re:<h1>Brand Guidelines and Assets</h1>",
+ "date": "dt:2020-05-07 12:33:00",
+ "extension": "jpg",
+ "filename": "8ff61299-b249-47dc-880a-cdacc9081c62",
+ "group": "imgs_and_videos",
+ "height": 291,
+ "id": 203885,
+ "pinned": False,
+ "post_id": 102468,
+ "type": "image",
+ "width": 700,
+ },
+ }),
+ ("https://subscribestar.adult/posts/22950", {
+ "url": "440d745a368e6b3e218415f593a5045f384afa0d",
+ "keyword": {"date": "dt:2019-04-28 07:32:00"},
+ }),
+ )
+
+ def posts(self):
+ url = "{}/posts/{}".format(self.root, self.item)
+ self._page = self.request(url).text
+ return (self._page,)
+
+ def _data_from_post(self, html):
+ extr = text.extract_from(html)
+ return {
+ "post_id" : text.parse_int(extr('data-id="', '"')),
+ "author_name": text.unescape(extr('href="/', '"')),
+ "author_id" : text.parse_int(extr('data-user-id="', '"')),
+ "author_nick": text.unescape(extr('alt="', '"')),
+ "date" : self._parse_datetime(extr(
+ 'class="section-subtitle">', '<')),
+ "content" : (extr(
+ '<div class="post-content', '<div class="post-uploads')
+ .partition(">")[2]),
+ }
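
SubscribeStar credentials are optional (see the README hunks above). When provided, `_login_impl` POSTs them to /session.json once, and `@cache(maxage=28*24*3600, keyarg=1)` keeps the returned auth cookies for 28 days per username. Setting them programmatically (placeholder credentials):

    from gallery_dl import config

    config.set(("extractor", "subscribestar"), "username", "user@example.org")
    config.set(("extractor", "subscribestar"), "password", "example-password")
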
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 70fead8..4d51851 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -276,9 +276,6 @@ class TumblrPostExtractor(TumblrExtractor):
("https://mikf123.tumblr.com/post/181022380064/chat-post", {
"count": 0,
}),
- ("http://pinetre-3.tumblr.com/post/181904381470/via", {
- "count": 0, # audio post with "null" as URL (#165)
- }),
("http://ziemniax.tumblr.com/post/109697912859/", {
"exception": exception.NotFoundError, # HTML response (#297)
}),
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 1e985e3..2530040 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -15,6 +15,12 @@ import hashlib
import time
+BASE_PATTERN = (
+ r"(?:https?://)?(?:www\.|mobile\.)?"
+ r"(?:twitter\.com|nitter\.net)"
+)
+
+
class TwitterExtractor(Extractor):
"""Base class for twitter extractors"""
category = "twitter"
@@ -42,9 +48,14 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
- if (not self.retweets and "retweeted_status_id_str" in tweet or
- not self.replies and "in_reply_to_user_id_str" in tweet or
- not self.quoted and "quoted" in tweet):
+ if not self.retweets and "retweeted_status_id_str" in tweet:
+ self.log.debug("Skipping %s (retweet)", tweet["id_str"])
+ continue
+ if not self.replies and "in_reply_to_user_id_str" in tweet:
+ self.log.debug("Skipping %s (reply)", tweet["id_str"])
+ continue
+ if not self.quoted and "quoted" in tweet:
+ self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
if self.twitpic:
@@ -234,8 +245,7 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/(?!search)([^/?&#]+)/?(?:$|[?#])")
+ pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/?(?:$|[?#])"
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
@@ -251,8 +261,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for all images from a user's Media Tweets"""
subcategory = "media"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/(?!search)([^/?&#]+)/media(?!\w)")
+ pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/media(?!\w)"
test = (
("https://twitter.com/supernaturepics/media", {
"range": "1-40",
@@ -268,8 +277,7 @@ class TwitterMediaExtractor(TwitterExtractor):
class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/(?!search)([^/?&#]+)/likes(?!\w)")
+ pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/likes(?!\w)"
test = ("https://twitter.com/supernaturepics/likes",)
def tweets(self):
@@ -279,7 +287,7 @@ class TwitterLikesExtractor(TwitterExtractor):
class TwitterBookmarkExtractor(TwitterExtractor):
"""Extractor for bookmarked tweets"""
subcategory = "bookmark"
- pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
+ pattern = BASE_PATTERN + r"/i/bookmarks()"
test = ("https://twitter.com/i/bookmarks",)
def tweets(self):
@@ -290,8 +298,7 @@ class TwitterSearchExtractor(TwitterExtractor):
"""Extractor for all images from a search timeline"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/search/?\?(?:[^&#]+&)*q=([^&#]+)")
+ pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
test = ("https://twitter.com/search?q=nature", {
"range": "1-40",
"count": 40,
@@ -307,8 +314,7 @@ class TwitterSearchExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
- pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/([^/?&#]+|i/web)/status/(\d+)")
+ pattern = BASE_PATTERN + r"/([^/?&#]+|i/web)/status/(\d+)"
test = (
("https://twitter.com/supernaturepics/status/604341487988576256", {
"url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
@@ -357,6 +363,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
"count": 3,
}),
+ # Nitter tweet
+ ("https://nitter.net/ed1conf/status/1163841619336007680", {
+ "url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98",
+ "content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
+ }),
)
def __init__(self, match):
@@ -474,7 +485,10 @@ class TwitterAPI():
"variables": '{"screen_name":"' + screen_name + '"'
',"withHighlightedLabel":true}'
}
- return self._call(endpoint, params)["data"]["user"]
+ try:
+ return self._call(endpoint, params)["data"]["user"]
+ except KeyError:
+ raise exception.NotFoundError("user")
@cache(maxage=3600)
def _guest_token(self):
@@ -491,8 +505,16 @@ class TwitterAPI():
if response.status_code == 429:
self.extractor.wait(until=response.headers["x-rate-limit-reset"])
return self._call(endpoint, params)
+
+ try:
+ msg = ", ".join(
+ '"' + error["message"] + '"'
+ for error in response.json()["errors"]
+ )
+ except Exception:
+ msg = response.text
raise exception.StopExtraction(
- "%s %s (%s)", response.status_code, response.reason, response.text)
+ "%s %s (%s)", response.status_code, response.reason, msg)
def _pagination(self, endpoint, params=None,
entry_tweet="tweet-", entry_cursor="cursor-bottom-"):
@@ -517,8 +539,8 @@ class TwitterAPI():
entry["content"]["item"]["content"]["tweet"]["id"]]
except KeyError:
self.extractor.log.debug(
- "Skipping unavailable Tweet %s",
- entry["entryId"][6:])
+ "Skipping %s (deleted)",
+ entry["entryId"][len(entry_tweet):])
continue
tweet["user"] = users[tweet["user_id_str"]]
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 043da0b..20980ac 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -34,7 +34,7 @@ class WallhavenSearchExtractor(WallhavenExtractor):
(("https://wallhaven.cc/search?q=id%3A87"
"&categories=111&purity=100&sorting=date_added&order=asc&page=3"), {
"pattern": r"https://w.wallhaven.cc/full/\w\w/wallhaven-\w+\.\w+",
- "count": "<= 10",
+ "count": "<= 20",
}),
)
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index d1ad388..0b1b2d9 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -135,9 +135,7 @@ class WeiboStatusExtractor(WeiboExtractor):
"exception": exception.NotFoundError,
}),
# non-numeric status ID (#664)
- ("https://weibo.com/3314883543/Iy7fj4qVg", {
- "pattern": r"https?://f.video.weibocdn.com/\w+\.mp4\?label=mp4_hd",
- }),
+ ("https://weibo.com/3314883543/Iy7fj4qVg"),
("https://m.weibo.cn/status/4339748116375525"),
("https://m.weibo.cn/5746766133/4339748116375525"),
)
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 923a4e6..4c18e4d 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -281,6 +281,7 @@ class DownloadJob(Job):
postprocessors = self.postprocessors
if postprocessors:
+ kwdict["extension"] = "metadata"
pathfmt = self.pathfmt
pathfmt.set_filename(kwdict)
for pp in postprocessors:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index c8d73b6..f688fa6 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -721,12 +721,10 @@ class PathFormat():
raise exception.DirectoryFormatError(exc)
self.directory = self.realdirectory = ""
- self.filename = ""
- self.extension = ""
- self.prefix = ""
- self.kwdict = {}
- self.delete = False
+ self.filename = self.extension = self.prefix = ""
self.path = self.realpath = self.temppath = ""
+ self.kwdict = {}
+ self.delete = self._create_directory = False
basedir = extractor._parentdir
if not basedir:
@@ -831,9 +829,7 @@ class PathFormat():
directory += sep
self.realdirectory = directory
-
- # Create directory tree
- os.makedirs(self.realdirectory, exist_ok=True)
+ self._create_directory = True
def set_filename(self, kwdict):
"""Set general filename data"""
@@ -872,6 +868,9 @@ class PathFormat():
def build_path(self):
"""Combine directory and filename to full paths"""
+ if self._create_directory:
+ os.makedirs(self.realdirectory, exist_ok=True)
+ self._create_directory = False
self.filename = filename = self.build_filename()
self.path = self.directory + filename
self.realpath = self.realdirectory + filename
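
With this change, set_directory only records that a directory will be needed; os.makedirs runs the first time build_path is called, i.e. once a file is actually about to be written, so skipped downloads no longer leave empty directories behind (#722). The pattern in isolation (condensed sketch of PathFormat):

    import os

    class PathBuilder:
        def __init__(self):
            self.realdirectory = ""
            self._create_directory = False

        def set_directory(self, path):
            self.realdirectory = path
            self._create_directory = True   # remember, but do not create yet

        def build_path(self, filename):
            if self._create_directory:      # first real file: create lazily
                os.makedirs(self.realdirectory, exist_ok=True)
                self._create_directory = False
            return os.path.join(self.realdirectory, filename)
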
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 3297d03..fd52077 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.14.2"
+__version__ = "1.14.3"
diff --git a/setup.py b/setup.py
index e31a38c..d7226ea 100644
--- a/setup.py
+++ b/setup.py
@@ -37,6 +37,7 @@ FILES = [
(path, [f for f in files if check_file(f)])
for (path, files) in [
("share/bash-completion/completions", ["data/completion/gallery-dl"]),
+ ("share/zsh/site-functions" , ["data/completion/_gallery-dl"]),
("share/man/man1" , ["data/man/gallery-dl.1"]),
("share/man/man5" , ["data/man/gallery-dl.conf.5"]),
]
diff --git a/test/test_results.py b/test/test_results.py
index 6a943aa..dd1ed1d 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -309,10 +309,6 @@ def setup_test_config():
config.set(("extractor", "nijie") , "username", email)
config.set(("extractor", "seiga") , "username", email)
- config.set(("extractor", "danbooru") , "username", None)
- config.set(("extractor", "e621") , "username", None)
- config.set(("extractor", "instagram") , "username", None)
- config.set(("extractor", "twitter") , "username", None)
config.set(("extractor", "newgrounds"), "username", "d1618111")
config.set(("extractor", "newgrounds"), "password", "d1618111")
@@ -320,6 +316,10 @@ def setup_test_config():
config.set(("extractor", "mangoxo") , "username", "LiQiang3")
config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
+ for category in ("danbooru", "instagram", "twitter", "subscribestar",
+ "e621"):
+ config.set(("extractor", category), "username", None)
+
config.set(("extractor", "mastodon.social"), "access-token",
"Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")