about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Unit 193 <unit193@unit193.net> 2020-10-25 17:59:29 -0400
committer: Unit 193 <unit193@unit193.net> 2020-10-25 17:59:29 -0400
commit 6c77ac67811ca061b022e9677f3ef365625b0f3e (patch)
tree 43606f6d2894d5211b1f8e0456a0e0162684e444
parent fa22dd3889bb4b898017195e13eb15ba0431255e (diff)
parent 5dc7d6f5902ddaee5223d041d5c10060f0c72430 (diff)
download: gallery-dl-6c77ac67811ca061b022e9677f3ef365625b0f3e.tar.bz2
gallery-dl-6c77ac67811ca061b022e9677f3ef365625b0f3e.tar.xz
gallery-dl-6c77ac67811ca061b022e9677f3ef365625b0f3e.tar.zst
Update upstream source from tag 'upstream/1.15.2'
Update to upstream version '1.15.2' with Debian dir b11433f3e7944c55987dcada15dce64a82e74ce6
-rw-r--r-- CHANGELOG.md 20
-rw-r--r-- PKG-INFO 12
-rw-r--r-- README.rst 10
-rw-r--r-- data/man/gallery-dl.1 2
-rw-r--r-- data/man/gallery-dl.conf.5 127
-rw-r--r-- gallery_dl.egg-info/PKG-INFO 12
-rw-r--r-- gallery_dl/extractor/35photo.py 4
-rw-r--r-- gallery_dl/extractor/4chan.py 2
-rw-r--r-- gallery_dl/extractor/500px.py 4
-rw-r--r-- gallery_dl/extractor/8kun.py 2
-rw-r--r-- gallery_dl/extractor/8muses.py 2
-rw-r--r-- gallery_dl/extractor/artstation.py 10
-rw-r--r-- gallery_dl/extractor/aryion.py 2
-rw-r--r-- gallery_dl/extractor/bcy.py 2
-rw-r--r-- gallery_dl/extractor/behance.py 2
-rw-r--r-- gallery_dl/extractor/blogger.py 10
-rw-r--r-- gallery_dl/extractor/booru.py 6
-rw-r--r-- gallery_dl/extractor/deviantart.py 10
-rw-r--r-- gallery_dl/extractor/directlink.py 4
-rw-r--r-- gallery_dl/extractor/dynastyscans.py 2
-rw-r--r-- gallery_dl/extractor/fallenangels.py 12
-rw-r--r-- gallery_dl/extractor/foolslide.py 4
-rw-r--r-- gallery_dl/extractor/furaffinity.py 10
-rw-r--r-- gallery_dl/extractor/fuskator.py 2
-rw-r--r-- gallery_dl/extractor/gelbooru.py 9
-rw-r--r-- gallery_dl/extractor/gfycat.py 4
-rw-r--r-- gallery_dl/extractor/hentai2read.py 4
-rw-r--r-- gallery_dl/extractor/hentaicafe.py 4
-rw-r--r-- gallery_dl/extractor/hentaifoundry.py 222
-rw-r--r-- gallery_dl/extractor/hentaihand.py 2
-rw-r--r-- gallery_dl/extractor/hiperdex.py 6
-rw-r--r-- gallery_dl/extractor/hitomi.py 12
-rw-r--r-- gallery_dl/extractor/imagefap.py 2
-rw-r--r-- gallery_dl/extractor/imagehosts.py 8
-rw-r--r-- gallery_dl/extractor/imgbb.py 6
-rw-r--r-- gallery_dl/extractor/imgur.py 10
-rw-r--r-- gallery_dl/extractor/inkbunny.py 2
-rw-r--r-- gallery_dl/extractor/instagram.py 12
-rw-r--r-- gallery_dl/extractor/issuu.py 4
-rw-r--r-- gallery_dl/extractor/khinsider.py 2
-rw-r--r-- gallery_dl/extractor/komikcast.py 4
-rw-r--r-- gallery_dl/extractor/lineblog.py 4
-rw-r--r-- gallery_dl/extractor/livedoor.py 2
-rw-r--r-- gallery_dl/extractor/luscious.py 2
-rw-r--r-- gallery_dl/extractor/mangadex.py 37
-rw-r--r-- gallery_dl/extractor/mangafox.py 6
-rw-r--r-- gallery_dl/extractor/mangahere.py 12
-rw-r--r-- gallery_dl/extractor/mangakakalot.py 4
-rw-r--r-- gallery_dl/extractor/mangapanda.py 4
-rw-r--r-- gallery_dl/extractor/mangapark.py 4
-rw-r--r-- gallery_dl/extractor/mangareader.py 4
-rw-r--r-- gallery_dl/extractor/mastodon.py 4
-rw-r--r-- gallery_dl/extractor/message.py 4
-rw-r--r-- gallery_dl/extractor/myportfolio.py 4
-rw-r--r-- gallery_dl/extractor/newgrounds.py 21
-rw-r--r-- gallery_dl/extractor/ngomik.py 2
-rw-r--r-- gallery_dl/extractor/nozomi.py 2
-rw-r--r-- gallery_dl/extractor/oauth.py 2
-rw-r--r-- gallery_dl/extractor/paheal.py 2
-rw-r--r-- gallery_dl/extractor/patreon.py 6
-rw-r--r-- gallery_dl/extractor/photobucket.py 6
-rw-r--r-- gallery_dl/extractor/piczel.py 4
-rw-r--r-- gallery_dl/extractor/pinterest.py 55
-rw-r--r-- gallery_dl/extractor/pixiv.py 8
-rw-r--r-- gallery_dl/extractor/pixnet.py 2
-rw-r--r-- gallery_dl/extractor/plurk.py 2
-rw-r--r-- gallery_dl/extractor/pornhub.py 2
-rw-r--r-- gallery_dl/extractor/reactor.py 18
-rw-r--r-- gallery_dl/extractor/readcomiconline.py 4
-rw-r--r-- gallery_dl/extractor/reddit.py 10
-rw-r--r-- gallery_dl/extractor/redgifs.py 4
-rw-r--r-- gallery_dl/extractor/sankakucomplex.py 2
-rw-r--r-- gallery_dl/extractor/seiga.py 23
-rw-r--r-- gallery_dl/extractor/sexcom.py 4
-rw-r--r-- gallery_dl/extractor/simplyhentai.py 6
-rw-r--r-- gallery_dl/extractor/slickpic.py 2
-rw-r--r-- gallery_dl/extractor/slideshare.py 2
-rw-r--r-- gallery_dl/extractor/smugmug.py 4
-rw-r--r-- gallery_dl/extractor/speakerdeck.py 2
-rw-r--r-- gallery_dl/extractor/subscribestar.py 2
-rw-r--r-- gallery_dl/extractor/tumblr.py 2
-rw-r--r-- gallery_dl/extractor/twitter.py 145
-rw-r--r-- gallery_dl/extractor/vanillarock.py 4
-rw-r--r-- gallery_dl/extractor/weasyl.py 13
-rw-r--r-- gallery_dl/extractor/webtoons.py 4
-rw-r--r-- gallery_dl/extractor/xhamster.py 6
-rw-r--r-- gallery_dl/extractor/xvideos.py 4
-rw-r--r-- gallery_dl/extractor/yuki.py 2
-rw-r--r-- gallery_dl/job.py 35
-rw-r--r-- gallery_dl/text.py 2
-rw-r--r-- gallery_dl/util.py 7
-rw-r--r-- gallery_dl/version.py 2
-rw-r--r-- test/test_results.py 7
-rw-r--r-- test/test_util.py 2
94 files changed, 632 insertions, 477 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a55546..f382013 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,25 @@
# Changelog
+## 1.15.2 - 2020-10-24
+### Additions
+- [pinterest] implement login support ([#1055](https://github.com/mikf/gallery-dl/issues/1055))
+- [reddit] add `date` metadata field ([#1068](https://github.com/mikf/gallery-dl/issues/1068))
+- [seiga] add metadata for single image downloads ([#1063](https://github.com/mikf/gallery-dl/issues/1063))
+- [twitter] support media from Cards ([#937](https://github.com/mikf/gallery-dl/issues/937), [#1005](https://github.com/mikf/gallery-dl/issues/1005))
+- [weasyl] support api-key authentication ([#1057](https://github.com/mikf/gallery-dl/issues/1057))
+- add a `t` format string conversion for trimming whitespace ([#1065](https://github.com/mikf/gallery-dl/issues/1065))
+### Fixes
+- [blogger] handle URLs with specified width/height ([#1061](https://github.com/mikf/gallery-dl/issues/1061))
+- [fallenangels] fix extraction of `.5` chapters
+- [gelbooru] rewrite mp4 video URLs ([#1048](https://github.com/mikf/gallery-dl/issues/1048))
+- [hitomi] fix image URLs and gallery URL pattern
+- [mangadex] unescape more metadata fields ([#1066](https://github.com/mikf/gallery-dl/issues/1066))
+- [mangahere] ensure download URLs have a scheme ([#1070](https://github.com/mikf/gallery-dl/issues/1070))
+- [mangakakalot] ignore "Go Home" buttons in chapter pages
+- [newgrounds] handle embeds without scheme ([#1033](https://github.com/mikf/gallery-dl/issues/1033))
+- [newgrounds] provide fallback URLs for video downloads ([#1042](https://github.com/mikf/gallery-dl/issues/1042))
+- [xhamster] fix user profile extraction
+
## 1.15.1 - 2020-10-11
### Additions
- [hentaicafe] add `manga_id` metadata field ([#1036](https://github.com/mikf/gallery-dl/issues/1036))
diff --git a/PKG-INFO b/PKG-INFO
index 190cb5f..d488155 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.15.1
+Version: 1.15.2
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.2/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -225,8 +225,8 @@ Description: ==========
``pixiv``, ``nijie``, and ``seiga``
and optional for
``aryion``, ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``,
- ``instagram``, ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``,
- and ``twitter``.
+ ``instagram``, ``luscious``, ``pinterest``, ``sankaku``, ``subscribestar``,
+ ``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -319,7 +319,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index fa823b5..6715d1e 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.2/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.2/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -214,8 +214,8 @@ a username & password pair. This is necessary for
``pixiv``, ``nijie``, and ``seiga``
and optional for
``aryion``, ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``,
-``instagram``, ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``,
-and ``twitter``.
+``instagram``, ``luscious``, ``pinterest``, ``sankaku``, ``subscribestar``,
+``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -308,7 +308,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index cbcf4bf..9df67f4 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-10-11" "1.15.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-10-24" "1.15.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index aeecaa0..8dd3187 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-10-11" "1.15.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-10-24" "1.15.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -325,6 +325,8 @@ and optional for
.br
* \f[I]luscious\f[]
.br
+* \f[I]pinterest\f[]
+.br
* \f[I]sankaku\f[]
.br
* \f[I]subscribestar\f[]
@@ -333,13 +335,12 @@ and optional for
.br
* \f[I]twitter\f[]
-These values can also be set via the \f[I]-u/--username\f[] and
-\f[I]-p/--password\f[] command-line options or by using a \f[I].netrc\f[] file.
-(see Authentication_)
+These values can also be specified via the
+\f[I]-u/--username\f[] and \f[I]-p/--password\f[] command-line options or
+by using a \f[I].netrc\f[] file. (see Authentication_)
-Note: The password values for \f[I]danbooru\f[] and \f[I]e621\f[] should be
-the API keys found in your user profile, not your actual account
-password.
+Note: The password value for \f[I]danbooru\f[] and \f[I]e621\f[] should be
+the API key found in your user profile, not the actual account password.
.SS extractor.*.netrc
@@ -370,7 +371,7 @@ Source to read additional cookies from. Either as
Example:
-.. code::
+.. code:: json
{
"cookie-name": "cookie-value",
@@ -414,10 +415,10 @@ See \f[I]Requests' proxy documentation\f[] for more details.
Example:
-.. code::
+.. code:: json
{
-"http": "http://10.10.1.10:3128",
+"http" : "http://10.10.1.10:3128",
"https": "http://10.10.1.10:1080",
"http://10.20.1.128": "http://10.10.1.10:5323"
}
@@ -533,18 +534,43 @@ An alternative \f[I]format string\f[] to build archive IDs with.
\f[I]list\f[] of \f[I]Postprocessor Configuration\f[] objects
.IP "Example:" 4
-.. code::
+.. code:: json
[
-{"name": "zip", "compression": "zip"},
-{"name": "exec", "command": ["/home/foobar/script", "{category}", "{image_id}"]}
+{
+"name": "zip" ,
+"compression": "store"
+},
+{
+"name": "exec",
+"command": ["/home/foobar/script", "{category}", "{image_id}"]
+}
]
.IP "Description:" 4
-A list of \f[I]post-processors\f[]
+A list of \f[I]post processors\f[]
to be applied to each downloaded file in the specified order.
+Unlike other options, a \f[I]postprocessors\f[] setting at a deeper level
+.br
+does not override any \f[I]postprocessors\f[] setting at a lower level.
+Instead, all post processors from all applicable \f[I]postprocessors\f[]
+.br
+settings get combined into a single list.
+
+For example
+
+.br
+* an \f[I]mtime\f[] post processor at \f[I]extractor.postprocessors\f[],
+.br
+* a \f[I]zip\f[] post processor at \f[I]extractor.pixiv.postprocessors\f[],
+.br
+* and using \f[I]--exec\f[]
+
+will run all three post processors - \f[I]mtime\f[], \f[I]zip\f[], \f[I]exec\f[] -
+for each downloaded \f[I]pixiv\f[] file.
+
.SS extractor.*.retries
.IP "Type:" 6
@@ -555,7 +581,7 @@ to be applied to each downloaded file in the specified order.
.IP "Description:" 4
Maximum number of times a failed HTTP request is retried before
-giving up or \f[I]-1\f[] for infinite retries.
+giving up, or \f[I]-1\f[] for infinite retries.
.SS extractor.*.timeout
@@ -1596,6 +1622,17 @@ Possible types are \f[I]text\f[], \f[I]quote\f[], \f[I]link\f[], \f[I]answer\f[]
You can use \f[I]"all"\f[] instead of listing all types separately.
+.SS extractor.twitter.cards
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Fetch media from \f[I]Cards\f[].
+
+
.SS extractor.twitter.quoted
.IP "Type:" 6
\f[I]bool\f[]
@@ -1680,12 +1717,24 @@ Download video files.
\f[I]null\f[]
.IP "Description:" 4
-Your \f[I]API Key\f[] to use
-your account's browsing settings and default filters when searching.
+Your \f[I]Wallhaven API Key\f[],
+to use your account's browsing settings and default filters when searching.
See https://wallhaven.cc/help/api for more information.
+.SS extractor.weasyl.api-key
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]null\f[]
+
+.IP "Description:" 4
+Your \f[I]Weasyl API Key\f[],
+to use your account's browsing settings and filters.
+
+
.SS extractor.weibo.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -1946,7 +1995,7 @@ cause unexpected results in combination with other options
\f[I]object\f[]
.IP "Example:" 4
-.. code::
+.. code:: json
{
"quiet": true,
@@ -2075,13 +2124,13 @@ before outputting them as JSON.
\f[I]object\f[]
.IP "Default:" 9
-.. code::
+.. code:: json
{
-"Pictures" : ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"],
-"Video" : ["flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", "webm", "vob", "wmv"],
-"Music" : ["mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"],
-"Archives" : ["zip", "rar", "7z", "tar", "gz", "bz2"]
+"Pictures": ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"],
+"Video" : ["flv", "ogv", "avi", "mp4", "mpg", "mpeg", "3gp", "mkv", "webm", "vob", "wmv"],
+"Music" : ["mp3", "aac", "flac", "ogg", "wma", "m4a", "wav"],
+"Archives": ["zip", "rar", "7z", "tar", "gz", "bz2"]
}
@@ -2489,11 +2538,11 @@ Submission Policy, and Terms of Service.
application and put them in your configuration file
as \f[I]"client-id"\f[] and \f[I]"client-secret"\f[]
.br
-* clear your \f[I]cache\f[] (\f[I]--clear-cache\f[]) to delete
-the \f[I]access-token\f[] from the previous \f[I]client-id\f[]
+* clear your \f[I]cache\f[] to delete any remaining
+\f[I]access-token\f[] entries. (\f[I]gallery-dl --clear-cache\f[])
.br
-* get a new \f[I]refresh-token\f[]
-if necessary
+* get a new \f[I]refresh-token\f[] for the
+new \f[I]client-id\f[] (\f[I]gallery-dl oauth:deviantart\f[])
.SS extractor.flickr.api-key & .api-secret
@@ -2636,19 +2685,19 @@ The path \f[I]C:\\path\\to\\file.ext\f[] has therefore to be written as
\f[I]object\f[]
.IP "Example:" 4
-.. code::
+.. code:: json
{
-"format": "{asctime} {name}: {message}",
+"format" : "{asctime} {name}: {message}",
"format-date": "%H:%M:%S",
-"path": "~/log.txt",
-"encoding": "ascii"
+"path" : "~/log.txt",
+"encoding" : "ascii"
}
-.. code::
+.. code:: json
{
-"level": "debug",
+"level" : "debug",
"format": {
"debug" : "debug: {message}",
"info" : "[{name}] {message}",
@@ -2710,7 +2759,7 @@ use \f[I]"w"\f[] to truncate or \f[I]"a"\f[] to append
.br
* Default: \f[I]"utf-8"\f[]
-Note: path, mode and encoding are only applied when configuring
+Note: path, mode, and encoding are only applied when configuring
logging output to a file.
@@ -2719,17 +2768,17 @@ logging output to a file.
\f[I]object\f[]
.IP "Example:" 4
-.. code::
+.. code:: json
{ "name": "mtime" }
-.. code::
+.. code:: json
{
-"name": "zip",
+"name" : "zip",
"compression": "store",
-"extension": "cbz",
-"whitelist": ["mangadex", "exhentai", "nhentai"]
+"extension" : "cbz",
+"whitelist" : ["mangadex", "exhentai", "nhentai"]
}
.IP "Description:" 4
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 3207269..18f8d82 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.15.1
+Version: 1.15.2
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.1/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.15.2/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.15.2/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -225,8 +225,8 @@ Description: ==========
``pixiv``, ``nijie``, and ``seiga``
and optional for
``aryion``, ``danbooru``, ``e621``, ``exhentai``, ``idolcomplex``, ``inkbunny``,
- ``instagram``, ``luscious``, ``sankaku``, ``subscribestar``, ``tsumino``,
- and ``twitter``.
+ ``instagram``, ``luscious``, ``pinterest``, ``sankaku``, ``subscribestar``,
+ ``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -319,7 +319,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.1.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.15.2.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl/extractor/35photo.py b/gallery_dl/extractor/35photo.py
index e33aa2d..edb9d46 100644
--- a/gallery_dl/extractor/35photo.py
+++ b/gallery_dl/extractor/35photo.py
@@ -101,7 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
"""Extractor for all images of a user on 35photo.pro"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
- r"/(?!photo_|genre_|tags/|rating/)([^/?&#]+)")
+ r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
test = (
("https://35photo.pro/liya", {
"pattern": r"https://m\d+.35photo.pro/photos_(main|series)/.*.jpg",
@@ -142,7 +142,7 @@ class _35photoTagExtractor(_35photoExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "Tags", "{search_tag}")
archive_fmt = "t{search_tag}_{id}_{num}"
- pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?&#]+)"
+ pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?#]+)"
test = ("https://35photo.pro/tags/landscape/", {
"range": "1-25",
"count": 25,
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index 980dc20..bed30b1 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -65,7 +65,7 @@ class _4chanBoardExtractor(Extractor):
"""Extractor for 4chan boards"""
category = "4chan"
subcategory = "board"
- pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?&#]+)/\d*$"
+ pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?#]+)/\d*$"
test = ("https://boards.4channel.org/po/", {
"pattern": _4chanThreadExtractor.pattern,
"count": ">= 100",
diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py
index fd973c3..624b14d 100644
--- a/gallery_dl/extractor/500px.py
+++ b/gallery_dl/extractor/500px.py
@@ -90,7 +90,7 @@ class _500pxExtractor(Extractor):
class _500pxUserExtractor(_500pxExtractor):
"""Extractor for photos from a user's photostream on 500px.com"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!photo/)(?:p/)?([^/?&#]+)/?(?:$|\?|#)"
+ pattern = BASE_PATTERN + r"/(?!photo/)(?:p/)?([^/?#]+)/?(?:$|[?#])"
test = (
("https://500px.com/p/light_expression_photography", {
"pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2",
@@ -132,7 +132,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
subcategory = "gallery"
directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
- r"([^/?&#]+)/galleries/([^/?&#]+)")
+ r"([^/?#]+)/galleries/([^/?#]+)")
test = (
("https://500px.com/p/fashvamp/galleries/lera", {
"url": "002dc81dee5b4a655f0e31ad8349e8903b296df6",
diff --git a/gallery_dl/extractor/8kun.py b/gallery_dl/extractor/8kun.py
index 7162920..47fe672 100644
--- a/gallery_dl/extractor/8kun.py
+++ b/gallery_dl/extractor/8kun.py
@@ -64,7 +64,7 @@ class _8kunBoardExtractor(Extractor):
"""Extractor for 8kun boards"""
category = "8kun"
subcategory = "board"
- pattern = r"(?:https?://)?8kun\.top/([^/?&#]+)/(?:index|\d+)\.html"
+ pattern = r"(?:https?://)?8kun\.top/([^/?#]+)/(?:index|\d+)\.html"
test = (
("https://8kun.top/v/index.html", {
"pattern": _8kunThreadExtractor.pattern,
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index b248735..3eb5565 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -22,7 +22,7 @@ class _8musesAlbumExtractor(Extractor):
archive_fmt = "{hash}"
root = "https://comics.8muses.com"
pattern = (r"(?:https?://)?(?:comics\.|www\.)?8muses\.com"
- r"(/comics/album/[^?&#]+)(\?[^#]+)?")
+ r"(/comics/album/[^?#]+)(\?[^#]+)?")
test = (
("https://comics.8muses.com/comics/album/Fakku-Comics/mogg/Liar", {
"url": "6286ac33087c236c5a7e51f8a9d4e4d5548212d4",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index 985ad48..6914f24 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -131,7 +131,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
"""Extractor for all projects of an artstation user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
- r"/(?!artwork|projects|search)([^/?&#]+)(?:/albums/all)?"
+ r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?"
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
test = (
("https://www.artstation.com/gaerikim/", {
@@ -156,7 +156,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
"{album[id]} - {album[title]}")
archive_fmt = "a_{album[id]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
- r"/(?!artwork|projects|search)([^/?&#]+)"
+ r"/(?!artwork|projects|search)([^/?#]+)"
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
test = (
("https://www.artstation.com/huimeiye/albums/770899", {
@@ -199,7 +199,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
directory_fmt = ("{category}", "{userinfo[username]}", "Likes")
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
- r"/(?!artwork|projects|search)([^/?&#]+)/likes/?")
+ r"/(?!artwork|projects|search)([^/?#]+)/likes/?")
test = (
("https://www.artstation.com/mikf/likes", {
"pattern": r"https://\w+\.artstation\.com/p/assets"
@@ -225,7 +225,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
"{challenge[id]} - {challenge[title]}")
archive_fmt = "c_{challenge[id]}_{asset_id}"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
- r"/contests/[^/?&#]+/challenges/(\d+)"
+ r"/contests/[^/?#]+/challenges/(\d+)"
r"/?(?:\?sorting=([a-z]+))?")
test = (
("https://www.artstation.com/contests/thu-2017/challenges/20"),
@@ -386,7 +386,7 @@ class ArtstationFollowingExtractor(ArtstationExtractor):
"""Extractor for a user's followed users"""
subcategory = "following"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
- r"/(?!artwork|projects|search)([^/?&#]+)/following")
+ r"/(?!artwork|projects|search)([^/?#]+)/following")
test = ("https://www.artstation.com/gaerikim/following", {
"pattern": ArtstationUserExtractor.pattern,
"count": ">= 50",
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 374a9fc..6a90b76 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -153,7 +153,7 @@ class AryionGalleryExtractor(AryionExtractor):
"""Extractor for a user's gallery on eka's portal"""
subcategory = "gallery"
categorytransfer = True
- pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)"
test = (
("https://aryion.com/g4/gallery/jameshoward", {
"options": (("recursive", False),),
diff --git a/gallery_dl/extractor/bcy.py b/gallery_dl/extractor/bcy.py
index c3049a4..ec7020a 100644
--- a/gallery_dl/extractor/bcy.py
+++ b/gallery_dl/extractor/bcy.py
@@ -108,7 +108,7 @@ class BcyUserExtractor(BcyExtractor):
test = (
("https://bcy.net/u/1933712", {
"pattern": r"https://img-bcy-qn.pstatp.com/\w+/\d+/post/\w+/.+jpg",
- "count": ">= 25",
+ "count": ">= 20",
}),
("https://bcy.net/u/109282764041", {
"pattern": r"https://p\d-bcy.byteimg.com/img/banciyuan/[0-9a-f]+"
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index be498bc..a817174 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -151,7 +151,7 @@ class BehanceUserExtractor(BehanceExtractor):
"""Extractor for a user's galleries from www.behance.net"""
subcategory = "user"
categorytransfer = True
- pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"
+ pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
test = ("https://www.behance.net/alexstrohl", {
"count": ">= 8",
"pattern": BehanceGalleryExtractor.pattern,
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 9c18e0e..60170dc 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -42,7 +42,7 @@ class BloggerExtractor(Extractor):
blog["date"] = text.parse_datetime(blog["published"])
del blog["selfLink"]
- sub = re.compile(r"/s\d+/").sub
+ sub = re.compile(r"/(?:s\d+|w\d+-h\d+)/").sub
findall_image = re.compile(
r'src="(https?://\d+\.bp\.blogspot\.com/[^"]+)').findall
findall_video = re.compile(
@@ -92,7 +92,7 @@ class BloggerExtractor(Extractor):
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
- pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?&#]+\.html)"
+ pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?#]+\.html)"
test = (
("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", {
"url": "9928429fb62f712eb4de80f53625eccecc614aae",
@@ -134,6 +134,10 @@ class BloggerPostExtractor(BloggerExtractor):
"cfnm-scene-jenna-fischer-in-office.html"), {
"pattern": r"https://.+\.googlevideo\.com/videoplayback",
}),
+ # image URLs with width/height (#1061)
+ ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
+ "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
+ }),
)
def __init__(self, match):
@@ -167,7 +171,7 @@ class BloggerBlogExtractor(BloggerExtractor):
class BloggerSearchExtractor(BloggerExtractor):
"""Extractor for search resuls and labels"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?&#]+)|/label/([^/?&#]+))"
+ pattern = BASE_PATTERN + r"/search(?:/?\?q=([^/?#]+)|/label/([^/?#]+))"
test = (
("https://julianbphotography.blogspot.com/search?q=400mm", {
"count": "< 10"
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index be0027a..0176d76 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -51,7 +51,7 @@ class BooruExtractor(SharedConfigMixin, Extractor):
for image in images:
try:
- url = image["file_url"]
+ url = self.get_file_url(image)
except KeyError:
continue
if url.startswith("/"):
@@ -86,6 +86,10 @@ class BooruExtractor(SharedConfigMixin, Extractor):
"""Collect metadata for extractor-job"""
return {}
+ @staticmethod
+ def get_file_url(image):
+ return image["file_url"]
+
def extended_tags(self, image, page=None):
"""Retrieve extended tag information"""
if not page:
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 9cceaee..e40ec51 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -460,7 +460,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?#]+)"
test = (
# user
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
@@ -601,7 +601,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Favourites",
"{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/favourites/(\d+)/([^/?#]+)"
test = (
(("https://www.deviantart.com/pencilshadings"
"/favourites/70595441/3D-Favorites"), {
@@ -671,8 +671,8 @@ class DeviantartPopularExtractor(DeviantartExtractor):
archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
pattern = (r"(?:https?://)?www\.deviantart\.com/(?:"
r"search(?:/deviations)?"
- r"|(?:deviations/?)?\?order=(popular-[^/?&#]+)"
- r"|((?:[\w-]+/)*)(popular-[^/?&#]+)"
+ r"|(?:deviations/?)?\?order=(popular-[^/?#]+)"
+ r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
r")/?(?:\?([^#]*))?")
test = (
("https://www.deviantart.com/?order=popular-all-time", {
@@ -730,7 +730,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "{index}.{extension}"
- pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?&#]+-)?(\d+)"
+ pattern = BASE_PATTERN + r"/(art|journal)/(?:[^/?#]+-)?(\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
diff --git a/gallery_dl/extractor/directlink.py b/gallery_dl/extractor/directlink.py
index 1d17658..a6346bf 100644
--- a/gallery_dl/extractor/directlink.py
+++ b/gallery_dl/extractor/directlink.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2019 Mike Fährmann
+# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,7 +17,7 @@ class DirectlinkExtractor(Extractor):
category = "directlink"
filename_fmt = "{domain}/{path}/{filename}.{extension}"
archive_fmt = filename_fmt
- pattern = (r"(?i)https?://(?P<domain>[^/?&#]+)/(?P<path>[^?&#]+\."
+ pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^/?#]*))?(?:#(?P<fragment>.*))?$")
test = (
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index 9cc6738..7d26c47 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -44,7 +44,7 @@ class DynastyscansBase():
class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"""Extractor for manga-chapters from dynasty-scans.com"""
- pattern = BASE_PATTERN + r"(/chapters/[^/?&#]+)"
+ pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)"
test = (
(("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), {
diff --git a/gallery_dl/extractor/fallenangels.py b/gallery_dl/extractor/fallenangels.py
index 44863a9..ab0e0c5 100644
--- a/gallery_dl/extractor/fallenangels.py
+++ b/gallery_dl/extractor/fallenangels.py
@@ -17,7 +17,7 @@ class FallenangelsChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from fascans.com"""
category = "fallenangels"
pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
- r"/manga/([^/]+)/(\d+)(\.[^/?&#]+)?")
+ r"/manga/([^/?#]+)/([^/?#]+)")
test = (
("https://manga.fascans.com/manga/chronos-ruler/20/1", {
"url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3",
@@ -28,12 +28,13 @@ class FallenangelsChapterExtractor(ChapterExtractor):
"keyword": "2bdb7334c0e3eceb9946ffd3132df679b4a94f6a",
}),
("http://manga.fascans.com/manga/rakudai-kishi-no-eiyuutan/19.5", {
- "keyword": "9fcca4c1a90d11f00764f62477ebe10bd408021c",
+ "url": "273f6863966c83ea79ad5846a2866e08067d3f0e",
+ "keyword": "d1065685bfe0054c4ff2a0f20acb089de4cec253",
}),
)
def __init__(self, match):
- self.version, self.manga, self.chapter, self.minor = match.groups()
+ self.version, self.manga, self.chapter = match.groups()
url = "https://{}.fascans.com/manga/{}/{}/1".format(
self.version, self.manga, self.chapter)
ChapterExtractor.__init__(self, match, url)
@@ -41,11 +42,12 @@ class FallenangelsChapterExtractor(ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
lang = "vi" if self.version == "truyen" else "en"
+ chapter, sep, minor = self.chapter.partition(".")
return {
"manga" : extr('name="description" content="', ' Chapter '),
"title" : extr(': ', ' - Page 1'),
- "chapter" : self.chapter,
- "chapter_minor": self.minor or "",
+ "chapter" : chapter,
+ "chapter_minor": sep + minor,
"lang" : lang,
"language": util.code_to_language(lang),
}
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index bf925b6..4245617 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -45,7 +45,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
"""Base class for chapter extractors for FoOlSlide based sites"""
directory_fmt = ("{category}", "{manga}", "{chapter_string}")
archive_fmt = "{id}"
- pattern_fmt = r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
+ pattern_fmt = r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
decode = "default"
def items(self):
@@ -86,7 +86,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
class FoolslideMangaExtractor(FoolslideBase, MangaExtractor):
"""Base class for manga extractors for FoOlSlide based sites"""
- pattern_fmt = r"(/series/[^/?&#]+)"
+ pattern_fmt = r"(/series/[^/?#]+)"
def chapters(self, page):
extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 2a5ef6e..752cd62 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -177,7 +177,7 @@ class FuraffinityExtractor(Extractor):
class FuraffinityGalleryExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's gallery"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/gallery/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
"pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+",
"range": "45-50",
@@ -189,7 +189,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's scraps"""
subcategory = "scraps"
directory_fmt = ("{category}", "{user!l}", "Scraps")
- pattern = BASE_PATTERN + r"/scraps/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/scraps/([^/?#]+)"
test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
"pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.",
"count": ">= 3",
@@ -200,7 +200,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's favorites"""
subcategory = "favorite"
directory_fmt = ("{category}", "{user!l}", "Favorites")
- pattern = BASE_PATTERN + r"/favorites/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
"pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+",
"range": "45-50",
@@ -278,7 +278,7 @@ class FuraffinityUserExtractor(FuraffinityExtractor):
"""Extractor for furaffinity user profiles"""
subcategory = "user"
cookiedomain = None
- pattern = BASE_PATTERN + r"/user/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)"
test = (
("https://www.furaffinity.net/user/mirlinthloth/", {
"pattern": r"/gallery/mirlinthloth/$",
@@ -302,7 +302,7 @@ class FuraffinityUserExtractor(FuraffinityExtractor):
class FuraffinityFollowingExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's watched users"""
subcategory = "following"
- pattern = BASE_PATTERN + "/watchlist/by/([^/?&#]+)"
+ pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)"
test = ("https://www.furaffinity.net/watchlist/by/mirlinthloth/", {
"pattern": FuraffinityUserExtractor.pattern,
"range": "176-225",
diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py
index eba1c39..df55061 100644
--- a/gallery_dl/extractor/fuskator.py
+++ b/gallery_dl/extractor/fuskator.py
@@ -17,7 +17,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries on fuskator.com"""
category = "fuskator"
root = "https://fuskator.com"
- pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?&#]+)"
+ pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?#]+)"
test = (
("https://fuskator.com/thumbs/d0GnIzXrSKU/", {
"pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg",
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index edadd31..c32ba5c 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -33,6 +33,15 @@ class GelbooruExtractor(booru.XmlParserMixin,
self.session.cookies["fringeBenefits"] = "yup"
self.per_page = 42
+ @staticmethod
+ def get_file_url(image):
+ url = image["file_url"]
+ if url.startswith("https://mp4.gelbooru.com/"):
+ ihash = image["md5"]
+ return "https://img2.gelbooru.com/images/{}/{}/{}.webm".format(
+ ihash[0:2], ihash[2:4], ihash)
+ return url
+
def items_noapi(self):
yield Message.Version, 1
data = self.get_metadata()
diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py
index ba2fe5d..493c1d2 100644
--- a/gallery_dl/extractor/gfycat.py
+++ b/gallery_dl/extractor/gfycat.py
@@ -56,7 +56,7 @@ class GfycatUserExtractor(GfycatExtractor):
"""Extractor for gfycat user profiles"""
subcategory = "user"
directory_fmt = ("{category}", "{userName}")
- pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
+ pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)"
test = ("https://gfycat.com/@gretta", {
"pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
"count": ">= 100",
@@ -70,7 +70,7 @@ class GfycatSearchExtractor(GfycatExtractor):
"""Extractor for gfycat search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
+ pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?#]+)"
test = ("https://gfycat.com/gifs/search/funny+animals", {
"pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
"archive": False,
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index 354acbf..53be67b 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -23,7 +23,7 @@ class Hentai2readBase():
class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
archive_fmt = "{chapter_id}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+/(\d+))"
+ pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/(\d+))"
test = ("https://hentai2read.com/amazon_elixir/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "ff84b8f751f0e4ee37717efc4332ff1db71951d9",
@@ -63,7 +63,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
"""Extractor for hmanga from hentai2read.com"""
chapterclass = Hentai2readChapterExtractor
- pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?&#]+)/?$"
+ pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+)/?$"
test = (
("https://hentai2read.com/amazon_elixir/", {
"url": "273073752d418ec887d7f7211e42b832e8c403ba",
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py
index 833135e..e12670a 100644
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@@ -20,7 +20,7 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
category = "hentaicafe"
directory_fmt = ("{category}", "{manga}")
pattern = (r"(?:https?://)?(?:www\.)?hentai\.cafe"
- r"(/manga/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)")
+ r"(/manga/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)")
test = ("https://hentai.cafe/manga/read/saitom-box/en/0/1/", {
"url": "8c6a8c56875ba3ed7ab0a74a64f9960077767fc2",
"keyword": "6913608267d883c82b887303b9ced13821188329",
@@ -45,7 +45,7 @@ class HentaicafeMangaExtractor(foolslide.FoolslideMangaExtractor):
"""Extractor for manga from hentai.cafe"""
category = "hentaicafe"
pattern = (r"(?:https?://)?" + r"(?:www\.)?hentai\.cafe"
- r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?&#]+)/?$")
+ r"(/hc\.fyi/\d+|(?:/manga/series)?/[^/?#]+)/?$")
test = (
# single chapter
("https://hentai.cafe/hazuki-yuuto-summer-blues/", {
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index 5eb46b6..0be528d 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -9,7 +9,9 @@
"""Extractors for https://www.hentai-foundry.com/"""
from .common import Extractor, Message
-from .. import text, util, exception
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
class HentaifoundryExtractor(Extractor):
@@ -21,22 +23,21 @@ class HentaifoundryExtractor(Extractor):
root = "https://www.hentai-foundry.com"
per_page = 25
- def __init__(self, match, user="", page=1):
+ def __init__(self, match):
Extractor.__init__(self, match)
+ self.user = match.group(1)
self.page_url = ""
- self.user = user
self.start_post = 0
- self.start_page = text.parse_int(page, 1)
+ self.start_page = 1
def items(self):
- data = self.get_job_metadata()
- yield Message.Version, 1
- yield Message.Directory, data
+ self._init_site_filters()
+ data = self.metadata()
- self.set_filters()
- for page_url in util.advance(self._pagination(), self.start_post):
- image = self.get_image_metadata(page_url)
+ for post_url in util.advance(self.posts(), self.start_post):
+ image = self._parse_post(post_url)
image.update(data)
+ yield Message.Directory, image
yield Message.Url, image["src"], image
def skip(self, num):
@@ -45,24 +46,25 @@ class HentaifoundryExtractor(Extractor):
self.start_post += posts
return num
- def get_job_metadata(self):
- """Collect metadata for extractor-job"""
- self.request(self.root + "/?enterAgree=1")
+ def metadata(self):
return {"user": self.user}
- def _pagination(self, begin='thumbTitle"><a href="', end='"'):
+ def posts(self):
+ return self._pagination(self.page_url)
+
+ def _pagination(self, url, begin='thumbTitle"><a href="', end='"'):
num = self.start_page
while True:
- page = self.request("{}/page/{}".format(self.page_url, num)).text
+ page = self.request("{}/page/{}".format(url, num)).text
yield from text.extract_iter(page, begin, end)
if 'class="pager"' not in page or 'class="last hidden"' in page:
return
num += 1
- def get_image_metadata(self, path):
- """Collect url and metadata from an image page"""
+ def _parse_post(self, path):
+ """Collect url and metadata from an image post"""
url = text.urljoin(self.root, path)
page = self.request(url).text
extr = text.extract_from(page, page.index('id="picBox"'))
@@ -89,7 +91,7 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
- def get_story_metadata(self, html):
+ def _parse_story(self, html):
"""Collect url and metadata for a story"""
extr = text.extract_from(html)
data = {
@@ -116,68 +118,66 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
- def set_filters(self):
+ def _init_site_filters(self):
"""Set site-internal filters to show all images"""
- token = text.unquote(text.extract(
- self.session.cookies["YII_CSRF_TOKEN"], "%22", "%22")[0])
+ url = self.root + "/?enterAgree=1"
+ response = self.request(url, method="HEAD")
+
+ url = self.root + "/site/filters"
data = {
- "YII_CSRF_TOKEN": token,
- "rating_nudity": 3,
- "rating_violence": 3,
- "rating_profanity": 3,
- "rating_racism": 3,
- "rating_sex": 3,
- "rating_spoilers": 3,
- "rating_yaoi": 1,
- "rating_yuri": 1,
- "rating_teen": 1,
- "rating_guro": 1,
- "rating_furry": 1,
- "rating_beast": 1,
- "rating_male": 1,
- "rating_female": 1,
- "rating_futa": 1,
- "rating_other": 1,
- "rating_scat": 1,
- "rating_incest": 1,
- "rating_rape": 1,
- "filter_media": "A",
- "filter_order": "date_new",
- "filter_type": 0,
+ "rating_nudity" : "3",
+ "rating_violence" : "3",
+ "rating_profanity": "3",
+ "rating_racism" : "3",
+ "rating_sex" : "3",
+ "rating_spoilers" : "3",
+ "rating_yaoi" : "1",
+ "rating_yuri" : "1",
+ "rating_teen" : "1",
+ "rating_guro" : "1",
+ "rating_furry" : "1",
+ "rating_beast" : "1",
+ "rating_male" : "1",
+ "rating_female" : "1",
+ "rating_futa" : "1",
+ "rating_other" : "1",
+ "rating_scat" : "1",
+ "rating_incest" : "1",
+ "rating_rape" : "1",
+ "filter_media" : "A",
+ "filter_order" : "date_new",
+ "filter_type" : "0",
+ "YII_CSRF_TOKEN" : text.unquote(text.extract(
+ response.cookies["YII_CSRF_TOKEN"], "%22", "%22")[0]),
}
- url = self.root + "/site/filters"
self.request(url, method="POST", data=data)
class HentaifoundryUserExtractor(HentaifoundryExtractor):
- """Extractor for all images of a hentai-foundry-user"""
+ """Extractor for a hentaifoundry user profile"""
subcategory = "user"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/user/([^/]+)/profile")
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
- def __init__(self, match):
- HentaifoundryExtractor.__init__(self, match, match.group(1))
-
def items(self):
+ root = self.root
user = "/user/" + self.user
return self._dispatch_extractors((
(HentaifoundryPicturesExtractor ,
- self.root + "/pictures" + user),
+ root + "/pictures" + user),
(HentaifoundryScrapsExtractor,
- self.root + "/pictures" + user + "/scraps"),
+ root + "/pictures" + user + "/scraps"),
(HentaifoundryStoriesExtractor,
- self.root + "/stories" + user),
+ root + "/stories" + user),
(HentaifoundryFavoriteExtractor,
- self.root + user + "/faves/pictures"),
+ root + user + "/faves/pictures"),
), ("pictures",))
class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
"""Extractor for all pictures of a hentaifoundry user"""
subcategory = "pictures"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/pictures/user/([^/]+)(?:/page/(\d+))?/?$")
+ pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
test = (
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
@@ -186,22 +186,15 @@ class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
)
def __init__(self, match):
- HentaifoundryExtractor.__init__(
- self, match, match.group(1), match.group(2))
+ HentaifoundryExtractor.__init__(self, match)
self.page_url = "{}/pictures/user/{}".format(self.root, self.user)
- def get_job_metadata(self):
- page = self.request(self.page_url + "?enterAgree=1").text
- count = text.extract(page, ">Pictures (", ")")[0]
- return {"user": self.user, "count": text.parse_int(count)}
-
class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
- """Extractor for scrap images of a hentai-foundry-user"""
+ """Extractor for scraps of a hentaifoundry user"""
subcategory = "scraps"
directory_fmt = ("{category}", "{user}", "Scraps")
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/pictures/user/([^/]+)/scraps(?:/page/(\d+))?")
+ pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps"
test = (
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
"url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
@@ -211,24 +204,17 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
)
def __init__(self, match):
- HentaifoundryExtractor.__init__(
- self, match, match.group(1), match.group(2))
+ HentaifoundryExtractor.__init__(self, match)
self.page_url = "{}/pictures/user/{}/scraps".format(
self.root, self.user)
- def get_job_metadata(self):
- page = self.request(self.page_url + "?enterAgree=1").text
- count = text.extract(page, ">Scraps (", ")")[0]
- return {"user": self.user, "count": text.parse_int(count)}
-
class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
- """Extractor for favorite images of a hentai-foundry-user"""
+ """Extractor for favorite images of a hentaifoundry user"""
subcategory = "favorite"
directory_fmt = ("{category}", "{user}", "Favorites")
archive_fmt = "f_{user}_{index}"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/user/([^/]+)/faves/pictures(?:/page/(\d+))?")
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures"
test = (
("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", {
"url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b",
@@ -238,8 +224,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
)
def __init__(self, match):
- HentaifoundryExtractor.__init__(
- self, match, match.group(1), match.group(2))
+ HentaifoundryExtractor.__init__(self, match)
self.page_url = "{}/user/{}/faves/pictures".format(
self.root, self.user)
@@ -249,21 +234,18 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
subcategory = "recent"
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/pictures/recent/(\d+-\d+-\d+)(?:/page/(\d+))?")
+ pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
test = ("http://www.hentai-foundry.com/pictures/recent/2018-09-20", {
- "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/]+/\d+/",
+ "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
"range": "20-30",
})
def __init__(self, match):
- HentaifoundryExtractor.__init__(self, match, "", match.group(2))
- self.date = match.group(1)
- self.page_url = "{}/pictures/recent/{}".format(self.root, self.date)
+ HentaifoundryExtractor.__init__(self, match)
+ self.page_url = "{}/pictures/recent/{}".format(self.root, self.user)
- def get_job_metadata(self):
- self.request(self.root + "/?enterAgree=1")
- return {"date": self.date}
+ def metadata(self):
+ return {"date": self.user}
class HentaifoundryPopularExtractor(HentaifoundryExtractor):
@@ -271,15 +253,14 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
subcategory = "popular"
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/pictures/popular(?:/page/(\d+))?")
+ pattern = BASE_PATTERN + r"/pictures/popular()"
test = ("http://www.hentai-foundry.com/pictures/popular", {
- "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/]+/\d+/",
+ "pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
"range": "20-30",
})
def __init__(self, match):
- HentaifoundryExtractor.__init__(self, match, "", match.group(1))
+ HentaifoundryExtractor.__init__(self, match)
self.page_url = self.root + "/pictures/popular"
@@ -287,7 +268,7 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
"""Extractor for a single image from hentaifoundry.com"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
- r"/(?:pictures/user|[^/])/([^/]+)/(\d+)")
+ r"/(?:pictures/user|[^/?#])/([^/?#]+)/(\d+)")
test = (
(("https://www.hentai-foundry.com"
"/pictures/user/Tenpura/407501/shimakaze"), {
@@ -309,36 +290,30 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
"width" : 495,
},
}),
- ("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/", {
- "exception": exception.HttpError,
- }),
+ ("https://www.hentai-foundry.com/pictures/user/Tenpura/340853/"),
("https://pictures.hentai-foundry.com"
"/t/Tenpura/407501/Tenpura-407501-shimakaze.png"),
)
+ skip = Extractor.skip
def __init__(self, match):
- HentaifoundryExtractor.__init__(self, match, match.group(1))
+ HentaifoundryExtractor.__init__(self, match)
self.index = match.group(2)
def items(self):
post_url = "{}/pictures/user/{}/{}/?enterAgree=1".format(
self.root, self.user, self.index)
- data = self.get_image_metadata(post_url)
- data["user"] = self.user
-
- yield Message.Version, 1
- yield Message.Directory, data
- yield Message.Url, data["src"], data
-
- def skip(self, _):
- return 0
+ image = self._parse_post(post_url)
+ image["user"] = self.user
+ yield Message.Directory, image
+ yield Message.Url, image["src"], image
class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
- """Extractor for stories of a hentai-foundry user"""
+ """Extractor for stories of a hentaifoundry user"""
subcategory = "stories"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/stories/user/([^/]+)(?:/page/(\d+))?/?$")
+ archive_fmt = "s_{index}"
+ pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
"count": ">= 35",
"keyword": {
@@ -358,42 +333,37 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
},
})
- def __init__(self, match):
- HentaifoundryExtractor.__init__(self, match, match.group(1))
- self.page_url = "{}/stories/user/{}".format(self.root, self.user)
-
def items(self):
- self.get_job_metadata()
- self.set_filters()
- stories = self._pagination('<div class="storyRow">', '</tr></table>')
- for story_html in util.advance(stories, self.start_post):
- story = self.get_story_metadata(story_html)
+ self._init_site_filters()
+ for story_html in util.advance(self.stories(), self.start_post):
+ story = self._parse_story(story_html)
yield Message.Directory, story
yield Message.Url, story["src"], story
+ def stories(self):
+ url = "{}/stories/user/{}".format(self.root, self.user)
+ return self._pagination(url, '<div class="storyRow">', '</tr></table>')
+
class HentaifoundryStoryExtractor(HentaifoundryExtractor):
"""Extractor for a hentaifoundry story"""
subcategory = "story"
- pattern = (r"(?:https?://)?(?:www\.)?hentai-foundry\.com"
- r"/stories/user/([^/]+)/(\d+)")
+ archive_fmt = "s_{index}"
+ pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)"
test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
"/26416/Overwatch-High-Chapter-Voting-Location"), {
"url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
"keyword": {"title": "Overwatch High Chapter Voting Location"},
})
+ skip = Extractor.skip
def __init__(self, match):
- HentaifoundryExtractor.__init__(self, match, match.group(1))
+ HentaifoundryExtractor.__init__(self, match)
self.index = match.group(2)
def items(self):
story_url = "{}/stories/user/{}/{}/x?enterAgree=1".format(
self.root, self.user, self.index)
- page = self.request(story_url).text
- story = self.get_story_metadata(page)
+ story = self._parse_story(self.request(story_url).text)
yield Message.Directory, story
yield Message.Url, story["src"], story
-
- def skip(self, _):
- return 0
diff --git a/gallery_dl/extractor/hentaihand.py b/gallery_dl/extractor/hentaihand.py
index 7635bf1..4485925 100644
--- a/gallery_dl/extractor/hentaihand.py
+++ b/gallery_dl/extractor/hentaihand.py
@@ -74,7 +74,7 @@ class HentaihandTagExtractor(Extractor):
root = "https://hentaihand.com"
pattern = (r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com"
r"/\w+/(parody|character|tag|artist|group|language"
- r"|category|relationship)/([^/?&#]+)")
+ r"|category|relationship)/([^/?#]+)")
test = (
("https://hentaihand.com/en/artist/himuro", {
"pattern": HentaihandGalleryExtractor.pattern,
diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
index 1c53723..93ef6f1 100644
--- a/gallery_dl/extractor/hiperdex.py
+++ b/gallery_dl/extractor/hiperdex.py
@@ -64,7 +64,7 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
- pattern = BASE_PATTERN + r"(/manga/([^/?&#]+)/([^/?&#]+))"
+ pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
test = (
("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
"pattern": r"https://hiperdex.(com|net|info)/wp-content/uploads"
@@ -105,7 +105,7 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for manga from hiperdex.com"""
chapterclass = HiperdexChapterExtractor
- pattern = BASE_PATTERN + r"(/manga/([^/?&#]+))/?$"
+ pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
test = (
("https://hiperdex.com/manga/youre-not-that-special/", {
"count": 51,
@@ -157,7 +157,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
categorytransfer = False
chapterclass = HiperdexMangaExtractor
reverse = False
- pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?&#]+))"
+ pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/([^/?#]+))"
test = (
("https://hiperdex.com/manga-artist/beck-ho-an/"),
("https://hiperdex.net/manga-artist/beck-ho-an/"),
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index f341c47..994e1b7 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -21,10 +21,10 @@ class HitomiGalleryExtractor(GalleryExtractor):
root = "https://hitomi.la"
pattern = (r"(?:https?://)?hitomi\.la"
r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)"
- r"/(?:[^/?&#]+-)?(\d+)")
+ r"/(?:[^/?#]+-)?(\d+)")
test = (
("https://hitomi.la/galleries/867789.html", {
- "pattern": r"https://[a-c]a.hitomi.la/images/./../[0-9a-f]+.jpg",
+ "pattern": r"https://[a-c]b.hitomi.la/images/./../[0-9a-f]+.jpg",
"keyword": "4873ef9a523621fc857b114e0b2820ba4066e9ae",
"count": 16,
}),
@@ -35,12 +35,12 @@ class HitomiGalleryExtractor(GalleryExtractor):
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
- "url": "b4cbc76032852db4a655bf6a2c4d58eae8153c8e",
+ "url": "ec3fe9b708ee376ec579b90d053ad485c0777552",
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
- "url": "f3aa914ad148437f72d307268fa0d250eabe8dab",
+ "url": "bf4ed4e726204da5bc37a236ca476a2a96081388",
"count": 1413,
}),
# gallery with "broken" redirect
@@ -143,7 +143,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
frontends = 2 if inum < 0x30 else 3
inum = 1 if inum < 0x09 else inum
- url = "https://{}a.hitomi.la/images/{}/{}/{}.{}".format(
+ url = "https://{}b.hitomi.la/images/{}/{}/{}.{}".format(
chr(97 + (inum % frontends)),
ihash[-1], ihash[-3:-1], ihash,
idata["extension"],
@@ -158,7 +158,7 @@ class HitomiTagExtractor(Extractor):
subcategory = "tag"
pattern = (r"(?:https?://)?hitomi\.la/"
r"(tag|artist|group|series|type|character)/"
- r"([^/?&#]+)\.html")
+ r"([^/?#]+)\.html")
test = (
("https://hitomi.la/tag/screenshots-japanese.html", {
"pattern": HitomiGalleryExtractor.pattern,
diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py
index bf0ac63..8785f65 100644
--- a/gallery_dl/extractor/imagefap.py
+++ b/gallery_dl/extractor/imagefap.py
@@ -151,7 +151,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
subcategory = "user"
categorytransfer = True
pattern = (BASE_PATTERN +
- r"/(?:profile(?:\.php\?user=|/)([^/?&#]+)"
+ r"/(?:profile(?:\.php\?user=|/)([^/?#]+)"
r"|usergallery\.php\?userid=(\d+))")
test = (
("https://www.imagefap.com/profile/LucyRae/galleries", {
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 4015bfd..ad5a508 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -176,7 +176,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
class ImgspiceImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgspice.com"""
category = "imgspice"
- pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?&#]+))"
+ pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))"
test = ("https://imgspice.com/nwfwtpyog50y/test.png.html", {
"url": "b8c30a8f51ee1012959a4cfd46197fabf14de984",
"keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0",
@@ -198,7 +198,7 @@ class PixhostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from pixhost.to"""
category = "pixhost"
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
- r"/show/\d+/(\d+)_[^/?&#]+)")
+ r"/show/\d+/(\d+)_[^/?#]+)")
test = ("http://pixhost.to/show/190/130327671_test-.png", {
"url": "4e5470dcf6513944773044d40d883221bbc46cff",
"keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0",
@@ -218,7 +218,7 @@ class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
- r"/(?:image/)?([^/?&#]+)/?)")
+ r"/(?:image/)?([^/?#]+)/?)")
test = ("https://postimg.cc/Wtn2b3hC", {
"url": "0794cfda9b8951a8ac3aa692472484200254ab86",
"keyword": "2d05808d04e4e83e33200db83521af06e3147a84",
@@ -237,7 +237,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from www.turboimagehost.com"""
category = "turboimagehost"
pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
- r"/p/(\d+)/[^/?&#]+\.html)")
+ r"/p/(\d+)/[^/?#]+\.html)")
test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", {
"url": "b94de43612318771ced924cb5085976f13b3b90e",
"keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca",
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 2a69fb1..5dcca62 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -115,7 +115,7 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
"""Extractor for albums on imgbb.com"""
subcategory = "album"
directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
- pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?"
+ pattern = r"(?:https?://)?ibb\.co/album/([^/?#]+)/?(?:\?([^#]+))?"
test = (
("https://ibb.co/album/i5PggF", {
"range": "1-80",
@@ -173,7 +173,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$"
test = ("https://folkie.imgbb.com", {
"range": "1-80",
- "pattern": r"https?://i\.ibb\.co/\w+/[^/?&#]+",
+ "pattern": r"https?://i\.ibb\.co/\w+/[^/?#]+",
})
def __init__(self, match):
@@ -197,7 +197,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
class ImgbbImageExtractor(ImgbbExtractor):
subcategory = "image"
- pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?&#]+)"
+ pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?#]+)"
test = ("https://ibb.co/fUqh5b", {
"pattern": r"https://i\.ibb\.co/g3kvx80/Arundel-Ireeman-5\.jpg",
"content": "c5a0965178a8b357acd8aa39660092918c63795e",
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index 4391e64..ae4e606 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -259,7 +259,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
class ImgurUserExtractor(ImgurExtractor):
"""Extractor for all images posted by a user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$"
test = (
("https://imgur.com/user/Miguenzo", {
"range": "1-100",
@@ -277,7 +277,7 @@ class ImgurUserExtractor(ImgurExtractor):
class ImgurFavoriteExtractor(ImgurExtractor):
"""Extractor for a user's favorites"""
subcategory = "favorite"
- pattern = BASE_PATTERN + r"/user/([^/?&#]+)/favorites"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites"
test = ("https://imgur.com/user/Miguenzo/favorites", {
"range": "1-100",
"count": 100,
@@ -291,7 +291,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddits's imgur links"""
subcategory = "subreddit"
- pattern = BASE_PATTERN + r"/r/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/r/([^/?#]+)"
test = ("https://imgur.com/r/pics", {
"range": "1-100",
"count": 100,
@@ -305,7 +305,7 @@ class ImgurSubredditExtractor(ImgurExtractor):
class ImgurTagExtractor(ImgurExtractor):
"""Extractor for imgur tag searches"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"/t/([^/?&#]+)$"
+ pattern = BASE_PATTERN + r"/t/([^/?#]+)$"
test = ("https://imgur.com/t/animals", {
"range": "1-100",
"count": 100,
@@ -319,7 +319,7 @@ class ImgurTagExtractor(ImgurExtractor):
class ImgurSearchExtractor(ImgurExtractor):
"""Extractor for imgur search results"""
subcategory = "search"
- pattern = BASE_PATTERN + r"/search(?:/[^?&#]+)?/?\?q=([^&#]+)"
+ pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)"
test = ("https://imgur.com/search?q=cute+cat", {
"range": "1-100",
"count": 100,
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index ff8318c..6051db0 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -60,7 +60,7 @@ class InkbunnyExtractor(Extractor):
class InkbunnyUserExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user profiles"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?([^/?#]+)"
test = (
("https://inkbunny.net/soina", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 639f272..1194626 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -343,7 +343,7 @@ class InstagramImageExtractor(InstagramExtractor):
"""Extractor for PostPage"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/(?:p|tv|reel)/([^/?&#]+)")
+ r"/(?:p|tv|reel)/([^/?#]+)")
test = (
# GraphImage
("https://www.instagram.com/p/BqvsDleB3lV/", {
@@ -458,7 +458,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
"""Extractor for StoriesPage"""
subcategory = "stories"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/stories/([^/?&#]+)(?:/(\d+))?")
+ r"/stories/([^/?#]+)(?:/(\d+))?")
test = (
("https://www.instagram.com/stories/instagram/"),
("https://www.instagram.com/stories/highlights/18042509488170095/"),
@@ -478,7 +478,7 @@ class InstagramSavedExtractor(InstagramExtractor):
subcategory = "saved"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
- r"([^/?&#]+)/saved")
+ r"([^/?#]+)/saved")
test = ("https://www.instagram.com/instagram/saved/",)
def __init__(self, match):
@@ -504,7 +504,7 @@ class InstagramUserExtractor(InstagramExtractor):
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/(?!(?:p|explore|directory|accounts|stories|tv|reel)/)"
- r"([^/?&#]+)/?(?:$|[?#])")
+ r"([^/?#]+)/?(?:$|[?#])")
test = (
("https://www.instagram.com/instagram/", {
"range": "1-16",
@@ -550,7 +550,7 @@ class InstagramChannelExtractor(InstagramExtractor):
subcategory = "channel"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/(?!p/|explore/|directory/|accounts/|stories/|tv/)"
- r"([^/?&#]+)/channel")
+ r"([^/?#]+)/channel")
test = ("https://www.instagram.com/instagram/channel/", {
"range": "1-16",
"count": ">= 16",
@@ -579,7 +579,7 @@ class InstagramTagExtractor(InstagramExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{subcategory}", "{tag}")
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
- r"/explore/tags/([^/?&#]+)")
+ r"/explore/tags/([^/?#]+)")
test = ("https://www.instagram.com/explore/tags/instagram/", {
"range": "1-16",
"count": ">= 16",
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index b34b288..6266e5f 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -26,7 +26,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
"{document[originalPublishDate]} {document[title]}")
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{document[id]}_{num}"
- pattern = r"(?:https?://)?issuu\.com(/[^/?&#]+/docs/[^/?&#]+)"
+ pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)"
test = ("https://issuu.com/issuu/docs/motions-1-2019/", {
"pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
"count" : 36,
@@ -80,7 +80,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
class IssuuUserExtractor(IssuuBase, Extractor):
"""Extractor for all publications of a user/publisher"""
subcategory = "user"
- pattern = r"(?:https?://)?issuu\.com/([^/?&#]+)/?$"
+ pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
test = ("https://issuu.com/issuu", {
"pattern": IssuuPublicationExtractor.pattern,
"count" : "> 25",
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py
index 2550af2..6ddf0e8 100644
--- a/gallery_dl/extractor/khinsider.py
+++ b/gallery_dl/extractor/khinsider.py
@@ -19,7 +19,7 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
directory_fmt = ("{category}", "{album[name]}")
archive_fmt = "{filename}.{extension}"
pattern = (r"(?:https?://)?downloads\.khinsider\.com"
- r"/game-soundtracks/album/([^/?&#]+)")
+ r"/game-soundtracks/album/([^/?#]+)")
root = "https://downloads.khinsider.com"
test = (("https://downloads.khinsider.com"
"/game-soundtracks/album/horizon-riders-wii"), {
diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py
index 6e7f139..b54afb7 100644
--- a/gallery_dl/extractor/komikcast.py
+++ b/gallery_dl/extractor/komikcast.py
@@ -45,7 +45,7 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
"""Extractor for manga-chapters from komikcast.com"""
- pattern = r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?&#]+/)"
+ pattern = r"(?:https?://)?(?:www\.)?komikcast\.com(/chapter/[^/?#]+/)"
test = (
(("https://komikcast.com/chapter/"
"apotheosis-chapter-02-2-bahasa-indonesia/"), {
@@ -81,7 +81,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for manga from komikcast.com"""
chapterclass = KomikcastChapterExtractor
pattern = (r"(?:https?://)?(?:www\.)?komikcast\.com"
- r"(/(?:komik/)?[^/?&#]+)/?$")
+ r"(/(?:komik/)?[^/?#]+)/?$")
test = (
("https://komikcast.com/komik/090-eko-to-issho/", {
"url": "dc798d107697d1f2309b14ca24ca9dba30c6600f",
diff --git a/gallery_dl/extractor/lineblog.py b/gallery_dl/extractor/lineblog.py
index a1daa39..4071a26 100644
--- a/gallery_dl/extractor/lineblog.py
+++ b/gallery_dl/extractor/lineblog.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -42,7 +42,7 @@ class LineblogBase():
class LineblogBlogExtractor(LineblogBase, LivedoorBlogExtractor):
"""Extractor for a user's blog on lineblog.me"""
- pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?&#])"
+ pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?#])"
test = ("https://lineblog.me/mamoru_miyano/", {
"range": "1-20",
"count": 20,
diff --git a/gallery_dl/extractor/livedoor.py b/gallery_dl/extractor/livedoor.py
index 9d2383f..feffdfd 100644
--- a/gallery_dl/extractor/livedoor.py
+++ b/gallery_dl/extractor/livedoor.py
@@ -84,7 +84,7 @@ class LivedoorExtractor(Extractor):
class LivedoorBlogExtractor(LivedoorExtractor):
"""Extractor for a user's blog on blog.livedoor.jp"""
subcategory = "blog"
- pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?&#])"
+ pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?#])"
test = (
("http://blog.livedoor.jp/zatsu_ke/", {
"range": "1-50",
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 7561c64..143d00d 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -46,7 +46,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
directory_fmt = ("{category}", "{album[id]} {album[title]}")
archive_fmt = "{album[id]}_{id}"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
- r"/(?:albums|pictures/c/[^/?&#]+/album)/[^/?&#]+_(\d+)")
+ r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 7e2d613..d50e0f2 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -83,18 +83,19 @@ class MangadexChapterExtractor(MangadexExtractor):
chapter, sep, minor = cdata["chapter"].partition(".")
return {
- "manga": mdata["manga"]["title"],
+ "manga" : text.unescape(mdata["manga"]["title"]),
"manga_id": cdata["manga_id"],
- "artist": mdata["manga"]["artist"],
- "author": mdata["manga"]["author"],
- "title": text.unescape(cdata["title"]),
- "volume": text.parse_int(cdata["volume"]),
- "chapter": text.parse_int(chapter),
+ "artist" : text.unescape(mdata["manga"]["artist"]),
+ "author" : text.unescape(mdata["manga"]["author"]),
+ "title" : text.unescape(cdata["title"]),
+ "volume" : text.parse_int(cdata["volume"]),
+ "chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": cdata["id"],
- "group": mdata["chapter"][self.chapter_id]["group_name"],
- "date": text.parse_timestamp(cdata["timestamp"]),
- "lang": util.language_to_code(cdata["lang_name"]),
+ "group" : text.unescape(
+ mdata["chapter"][self.chapter_id]["group_name"]),
+ "date" : text.parse_timestamp(cdata["timestamp"]),
+ "lang" : util.language_to_code(cdata["lang_name"]),
"language": cdata["lang_name"],
}
@@ -159,18 +160,18 @@ class MangadexMangaExtractor(MangadexExtractor):
chapter, sep, minor = info["chapter"].partition(".")
lang = self.iso639_map.get(info["lang_code"], info["lang_code"])
results.append({
- "manga": manga["title"],
+ "manga" : text.unescape(manga["title"]),
"manga_id": self.manga_id,
- "artist": manga["artist"],
- "author": manga["author"],
- "title": text.unescape(info["title"]),
- "volume": text.parse_int(info["volume"]),
- "chapter": text.parse_int(chapter),
+ "artist" : text.unescape(manga["artist"]),
+ "author" : text.unescape(manga["author"]),
+ "title" : text.unescape(info["title"]),
+ "volume" : text.parse_int(info["volume"]),
+ "chapter" : text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": text.parse_int(chid),
- "group": text.unescape(info["group_name"]),
- "date": text.parse_timestamp(info["timestamp"]),
- "lang": lang,
+ "group" : text.unescape(info["group_name"]),
+ "date" : text.parse_timestamp(info["timestamp"]),
+ "lang" : lang,
"language": util.code_to_language(lang),
"_extractor": MangadexChapterExtractor,
})
diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py
index 1b8a4a6..a123783 100644
--- a/gallery_dl/extractor/mangafox.py
+++ b/gallery_dl/extractor/mangafox.py
@@ -16,7 +16,7 @@ class MangafoxChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from fanfox.net"""
category = "mangafox"
pattern = (r"(?:https?://)?(?:www\.|m\.)?(?:mangafox\.me|fanfox\.net)"
- r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?&#]*)))")
+ r"(/manga/[^/]+/((?:v(\d+)/)?c(\d+)([^/?#]*)))")
test = (
("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
"keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
@@ -53,9 +53,9 @@ class MangafoxChapterExtractor(ChapterExtractor):
pnum = 1
while True:
url, pos = text.extract(page, '<img src="', '"')
- yield url, None
+ yield text.ensure_http_scheme(url), None
url, pos = text.extract(page, ' src="', '"', pos)
- yield url, None
+ yield text.ensure_http_scheme(url), None
pnum += 2
page = self.request("{}/{}.html".format(self.urlbase, pnum)).text
diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py
index 52cc672..653c61a 100644
--- a/gallery_dl/extractor/mangahere.py
+++ b/gallery_dl/extractor/mangahere.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -24,12 +24,16 @@ class MangahereBase():
class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
"""Extractor for manga-chapters from mangahere.cc"""
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
- r"([^/]+(?:/v0*(\d+))?/c([^/?&#]+))")
+ r"([^/]+(?:/v0*(\d+))?/c([^/?#]+))")
test = (
("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
"keyword": "7c98d7b50a47e6757b089aa875a53aa970cac66f",
"content": "708d475f06893b88549cbd30df1e3f9428f2c884",
}),
+ # URLs without HTTP scheme (#1070)
+ ("https://www.mangahere.cc/manga/beastars/c196/1.html", {
+ "pattern": "https://zjcdn.mangahere.org/.*",
+ }),
("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
)
@@ -65,9 +69,9 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
while True:
url, pos = text.extract(page, '<img src="', '"')
- yield url, None
+ yield text.ensure_http_scheme(url), None
url, pos = text.extract(page, ' src="', '"', pos)
- yield url, None
+ yield text.ensure_http_scheme(url), None
pnum += 2
page = self.request(self.url_fmt.format(self.part, pnum)).text
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
index 8686b2d..951a257 100644
--- a/gallery_dl/extractor/mangakakalot.py
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -32,7 +32,7 @@ class MangakakalotBase():
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
"""Extractor for manga-chapters from mangakakalot.com"""
pattern = (r"(?:https?://)?(?:www\.)?mangakakalot\.com"
- r"(/chapter/\w+/chapter_[^/?&#]+)")
+ r"(/chapter/\w+/chapter_[^/?#]+)")
test = (
("https://mangakakalot.com/chapter/rx922077/chapter_6", {
"pattern": r"https://s\d+\.\w+\.com/mangakakalot/r\d+/rx922077/"
@@ -78,7 +78,7 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
}
def images(self, page):
- page = text.extract(page, 'id="vungdoc"', '\n</div>')[0]
+ page = text.extract(page, 'id="vungdoc"', '\n<div')[0]
return [
(url, None)
for url in text.extract_iter(page, '<img src="', '"')
diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py
index a4b8340..6067bd0 100644
--- a/gallery_dl/extractor/mangapanda.py
+++ b/gallery_dl/extractor/mangapanda.py
@@ -35,7 +35,7 @@ class MangapandaBase():
class MangapandaChapterExtractor(MangapandaBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapanda.com"""
archive_fmt = "{manga}_{chapter}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?&#]+)/(\d+))"
+ pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/?#]+)/(\d+))"
test = ("https://www.mangapanda.com/red-storm/2", {
"url": "1f633f776e950531ba9b1e81965316458e785261",
"keyword": "b24df4b9cc36383fb6a44e06d32a3884a4dcb5fb",
@@ -96,7 +96,7 @@ class MangapandaMangaExtractor(MangapandaBase, MangaExtractor):
"""Extractor for manga from mangapanda.com"""
chapterclass = MangapandaChapterExtractor
reverse = False
- pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?&#]+)/?$"
+ pattern = r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/?#]+)/?$"
test = ("https://www.mangapanda.com/mushishi", {
"url": "357f965732371cac1990fee8b480f62e29141a42",
"keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py
index 59a046c..0a6fba4 100644
--- a/gallery_dl/extractor/mangapark.py
+++ b/gallery_dl/extractor/mangapark.py
@@ -51,7 +51,7 @@ class MangaparkBase():
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
- r"/manga/([^?&#]+/i\d+)")
+ r"/manga/([^?#]+/i\d+)")
test = (
("https://mangapark.net/manga/gosu/i811653/c055/1", {
"count": 50,
@@ -117,7 +117,7 @@ class MangaparkMangaExtractor(MangaparkBase, MangaExtractor):
"""Extractor for manga from mangapark.net"""
chapterclass = MangaparkChapterExtractor
pattern = (r"(?:https?://)?(?:www\.)?mangapark\.(me|net|com)"
- r"(/manga/[^/?&#]+)/?$")
+ r"(/manga/[^/?#]+)/?$")
test = (
("https://mangapark.net/manga/aria", {
"url": "9b62883c25c8de471f8ab43651e1448536c4ce3f",
diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py
index fd9c7ac..30b8ce3 100644
--- a/gallery_dl/extractor/mangareader.py
+++ b/gallery_dl/extractor/mangareader.py
@@ -53,7 +53,7 @@ class MangareaderBase():
class MangareaderChapterExtractor(MangareaderBase, ChapterExtractor):
"""Extractor for manga-chapters from mangareader.net"""
archive_fmt = "{manga}_{chapter}_{page}"
- pattern = r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?&#]+)/(\d+))"
+ pattern = r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/?#]+)/(\d+))"
test = (("https://www.mangareader.net"
"/karate-shoukoushi-kohinata-minoru/11"), {
"url": "45ece5668d1e9f65cf2225237d78de58660b54e4",
@@ -84,7 +84,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
"""Extractor for manga from mangareader.net"""
chapterclass = MangareaderChapterExtractor
reverse = False
- pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?&#]+)/?$"
+ pattern = r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/?#]+)/?$"
test = ("https://www.mangareader.net/mushishi", {
"url": "bc203b858b4ad76e5d77e39118a7be0350e357da",
"keyword": "031b3ea085921c552de017ecbb9b906e462229c9",
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index ac17cb0..0e063d5 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -184,7 +184,7 @@ def generate_extractors():
Extr.category = category
Extr.instance = instance
Extr.pattern = (r"(?:https?://)?" + pattern +
- r"/@([^/?&#]+)(?:/media)?/?$")
+ r"/@([^/?#]+)(?:/media)?/?$")
Extr.test = info.get("test-user")
Extr.root = root
Extr.access_token = token
@@ -197,7 +197,7 @@ def generate_extractors():
Extr.__doc__ = "Extractor for images from a status on " + instance
Extr.category = category
Extr.instance = instance
- Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?&#]+/(\d+)"
+ Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?#]+/(\d+)"
Extr.test = info.get("test-status")
Extr.root = root
Extr.access_token = token
diff --git a/gallery_dl/extractor/message.py b/gallery_dl/extractor/message.py
index 088fdd6..7bf0084 100644
--- a/gallery_dl/extractor/message.py
+++ b/gallery_dl/extractor/message.py
@@ -40,7 +40,7 @@ class Message():
- 2nd element is the (external) URL as a string
- 3rd element is a dictionary containing URL-specific metadata
- - Message.Urllist:
+ - Message.Urllist: # obsolete
- Same as Message.Url, but its 2nd element is a list of multiple URLs
- The additional URLs serve as a fallback if the primary one fails
"""
@@ -51,5 +51,5 @@ class Message():
# Headers = 4
# Cookies = 5
Queue = 6
- Urllist = 7
+ # Urllist = 7
Metadata = 8
diff --git a/gallery_dl/extractor/myportfolio.py b/gallery_dl/extractor/myportfolio.py
index e2e163a..abb937f 100644
--- a/gallery_dl/extractor/myportfolio.py
+++ b/gallery_dl/extractor/myportfolio.py
@@ -21,14 +21,14 @@ class MyportfolioGalleryExtractor(Extractor):
archive_fmt = "{user}_{filename}"
pattern = (r"(?:myportfolio:(?:https?://)?([^/]+)|"
r"(?:https?://)?([^.]+\.myportfolio\.com))"
- r"(/[^/?&#]+)?")
+ r"(/[^/?#]+)?")
test = (
("https://andrewling.myportfolio.com/volvo-xc-90-hybrid", {
"url": "acea0690c76db0e5cf267648cefd86e921bc3499",
"keyword": "6ac6befe2ee0af921d24cf1dd4a4ed71be06db6d",
}),
("https://andrewling.myportfolio.com/", {
- "pattern": r"https://andrewling\.myportfolio\.com/[^/?&#+]+$",
+ "pattern": r"https://andrewling\.myportfolio\.com/[^/?#+]+$",
"count": ">= 6",
}),
("https://stevenilousphotography.myportfolio.com/society", {
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index f9dc886..a6cc5fa 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -39,6 +39,7 @@ class NewgroundsExtractor(Extractor):
post = self.extract_post(post_url)
url = post.get("url")
except Exception:
+ self.log.debug("", exc_info=True)
url = None
if url:
@@ -49,8 +50,8 @@ class NewgroundsExtractor(Extractor):
post["_comment"], 'data-smartload-src="', '"'), 1):
post["num"] = num
post["_index"] = "{}_{:>02}".format(post["index"], num)
- text.nameext_from_url(url, post)
- yield Message.Url, url, post
+ url = text.ensure_http_scheme(url)
+ yield Message.Url, url, text.nameext_from_url(url, post)
else:
self.log.warning(
"Unable to get download URL for '%s'", post_url)
@@ -159,6 +160,7 @@ class NewgroundsExtractor(Extractor):
if src:
src = src.replace("\\/", "/")
+ fallback = ()
date = text.parse_datetime(extr(
'itemprop="datePublished" content="', '"'))
else:
@@ -168,8 +170,9 @@ class NewgroundsExtractor(Extractor):
"X-Requested-With": "XMLHttpRequest",
"Referer": self.root,
}
- data = self.request(url, headers=headers).json()
- src = data["sources"]["360p"][0]["src"].replace(".360p.", ".")
+ sources = self.request(url, headers=headers).json()["sources"]
+ src = sources["360p"][0]["src"].replace(".360p.", ".")
+ fallback = self._video_fallback(sources)
date = text.parse_timestamp(src.rpartition("?")[2])
return {
@@ -181,8 +184,16 @@ class NewgroundsExtractor(Extractor):
"rating" : extr('class="rated-', '"'),
"index" : text.parse_int(index),
"_index" : index,
+ "_fallback" : fallback,
}
+ @staticmethod
+ def _video_fallback(sources):
+ sources = list(sources.items())
+ sources.sort(key=lambda src: text.parse_int(src[0][:-1]), reverse=True)
+ for src in sources:
+ yield src[1][0]["src"]
+
def _pagination(self, kind):
root = self.user_root
headers = {
@@ -218,7 +229,7 @@ class NewgroundsImageExtractor(NewgroundsExtractor):
"""Extractor for a single image from newgrounds.com"""
subcategory = "image"
pattern = (r"(?:https?://)?(?:"
- r"(?:www\.)?newgrounds\.com/art/view/([^/?&#]+)/[^/?&#]+"
+ r"(?:www\.)?newgrounds\.com/art/view/([^/?#]+)/[^/?#]+"
r"|art\.ngfiles\.com/images/\d+/\d+_([^_]+)_([^.]+))")
test = (
("https://www.newgrounds.com/art/view/tomfulp/ryu-is-hawt", {
diff --git a/gallery_dl/extractor/ngomik.py b/gallery_dl/extractor/ngomik.py
index f3608b2..8e29d97 100644
--- a/gallery_dl/extractor/ngomik.py
+++ b/gallery_dl/extractor/ngomik.py
@@ -18,7 +18,7 @@ class NgomikChapterExtractor(ChapterExtractor):
category = "ngomik"
root = "http://ngomik.in"
pattern = (r"(?:https?://)?(?:www\.)?ngomik\.in"
- r"(/[^/?&#]+-chapter-[^/?&#]+)")
+ r"(/[^/?#]+-chapter-[^/?#]+)")
test = (
("https://www.ngomik.in/14-sai-no-koi-chapter-1-6/", {
"url": "8e67fdf751bbc79bc6f4dead7675008ddb8e32a4",
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 5e7e387..15bb576 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -124,7 +124,7 @@ class NozomiTagExtractor(NozomiExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{postid}"
- pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\."
+ pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-\d+\."
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
"pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
"count": ">= 25",
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 6d7b27a..4bb2c48 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -315,7 +315,7 @@ class OAuthTumblr(OAuthBase):
class OAuthMastodon(OAuthBase):
subcategory = "mastodon"
- pattern = "oauth:mastodon:(?:https?://)?([^/?&#]+)"
+ pattern = "oauth:mastodon:(?:https?://)?([^/?#]+)"
def __init__(self, match):
OAuthBase.__init__(self, match)
diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py
index f08055c..57521d6 100644
--- a/gallery_dl/extractor/paheal.py
+++ b/gallery_dl/extractor/paheal.py
@@ -44,7 +44,7 @@ class PahealTagExtractor(PahealExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
pattern = (r"(?:https?://)?(?:rule34|rule63|cosplay)\.paheal\.net"
- r"/post/list/([^/?&#]+)")
+ r"/post/list/([^/?#]+)")
test = ("https://rule34.paheal.net/post/list/Ayane_Suzuki/1", {
"pattern": r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20",
"count": ">= 15"
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index f1e98d9..ad259f4 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -202,8 +202,8 @@ class PatreonCreatorExtractor(PatreonExtractor):
"""Extractor for a creator's works"""
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
- r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
- r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?")
+ r"/(?!(?:home|join|posts|login|signup)(?:$|[/?#]))"
+ r"([^/?#]+)(?:/posts)?/?(?:\?([^#]+))?")
test = (
("https://www.patreon.com/koveliana", {
"range": "1-25",
@@ -283,7 +283,7 @@ class PatreonUserExtractor(PatreonExtractor):
class PatreonPostExtractor(PatreonExtractor):
"""Extractor for media from a single post"""
subcategory = "post"
- pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?&#]+)"
+ pattern = r"(?:https?://)?(?:www\.)?patreon\.com/posts/([^/?#]+)"
test = (
# postfile + attachments
("https://www.patreon.com/posts/precious-metal-23563293", {
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index a6456da..5e2120a 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -22,7 +22,7 @@ class PhotobucketAlbumExtractor(Extractor):
filename_fmt = "{offset:>03}{pictureId:?_//}_{titleOrFilename}.{extension}"
archive_fmt = "{id}"
pattern = (r"(?:https?://)?((?:[^.]+\.)?photobucket\.com)"
- r"/user/[^/?&#]+/library(?:/[^?&#]*)?")
+ r"/user/[^/?#]+/library(?:/[^?#]*)?")
test = (
("https://s369.photobucket.com/user/CrpyLrkr/library", {
"pattern": r"https?://[oi]+\d+.photobucket.com/albums/oo139/",
@@ -111,8 +111,8 @@ class PhotobucketImageExtractor(Extractor):
filename_fmt = "{pictureId:?/_/}{titleOrFilename}.{extension}"
archive_fmt = "{username}_{id}"
pattern = (r"(?:https?://)?(?:[^.]+\.)?photobucket\.com"
- r"(?:/gallery/user/([^/?&#]+)/media/([^/?&#]+)"
- r"|/user/([^/?&#]+)/media/[^?&#]+\.html)")
+ r"(?:/gallery/user/([^/?#]+)/media/([^/?#]+)"
+ r"|/user/([^/?#]+)/media/[^?#]+\.html)")
test = (
(("https://s271.photobucket.com/user/lakerfanryan"
"/media/Untitled-3-1.jpg.html"), {
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 064967d..45bd8b5 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -67,7 +67,7 @@ class PiczelExtractor(Extractor):
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
- pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"
+ pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?#]+)/?$"
test = ("https://piczel.tv/gallery/Bikupan", {
"range": "1-100",
"count": ">= 100",
@@ -88,7 +88,7 @@ class PiczelFolderExtractor(PiczelExtractor):
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
- r"/gallery/(?!image)([^/?&#]+)/(\d+)")
+ r"/gallery/(?!image)([^/?#]+)/(\d+)")
test = ("https://piczel.tv/gallery/Lulena/1114", {
"count": ">= 4",
})
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index cc89ac5..aa11289 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -9,7 +9,8 @@
"""Extractors for https://www.pinterest.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
+from ..cache import cache
import itertools
import json
@@ -28,6 +29,7 @@ class PinterestExtractor(Extractor):
self.api = PinterestAPI(self)
def items(self):
+ self.api.login()
data = self.metadata()
yield Message.Version, 1
yield Message.Directory, data
@@ -98,6 +100,10 @@ class PinterestBoardExtractor(PinterestExtractor):
"options": (("sections", True),),
"count": 5,
}),
+ # secret board (#1055)
+ ("https://www.pinterest.de/g1952849/secret/", {
+ "count": 2,
+ }),
("https://www.pinterest.com/g1952848/test/", {
"exception": exception.GalleryDLException,
}),
@@ -230,16 +236,22 @@ class PinterestAPI():
"Accept" : "application/json, text/javascript, "
"*/*, q=0.01",
"Accept-Language" : "en-US,en;q=0.5",
- "X-Pinterest-AppState": "active",
- "X-APP-VERSION" : "b00dd49",
+ "Referer" : BASE_URL + "/",
"X-Requested-With" : "XMLHttpRequest",
+ "X-APP-VERSION" : "7a20185",
+ "X-CSRFToken" : None,
+ "X-Pinterest-AppState": "active",
"Origin" : BASE_URL,
- "Referer" : BASE_URL + "/",
}
def __init__(self, extractor):
self.extractor = extractor
+ csrf_token = util.generate_csrf_token()
+ self.headers = self.HEADERS.copy()
+ self.headers["X-CSRFToken"] = csrf_token
+ self.cookies = {"csrftoken": csrf_token}
+
def pin(self, pin_id):
"""Query information about a pin"""
options = {"id": pin_id, "field_set_key": "detailed"}
@@ -282,12 +294,45 @@ class PinterestAPI():
options = {"board_id": board_id, "add_vase": True}
return self._pagination("BoardRelatedPixieFeed", options)
+ def login(self):
+ """Login and obtain session cookies"""
+ username, password = self.extractor._get_auth_info()
+ if username:
+ self.cookies.update(self._login_impl(username, password))
+
+ @cache(maxage=180*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.extractor.log.info("Logging in as %s", username)
+
+ url = self.BASE_URL + "/resource/UserSessionResource/create/"
+ options = {
+ "username_or_email": username,
+ "password" : password,
+ }
+ data = {"data": json.dumps({"options": options}), "source_url": ""}
+
+ try:
+ response = self.extractor.request(
+ url, method="POST", headers=self.headers,
+ cookies=self.cookies, data=data)
+ resource = response.json()["resource_response"]
+ except (exception.HttpError, ValueError, KeyError):
+ raise exception.AuthenticationError()
+
+ if resource["status"] != "success":
+ raise exception.AuthenticationError()
+ return {
+ cookie.name: cookie.value
+ for cookie in response.cookies
+ }
+
def _call(self, resource, options):
url = "{}/resource/{}Resource/get/".format(self.BASE_URL, resource)
params = {"data": json.dumps({"options": options}), "source_url": ""}
response = self.extractor.request(
- url, params=params, headers=self.HEADERS, fatal=False)
+ url, params=params, headers=self.headers,
+ cookies=self.cookies, fatal=False)
try:
data = response.json()
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index ee8f9bb..a813d0e 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -86,7 +86,7 @@ class PixivUserExtractor(PixivExtractor):
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
- r"(?:/([^/?&#]+))?)?/?(?:$|[?#])"
+ r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
test = (
@@ -170,7 +170,7 @@ class PixivUserExtractor(PixivExtractor):
class PixivMeExtractor(PixivExtractor):
"""Extractor for pixiv.me URLs"""
subcategory = "me"
- pattern = r"(?:https?://)?pixiv\.me/([^/?&#]+)"
+ pattern = r"(?:https?://)?pixiv\.me/([^/?#]+)"
test = (
("https://pixiv.me/del_shannon", {
"url": "29c295ce75150177e6b0a09089a949804c708fbf",
@@ -243,7 +243,7 @@ class PixivFavoriteExtractor(PixivExtractor):
"{user_bookmark[id]} {user_bookmark[account]}")
archive_fmt = "f_{user_bookmark[id]}_{id}{num}.{extension}"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:(?:en/)?"
- r"users/(\d+)/(bookmarks/artworks(?:/([^/?&#]+))?|following)"
+ r"users/(\d+)/(bookmarks/artworks(?:/([^/?#]+))?|following)"
r"|bookmark\.php(?:\?([^#]*))?)")
test = (
("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
@@ -407,7 +407,7 @@ class PixivSearchExtractor(PixivExtractor):
archive_fmt = "s_{search[word]}_{id}{num}.{extension}"
directory_fmt = ("{category}", "search", "{search[word]}")
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
- r"/(?:(?:en/)?tags/([^/?&#]+)(?:/[^/?&#]+)?/?"
+ r"/(?:(?:en/)?tags/([^/?#]+)(?:/[^/?#]+)?/?"
r"|search\.php)(?:\?([^#]+))?")
test = (
("https://www.pixiv.net/en/tags/Original", {
diff --git a/gallery_dl/extractor/pixnet.py b/gallery_dl/extractor/pixnet.py
index d8ac9f6..342f4fa 100644
--- a/gallery_dl/extractor/pixnet.py
+++ b/gallery_dl/extractor/pixnet.py
@@ -169,7 +169,7 @@ class PixnetUserExtractor(PixnetExtractor):
"""Extractor for all sets and folders of a pixnet user"""
subcategory = "user"
url_fmt = "{}{}/album/list"
- pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?&#])"
+ pattern = BASE_PATTERN + r"()(?:/blog|/album(?:/list)?)?/?(?:$|[?#])"
test = (
("https://albertayu773.pixnet.net/"),
("https://albertayu773.pixnet.net/blog"),
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 60ca1fb..f2e964d 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -72,7 +72,7 @@ class PlurkExtractor(Extractor):
class PlurkTimelineExtractor(PlurkExtractor):
"""Extractor for URLs from all posts in a Plurk timeline"""
subcategory = "timeline"
- pattern = r"(?:https?://)?(?:www\.)?plurk\.com/(?!p/)(\w+)/?(?:$|[?&#])"
+ pattern = r"(?:https?://)?(?:www\.)?plurk\.com/(?!p/)(\w+)/?(?:$|[?#])"
test = ("https://www.plurk.com/plurkapi", {
"pattern": r"https?://.+",
"count": ">= 23"
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 6b36cdd..1856c82 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -118,7 +118,7 @@ class PornhubGalleryExtractor(PornhubExtractor):
class PornhubUserExtractor(PornhubExtractor):
"""Extractor for all galleries of a pornhub user"""
subcategory = "user"
- pattern = (BASE_PATTERN + r"/(users|model)/([^/?&#]+)"
+ pattern = (BASE_PATTERN + r"/(users|model)/([^/?#]+)"
"(?:/photos(?:/(public|private|favorites))?)?/?$")
test = (
("https://www.pornhub.com/users/flyings0l0/photos/public", {
diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py
index e5b4b44..a20312f 100644
--- a/gallery_dl/extractor/reactor.py
+++ b/gallery_dl/extractor/reactor.py
@@ -150,7 +150,7 @@ class ReactorTagExtractor(ReactorExtractor):
subcategory = "tag"
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{search_tags}_{post_id}_{num}"
- pattern = BASE_PATTERN + r"/tag/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/tag/([^/?#]+)"
test = ("http://anime.reactor.cc/tag/Anime+Art",)
def __init__(self, match):
@@ -166,7 +166,7 @@ class ReactorSearchExtractor(ReactorTagExtractor):
subcategory = "search"
directory_fmt = ("{category}", "search", "{search_tags}")
archive_fmt = "s_{search_tags}_{post_id}_{num}"
- pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
test = ("http://anime.reactor.cc/search?q=Art",)
@@ -174,7 +174,7 @@ class ReactorUserExtractor(ReactorExtractor):
"""Extractor for all posts of a user on *reactor.cc sites"""
subcategory = "user"
directory_fmt = ("{category}", "user", "{user}")
- pattern = BASE_PATTERN + r"/user/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/user/([^/?#]+)"
test = ("http://anime.reactor.cc/user/Shuster",)
def __init__(self, match):
@@ -215,7 +215,7 @@ JR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(joyreactor\.c(?:c|om))"
class JoyreactorTagExtractor(ReactorTagExtractor):
"""Extractor for tag searches on joyreactor.cc"""
category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/tag/([^/?&#]+)"
+ pattern = JR_BASE_PATTERN + r"/tag/([^/?#]+)"
test = (
("http://joyreactor.cc/tag/Advent+Cirno", {
"count": ">= 17",
@@ -229,7 +229,7 @@ class JoyreactorTagExtractor(ReactorTagExtractor):
class JoyreactorSearchExtractor(ReactorSearchExtractor):
"""Extractor for search results on joyreactor.cc"""
category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"
+ pattern = JR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
test = (
("http://joyreactor.cc/search/Cirno+Gifs", {
"range": "1-25",
@@ -244,7 +244,7 @@ class JoyreactorSearchExtractor(ReactorSearchExtractor):
class JoyreactorUserExtractor(ReactorUserExtractor):
"""Extractor for all posts of a user on joyreactor.cc"""
category = "joyreactor"
- pattern = JR_BASE_PATTERN + r"/user/([^/?&#]+)"
+ pattern = JR_BASE_PATTERN + r"/user/([^/?#]+)"
test = (
("http://joyreactor.cc/user/hemantic"),
("http://joyreactor.com/user/Tacoman123", {
@@ -289,7 +289,7 @@ PR_BASE_PATTERN = r"(?:https?://)?(?:www\.)?(pornreactor\.cc|fapreactor.com)"
class PornreactorTagExtractor(ReactorTagExtractor):
"""Extractor for tag searches on pornreactor.cc"""
category = "pornreactor"
- pattern = PR_BASE_PATTERN + r"/tag/([^/?&#]+)"
+ pattern = PR_BASE_PATTERN + r"/tag/([^/?#]+)"
test = (
("http://pornreactor.cc/tag/RiceGnat", {
"range": "1-25",
@@ -302,7 +302,7 @@ class PornreactorTagExtractor(ReactorTagExtractor):
class PornreactorSearchExtractor(ReactorSearchExtractor):
"""Extractor for search results on pornreactor.cc"""
category = "pornreactor"
- pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?&#]+)"
+ pattern = PR_BASE_PATTERN + r"/search(?:/|\?q=)([^/?#]+)"
test = (
("http://pornreactor.cc/search?q=ecchi+hentai", {
"range": "1-25",
@@ -315,7 +315,7 @@ class PornreactorSearchExtractor(ReactorSearchExtractor):
class PornreactorUserExtractor(ReactorUserExtractor):
"""Extractor for all posts of a user on pornreactor.cc"""
category = "pornreactor"
- pattern = PR_BASE_PATTERN + r"/user/([^/?&#]+)"
+ pattern = PR_BASE_PATTERN + r"/user/([^/?#]+)"
test = (
("http://pornreactor.cc/user/Disillusion", {
"range": "1-25",
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 7030c81..ae1749e 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -45,7 +45,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
subcategory = "issue"
pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
- r"(/Comic/[^/?&#]+/[^/?&#]+\?id=(\d+))")
+ r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))")
test = ("https://readcomiconline.to/Comic/W-i-t-c-h/Issue-130?id=22289", {
"url": "2bbab6ec4fbc05d269cca420a82a9b5acda28682",
"keyword": "30fe110273e871305001f33c18634516a0a51421",
@@ -81,7 +81,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
chapterclass = ReadcomiconlineIssueExtractor
subcategory = "comic"
pattern = (r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.to"
- r"(/Comic/[^/?&#]+/?)$")
+ r"(/Comic/[^/?#]+/?)$")
test = (
("https://readcomiconline.to/Comic/W-i-t-c-h", {
"url": "e231bc2a293edb465133c37a8e36a7e7d94cab14",
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 9c6892a..0be7f17 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -47,6 +47,8 @@ class RedditExtractor(Extractor):
urls = []
if submission:
+ submission["date"] = text.parse_timestamp(
+ submission["created_utc"])
yield Message.Directory, submission
visited.add(submission["id"])
url = submission["url"]
@@ -135,7 +137,7 @@ class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""
subcategory = "subreddit"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/r/"
- r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)")
+ r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?(?:$|#)")
test = (
("https://www.reddit.com/r/lavaporn/", {
"range": "1-20",
@@ -160,7 +162,7 @@ class RedditUserExtractor(RedditExtractor):
"""Extractor for URLs from posts by a reddit user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:\w+\.)?reddit\.com/u(?:ser)?/"
- r"([^/?&#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?")
+ r"([^/?#]+(?:/[a-z]+)?)/?(?:\?([^#]*))?")
test = (
("https://www.reddit.com/user/username/", {
"count": ">= 2",
@@ -183,7 +185,7 @@ class RedditSubmissionExtractor(RedditExtractor):
"""Extractor for URLs from a submission on reddit.com"""
subcategory = "submission"
pattern = (r"(?:https?://)?(?:"
- r"(?:\w+\.)?reddit\.com/(?:r/[^/?&#]+/comments|gallery)"
+ r"(?:\w+\.)?reddit\.com/(?:r/[^/?#]+/comments|gallery)"
r"|redd\.it)/([a-z0-9]+)")
test = (
("https://www.reddit.com/r/lavaporn/comments/8cqhub/", {
@@ -229,7 +231,7 @@ class RedditImageExtractor(Extractor):
subcategory = "image"
archive_fmt = "{filename}"
pattern = (r"(?:https?://)?i\.redd(?:\.it|ituploads\.com)"
- r"/[^/?&#]+(?:\?[^#]*)?")
+ r"/[^/?#]+(?:\?[^#]*)?")
test = (
("https://i.redd.it/upjtjcx2npzz.jpg", {
"url": "0de614900feef103e580b632190458c0b62b641a",
diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py
index 96be3d8..0a85be6 100644
--- a/gallery_dl/extractor/redgifs.py
+++ b/gallery_dl/extractor/redgifs.py
@@ -22,7 +22,7 @@ class RedgifsUserExtractor(RedgifsExtractor):
"""Extractor for redgifs user profiles"""
subcategory = "user"
directory_fmt = ("{category}", "{userName}")
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?&#]+)"
+ pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/users/([^/?#]+)"
test = ("https://www.redgifs.com/users/Natalifiction", {
"pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
"count": ">= 100",
@@ -36,7 +36,7 @@ class RedgifsSearchExtractor(RedgifsExtractor):
"""Extractor for redgifs search results"""
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
- pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?&#]+)"
+ pattern = r"(?:https?://)?(?:www\.)?redgifs\.com/gifs/browse/([^/?#]+)"
test = ("https://www.redgifs.com/gifs/browse/jav", {
"pattern": r"https://\w+\.(redgifs|gfycat)\.com/[A-Za-z]+\.mp4",
"range": "100-300",
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 0189fc9..f6ad327 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -30,7 +30,7 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
filename_fmt = "{filename}.{extension}"
archive_fmt = "{date:%Y%m%d}_{filename}"
pattern = (r"(?:https?://)?www\.sankakucomplex\.com"
- r"/(\d{4}/\d\d/\d\d/[^/?&#]+)")
+ r"/(\d{4}/\d\d/\d\d/[^/?#]+)")
test = (
("https://www.sankakucomplex.com/2019/05/11/twitter-cosplayers", {
"url": "4a9ecc5ae917fbce469280da5b6a482510cae84d",
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 69962c8..b32a170 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -172,7 +172,7 @@ class SeigaImageExtractor(SeigaExtractor):
r"|lohas\.nicoseiga\.jp/(?:thumb|(?:priv|o)/[^/]+/\d+)/)(\d+)")
test = (
("https://seiga.nicovideo.jp/seiga/im5977527", {
- "keyword": "f66ba5de33d4ce2cb57f23bb37e1e847e0771c10",
+ "keyword": "c8339781da260f7fc44894ad9ada016f53e3b12a",
"content": "d9202292012178374d57fb0126f6124387265297",
}),
("https://seiga.nicovideo.jp/seiga/im123", {
@@ -196,4 +196,23 @@ class SeigaImageExtractor(SeigaExtractor):
return num
def get_images(self):
- return ({}, {"image_id": text.parse_int(self.image_id)})
+ # Request the image's page and collect its metadata from the HTML
+ url = "{}/seiga/im{}".format(self.root, self.image_id)
+ page = self.request(url, notfound="image").text
+
+ # NOTE(review): "discription" below is the literal marker searched
+ # for in the page -- presumably the site's own spelling; do not
+ # "correct" it without checking the actual markup
+ data = text.extract_all(page, (
+ ("date" , '<li class="date"><span class="created">', '<'),
+ ("title" , '<h1 class="title">', '</h1>'),
+ ("description" , '<p class="discription">', '</p>'),
+ ))[0]
+
+ data["user"] = text.extract_all(page, (
+ ("id" , '<a href="/user/illust/' , '"'),
+ ("name", '<span itemprop="title">', '<'),
+ ))[0]
+
+ data["description"] = text.remove_html(data["description"])
+ data["image_id"] = text.parse_int(self.image_id)
+ # ":00+0900" supplies the seconds and UTC offset that the
+ # "%S%z" part of the format string expects
+ data["date"] = text.parse_datetime(
+ data["date"] + ":00+0900", "%Y年%m月%d日 %H:%M:%S%z")
+
+ # the same dict object is returned in both tuple positions
+ return (data, data)
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 2cef430..41d2e67 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -168,7 +168,7 @@ class SexcomBoardExtractor(SexcomExtractor):
subcategory = "board"
directory_fmt = ("{category}", "{user}", "{board}")
pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user"
- r"/([^/?&#]+)/(?!(?:following|pins|repins|likes)/)([^/?&#]+)")
+ r"/([^/?#]+)/(?!(?:following|pins|repins|likes)/)([^/?#]+)")
test = ("https://www.sex.com/user/ronin17/exciting-hentai/", {
"count": ">= 15",
})
@@ -193,7 +193,7 @@ class SexcomSearchExtractor(SexcomExtractor):
subcategory = "search"
directory_fmt = ("{category}", "search", "{search[query]}")
pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
- r"(pic|gif|video)s/([^/?&#]+)|search/(pic|gif|video)s"
+ r"(pic|gif|video)s/([^/?#]+)|search/(pic|gif|video)s"
r")/?(?:\?([^#]+))?)")
test = (
("https://www.sex.com/search/pics?query=ecchi", {
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index a0d34d1..7301cbc 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -18,7 +18,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
archive_fmt = "{image_id}"
pattern = (r"(?:https?://)?(?!videos\.)([\w-]+\.simply-hentai\.com"
r"(?!/(?:album|gifs?|images?|series)(?:/|$))"
- r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?&#]+)+)")
+ r"(?:/(?!(?:page|all-pages)(?:/|\.|$))[^/?#]+)+)")
test = (
(("https://original-work.simply-hentai.com"
"/amazon-no-hiyaku-amazon-elixir"), {
@@ -84,7 +84,7 @@ class SimplyhentaiImageExtractor(Extractor):
filename_fmt = "{category}_{token}{title:?_//}.{extension}"
archive_fmt = "{token}"
pattern = (r"(?:https?://)?(?:www\.)?(simply-hentai\.com"
- r"/(image|gif)/[^/?&#]+)")
+ r"/(image|gif)/[^/?#]+)")
test = (
(("https://www.simply-hentai.com/image"
"/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
@@ -138,7 +138,7 @@ class SimplyhentaiVideoExtractor(Extractor):
directory_fmt = ("{category}", "{type}s")
filename_fmt = "{title}{episode:?_//>02}.{extension}"
archive_fmt = "{title}_{episode}"
- pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?&#]+)"
+ pattern = r"(?:https?://)?(videos\.simply-hentai\.com/[^/?#]+)"
test = (
("https://videos.simply-hentai.com/creamy-pie-episode-02", {
"pattern": r"https://www\.googleapis\.com/drive/v3/files"
diff --git a/gallery_dl/extractor/slickpic.py b/gallery_dl/extractor/slickpic.py
index 05ec117..ddd45ce 100644
--- a/gallery_dl/extractor/slickpic.py
+++ b/gallery_dl/extractor/slickpic.py
@@ -33,7 +33,7 @@ class SlickpicAlbumExtractor(SlickpicExtractor):
"{album[id]} {album[title]}")
filename_fmt = "{num:>03}_{id}{title:?_//}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"/albums/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/albums/([^/?#]+)"
test = (
("https://mattcrandall.slickpic.com/albums/LamborghiniMurcielago/", {
"url": "58bd94ebc80fd906e9879826970b408d54c6da07",
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 30420a8..8f668df 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -20,7 +20,7 @@ class SlidesharePresentationExtractor(Extractor):
filename_fmt = "{presentation}-{num:>02}.{extension}"
archive_fmt = "{presentation}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?slideshare\.net"
- r"/(?:mobile/)?([^/?&#]+)/([^/?&#]+)")
+ r"/(?:mobile/)?([^/?#]+)/([^/?#]+)")
test = (
(("https://www.slideshare.net"
"/Slideshare/get-started-with-slide-share"), {
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 163102d..cfbd5eb 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -108,7 +108,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
"""Extractor for individual smugmug images"""
subcategory = "image"
archive_fmt = "{Image[ImageKey]}"
- pattern = BASE_PATTERN + r"(?:/[^/?&#]+)+/i-([^/?&#-]+)"
+ pattern = BASE_PATTERN + r"(?:/[^/?#]+)+/i-([^/?#-]+)"
test = (
("https://tdm.smugmug.com/Nature/Dove/i-kCsLJT6", {
"url": "f624ad7293afd6412a7d34e3950a118596c36c85",
@@ -141,7 +141,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
class SmugmugPathExtractor(SmugmugExtractor):
"""Extractor for smugmug albums from URL paths and users"""
subcategory = "path"
- pattern = BASE_PATTERN + r"((?:/[^/?&#a-fh-mo-z][^/?&#]*)*)/?$"
+ pattern = BASE_PATTERN + r"((?:/[^/?#a-fh-mo-z][^/?#]*)*)/?$"
test = (
("https://tdm.smugmug.com/Nature/Dove", {
"pattern": "smugmug:album:cr4C7f$",
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
index a3819c7..f5b9171 100644
--- a/gallery_dl/extractor/speakerdeck.py
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -20,7 +20,7 @@ class SpeakerdeckPresentationExtractor(Extractor):
filename_fmt = "{presentation}-{num:>02}.{extension}"
archive_fmt = "{presentation}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com"
- r"/([^/?&#]+)/([^/?&#]+)")
+ r"/([^/?#]+)/([^/?#]+)")
test = (
(("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), {
"pattern": r"https://files.speakerdeck.com/presentations/"
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 38b39d4..753f266 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -135,7 +135,7 @@ class SubscribestarExtractor(Extractor):
class SubscribestarUserExtractor(SubscribestarExtractor):
"""Extractor for media from a subscribestar user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/(?!posts/)([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)"
test = (
("https://www.subscribestar.com/subscribestar", {
"count": ">= 20",
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 185f33a..cf57a4d 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -295,7 +295,7 @@ class TumblrPostExtractor(TumblrExtractor):
class TumblrTagExtractor(TumblrExtractor):
"""Extractor for images from a tumblr-user by tag"""
subcategory = "tag"
- pattern = BASE_PATTERN + r"/tagged/([^/?&#]+)"
+ pattern = BASE_PATTERN + r"/tagged/([^/?#]+)"
test = ("http://demo.tumblr.com/tagged/Times%20Square", {
"pattern": (r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg"),
"count": 1,
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c98a300..06973b2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -9,10 +9,8 @@
"""Extractors for https://twitter.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache
-import hashlib
-import time
BASE_PATTERN = (
@@ -29,7 +27,6 @@ class TwitterExtractor(Extractor):
archive_fmt = "{tweet_id}_{retweet_id}_{num}"
cookiedomain = ".twitter.com"
root = "https://twitter.com"
- sizes = (":orig", ":large", ":medium", ":small")
def __init__(self, match):
Extractor.__init__(self, match)
@@ -39,6 +36,7 @@ class TwitterExtractor(Extractor):
self.twitpic = self.config("twitpic", False)
self.quoted = self.config("quoted", True)
self.videos = self.config("videos", True)
+ self.cards = self.config("cards", False)
self._user_cache = {}
def items(self):
@@ -58,56 +56,82 @@ class TwitterExtractor(Extractor):
self.log.debug("Skipping %s (quoted tweet)", tweet["id_str"])
continue
+ files = []
+ if "extended_entities" in tweet:
+ self._extract_media(tweet, files)
+ if "card" in tweet and self.cards:
+ self._extract_card(tweet, files)
if self.twitpic:
- self._extract_twitpic(tweet)
- if "extended_entities" not in tweet:
+ self._extract_twitpic(tweet, files)
+ if not files:
continue
tdata = self._transform_tweet(tweet)
tdata.update(metadata)
-
yield Message.Directory, tdata
- for tdata["num"], media in enumerate(
- tweet["extended_entities"]["media"], 1):
-
- tdata["width"] = media["original_info"].get("width", 0)
- tdata["height"] = media["original_info"].get("height", 0)
-
- if "video_info" in media:
-
- if self.videos == "ytdl":
- url = "ytdl:{}/i/web/status/{}".format(
- self.root, tweet["id_str"])
- tdata["extension"] = None
- yield Message.Url, url, tdata
-
- elif self.videos:
- video_info = media["video_info"]
- variant = max(
- video_info["variants"],
- key=lambda v: v.get("bitrate", 0),
- )
- tdata["duration"] = video_info.get(
- "duration_millis", 0) / 1000
- tdata["bitrate"] = variant.get("bitrate", 0)
-
- url = variant["url"]
- text.nameext_from_url(url, tdata)
- yield Message.Url, url, tdata
-
- elif "media_url_https" in media:
- url = media["media_url_https"]
- urls = [url + size for size in self.sizes]
- text.nameext_from_url(url, tdata)
- yield Message.Urllist, urls, tdata
-
- else:
- url = media["media_url"]
- text.nameext_from_url(url, tdata)
- yield Message.Url, url, tdata
+ for tdata["num"], file in enumerate(files, 1):
+ file.update(tdata)
+ url = file.pop("url")
+ if "extension" not in file:
+ text.nameext_from_url(url, file)
+ yield Message.Url, url, file
+
+    def _extract_media(self, tweet, files):
+        """Append the media of 'tweet' to the 'files' list
+
+        Walks tweet["extended_entities"]["media"] and adds one dict per
+        entry; video entries are skipped when the 'videos' option is
+        disabled. Every dict has a "url". Video and photo dicts also have
+        "width" and "height" as plain integers, taken from the entry's
+        "original_info" (0 if absent).
+        """
+        for media in tweet["extended_entities"]["media"]:
+            # no trailing commas here: they would turn both values
+            # into 1-tuples like (1200,) instead of integers
+            width = media["original_info"].get("width", 0)
+            height = media["original_info"].get("height", 0)
+
+            if "video_info" in media:
+                if self.videos == "ytdl":
+                    # "extension" is preset, so items() does not call
+                    # text.nameext_from_url() on the "ytdl:" URL
+                    files.append({
+                        "url": "ytdl:{}/i/web/status/{}".format(
+                            self.root, tweet["id_str"]),
+                        "width"    : width,
+                        "height"   : height,
+                        "extension": None,
+                    })
+                elif self.videos:
+                    video_info = media["video_info"]
+                    # pick the variant with the highest bitrate;
+                    # variants without a "bitrate" count as 0
+                    variant = max(
+                        video_info["variants"],
+                        key=lambda v: v.get("bitrate", 0),
+                    )
+                    files.append({
+                        "url"     : variant["url"],
+                        "width"   : width,
+                        "height"  : height,
+                        "bitrate" : variant.get("bitrate", 0),
+                        # milliseconds -> seconds
+                        "duration": video_info.get(
+                            "duration_millis", 0) / 1000,
+                    })
+            elif "media_url_https" in media:
+                url = media["media_url_https"]
+                # nameext_from_url() gets the URL without a size suffix;
+                # ":orig" is requested first, smaller sizes are fallbacks
+                files.append(text.nameext_from_url(url, {
+                    "url"      : url + ":orig",
+                    "_fallback": [url+":large", url+":medium", url+":small"],
+                    "width"    : width,
+                    "height"   : height,
+                }))
+            else:
+                files.append({"url": media["media_url"]})
+
+ def _extract_card(self, tweet, files):
+ """Append the file of a tweet's 'card' to the 'files' list"""
+ card = tweet["card"]
+ if card["name"] in ("summary", "summary_large_image"):
+ bvals = card["binding_values"]
+ # keys are tried in order of prefix, then size; the first one
+ # present in 'bvals' is used and the search stops
+ for prefix in ("photo_image_full_size_",
+ "summary_photo_image_",
+ "thumbnail_image_"):
+ for size in ("original", "x_large", "large", "small"):
+ key = prefix + size
+ if key in bvals:
+ files.append(bvals[key]["image_value"])
+ return
+ # NOTE(review): indentation is lost in this view; this 'else'
+ # presumably belongs to the card-name check above, so other card
+ # types are handed over as a "ytdl:" URL of the tweet -- confirm
+ else:
+ url = "ytdl:{}/i/web/status/{}".format(self.root, tweet["id_str"])
+ files.append({"url": url})
- def _extract_twitpic(self, tweet):
- twitpics = []
+ def _extract_twitpic(self, tweet, files):
for url in tweet["entities"].get("urls", ()):
url = url["expanded_url"]
if "//twitpic.com/" in url and "/photos/" not in url:
@@ -117,15 +141,7 @@ class TwitterExtractor(Extractor):
url = text.extract(
response.text, 'name="twitter:image" value="', '"')[0]
if url:
- twitpics.append({
- "original_info": {},
- "media_url" : url,
- })
- if twitpics:
- if "extended_entities" in tweet:
- tweet["extended_entities"]["media"].extend(twitpics)
- else:
- tweet["extended_entities"] = {"media": twitpics}
+ files.append({"url": url})
def _transform_tweet(self, tweet):
entities = tweet["entities"]
@@ -247,7 +263,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
pattern = BASE_PATTERN + \
- r"/(?!search)(?:([^/?&#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))"
+ r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))"
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
@@ -271,7 +287,7 @@ class TwitterTimelineExtractor(TwitterExtractor):
class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for all images from a user's Media Tweets"""
subcategory = "media"
- pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/media(?!\w)"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
test = (
("https://twitter.com/supernaturepics/media", {
"range": "1-40",
@@ -288,7 +304,7 @@ class TwitterMediaExtractor(TwitterExtractor):
class TwitterLikesExtractor(TwitterExtractor):
"""Extractor for liked tweets"""
subcategory = "likes"
- pattern = BASE_PATTERN + r"/(?!search)([^/?&#]+)/likes(?!\w)"
+ pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
test = ("https://twitter.com/supernaturepics/likes",)
def tweets(self):
@@ -326,7 +342,7 @@ class TwitterSearchExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
- pattern = BASE_PATTERN + r"/([^/?&#]+|i/web)/status/(\d+)"
+ pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
test = (
("https://twitter.com/supernaturepics/status/604341487988576256", {
"url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
@@ -375,11 +391,16 @@ class TwitterTweetExtractor(TwitterExtractor):
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
"count": 3,
}),
- # Nitter tweet
+ # Nitter tweet (#890)
("https://nitter.net/ed1conf/status/1163841619336007680", {
"url": "0f6a841e23948e4320af7ae41125e0c5b3cadc98",
"content": "f29501e44d88437fe460f5c927b7543fda0f6e34",
}),
+ # Twitter card (#1005)
+ ("https://twitter.com/billboard/status/1306599586602135555", {
+ "options": (("cards", True),),
+ "pattern": r"https://pbs.twimg.com/card_img/1317274761030856707/",
+ }),
# original retweets (#1026)
("https://twitter.com/jessica_3978/status/1296304589591810048", {
"options": (("retweets", "original"),),
@@ -446,7 +467,7 @@ class TwitterAPI():
cookies = self.extractor.session.cookies
# CSRF
- csrf = hashlib.md5(str(time.time()).encode()).hexdigest()
+ csrf = util.generate_csrf_token()
self.headers["x-csrf-token"] = csrf
cookies.set("ct0", csrf, domain=".twitter.com")
diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py
index 687ce3c..e10c642 100644
--- a/gallery_dl/extractor/vanillarock.py
+++ b/gallery_dl/extractor/vanillarock.py
@@ -29,7 +29,7 @@ class VanillarockPostExtractor(VanillarockExtractor):
filename_fmt = "{num:>02}.{extension}"
archive_fmt = "{filename}"
pattern = (r"(?:https?://)?(?:www\.)?vanilla-rock\.com"
- r"(/(?!category/|tag/)[^/?&#]+)/?$")
+ r"(/(?!category/|tag/)[^/?#]+)/?$")
test = ("https://vanilla-rock.com/mizuhashi_parsee-5", {
"url": "7fb9a4d18d9fa22d7295fee8d94ab5a7a52265dd",
"keyword": "b91df99b714e1958d9636748b1c81a07c3ef52c9",
@@ -66,7 +66,7 @@ class VanillarockTagExtractor(VanillarockExtractor):
"""Extractor for vanillarock blog posts by tag or category"""
subcategory = "tag"
pattern = (r"(?:https?://)?(?:www\.)?vanilla-rock\.com"
- r"(/(?:tag|category)/[^?&#]+)")
+ r"(/(?:tag|category)/[^?#]+)")
test = (
("https://vanilla-rock.com/tag/%e5%b0%84%e5%91%bd%e4%b8%b8%e6%96%87", {
"pattern": VanillarockPostExtractor.pattern,
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index a39fbf1..6799784 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -30,6 +30,10 @@ class WeasylExtractor(Extractor):
return True
return False
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ # store the configured "api-key" value as a header on the session
+ # NOTE(review): the value is presumably None when the option is
+ # not set -- confirm how the session treats a None header value
+ self.session.headers['X-Weasyl-API-Key'] = self.config("api-key")
+
def request_submission(self, submitid):
return self.request(
"{}/api/submissions/{}/view".format(self.root, submitid)).json()
@@ -64,7 +68,7 @@ class WeasylExtractor(Extractor):
class WeasylSubmissionExtractor(WeasylExtractor):
subcategory = "submission"
- pattern = BASE_PATTERN + r"(?:~[\w-]+/submissions|submission)/(\d+)"
+ pattern = BASE_PATTERN + r"(?:~[\w~-]+/submissions|submission)/(\d+)"
test = (
("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
"pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
@@ -105,12 +109,13 @@ class WeasylSubmissionExtractor(WeasylExtractor):
class WeasylSubmissionsExtractor(WeasylExtractor):
subcategory = "submissions"
- pattern = BASE_PATTERN + r"(?:~|submissions/)([\w-]+)/?$"
+ pattern = BASE_PATTERN + r"(?:~|submissions/)([\w~-]+)/?$"
test = (
("https://www.weasyl.com/~tanidareal", {
"count": ">= 200"
}),
("https://www.weasyl.com/submissions/tanidareal"),
+ ("https://www.weasyl.com/~aro~so")
)
def __init__(self, match):
@@ -126,7 +131,7 @@ class WeasylSubmissionsExtractor(WeasylExtractor):
class WeasylFolderExtractor(WeasylExtractor):
subcategory = "folder"
directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
- pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
+ pattern = BASE_PATTERN + r"submissions/([\w~-]+)\?folderid=(\d+)"
test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
"count": ">= 12"
})
@@ -175,7 +180,7 @@ class WeasylJournalsExtractor(WeasylExtractor):
subcategory = "journals"
filename_fmt = "{journalid} {title}.{extension}"
archive_fmt = "{journalid}"
- pattern = BASE_PATTERN + r"journals/([\w-]+)"
+ pattern = BASE_PATTERN + r"journals/([\w~-]+)"
test = ("https://www.weasyl.com/journals/charmander", {
"count": ">= 2",
})
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index d42730e..55324cb 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -34,7 +34,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor):
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode}-{num:>02}.{extension}"
archive_fmt = "{title_no}_{episode}_{num}"
- pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+)/(?:[^/?&#]+))"
+ pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/(?:[^/?#]+))"
r"/viewer(?:\?([^#'\"]+))")
test = (
(("https://www.webtoons.com/en/comedy/safely-endangered"
@@ -97,7 +97,7 @@ class WebtoonsComicExtractor(WebtoonsExtractor):
"""Extractor for an entire comic on webtoons.com"""
subcategory = "comic"
categorytransfer = True
- pattern = (BASE_PATTERN + r"/([^/?&#]+)/([^/?&#]+))"
+ pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))"
r"/list(?:\?([^#]+))")
test = (
# english
diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py
index 5f11df3..258e89c 100644
--- a/gallery_dl/extractor/xhamster.py
+++ b/gallery_dl/extractor/xhamster.py
@@ -33,7 +33,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
"{gallery[id]} {gallery[title]}")
filename_fmt = "{num:>03}_{id}.{extension}"
archive_fmt = "{id}"
- pattern = BASE_PATTERN + r"(/photos/gallery/[^/?&#]+)"
+ pattern = BASE_PATTERN + r"(/photos/gallery/[^/?#]+)"
test = (
("https://xhamster.com/photos/gallery/11748968", {
"pattern": r"https://thumb-p\d+.xhcdn.com/./[\w/-]+_1000.jpg$",
@@ -152,7 +152,7 @@ class XhamsterGalleryExtractor(XhamsterExtractor):
class XhamsterUserExtractor(XhamsterExtractor):
"""Extractor for all galleries of an xhamster user"""
subcategory = "user"
- pattern = BASE_PATTERN + r"/users/([^/?&#]+)(?:/photos)?/?(?:$|[?#])"
+ pattern = BASE_PATTERN + r"/users/([^/?#]+)(?:/photos)?/?(?:$|[?#])"
test = (
("https://xhamster.com/users/goldenpalomino/photos", {
"pattern": XhamsterGalleryExtractor.pattern,
@@ -174,7 +174,7 @@ class XhamsterUserExtractor(XhamsterExtractor):
while url:
extr = text.extract_from(self.request(url).text)
while True:
- url = extr('thumb-image-container" href="', '"')
+ url = extr('thumb-image-container role-pop" href="', '"')
if not url:
break
yield Message.Queue, url, data
diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py
index b7d116a..9fdc5aa 100644
--- a/gallery_dl/extractor/xvideos.py
+++ b/gallery_dl/extractor/xvideos.py
@@ -28,7 +28,7 @@ class XvideosGalleryExtractor(XvideosBase, GalleryExtractor):
archive_fmt = "{gallery[id]}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
r"/(?:profiles|amateur-channels|model-channels)"
- r"/([^/?&#]+)/photos/(\d+)")
+ r"/([^/?#]+)/photos/(\d+)")
test = (
("https://www.xvideos.com/profiles/pervertedcouple/photos/751031", {
"url": "cb4657a37eea5ab6b1d333491cee7eeb529b0645",
@@ -94,7 +94,7 @@ class XvideosUserExtractor(XvideosBase, Extractor):
subcategory = "user"
categorytransfer = True
pattern = (r"(?:https?://)?(?:www\.)?xvideos\.com"
- r"/profiles/([^/?&#]+)/?(?:#.*)?$")
+ r"/profiles/([^/?#]+)/?(?:#.*)?$")
test = (
("https://www.xvideos.com/profiles/pervertedcouple", {
"url": "a413f3e60d6d3a2de79bd44fa3b7a9c03db4336e",
diff --git a/gallery_dl/extractor/yuki.py b/gallery_dl/extractor/yuki.py
index 0844c40..72d7cad 100644
--- a/gallery_dl/extractor/yuki.py
+++ b/gallery_dl/extractor/yuki.py
@@ -19,7 +19,7 @@ class YukiThreadExtractor(Extractor):
directory_fmt = ("{category}", "{board}", "{thread}{title:? - //}")
filename_fmt = "{time}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
- pattern = r"(?:https?://)?yuki\.la/([^/?&#]+)/(\d+)"
+ pattern = r"(?:https?://)?yuki\.la/([^/?#]+)/(\d+)"
test = (
("https://yuki.la/gd/309639", {
"url": "289e86c5caf673a2515ec5f5f521ac0ae7e189e9",
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index b62240b..66dea08 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -110,12 +110,6 @@ class Job():
if self.pred_queue(url, kwds):
self.handle_queue(url, kwds)
- elif msg[0] == Message.Urllist:
- _, urls, kwds = msg
- if self.pred_url(urls[0], kwds):
- self.update_kwdict(kwds)
- self.handle_urllist(urls, kwds)
-
elif msg[0] == Message.Metadata:
self.update_kwdict(msg[1])
self.handle_metadata(msg[1])
@@ -130,10 +124,6 @@ class Job():
def handle_url(self, url, kwdict):
"""Handle Message.Url"""
- def handle_urllist(self, urls, kwdict):
- """Handle Message.Urllist"""
- self.handle_url(urls[0], kwdict)
-
def handle_directory(self, kwdict):
"""Handle Message.Directory"""
@@ -215,7 +205,7 @@ class DownloadJob(Job):
else:
self.visited = set()
- def handle_url(self, url, kwdict, fallback=None):
+ def handle_url(self, url, kwdict):
"""Download the resource specified in 'url'"""
postprocessors = self.postprocessors
pathfmt = self.pathfmt
@@ -246,7 +236,7 @@ class DownloadJob(Job):
if not self.download(url):
# use fallback URLs if available
- for num, url in enumerate(fallback or (), 1):
+ for num, url in enumerate(kwdict.get("_fallback", ()), 1):
util.remove_file(pathfmt.temppath)
self.log.info("Trying fallback URL #%d", num)
if self.download(url):
@@ -279,12 +269,6 @@ class DownloadJob(Job):
pp.run_after(pathfmt)
self._skipcnt = 0
- def handle_urllist(self, urls, kwdict):
- """Download the resource specified in 'url'"""
- fallback = iter(urls)
- url = next(fallback)
- self.handle_url(url, kwdict, fallback)
-
def handle_directory(self, kwdict):
"""Set and create the target directory for downloads"""
if not self.pathfmt:
@@ -563,15 +547,11 @@ class UrlJob(Job):
self.handle_queue = self.handle_url
@staticmethod
- def handle_url(url, _):
+ def handle_url(url, kwdict):
print(url)
-
- @staticmethod
- def handle_urllist(urls, _):
- prefix = ""
- for url in urls:
- print(prefix, url, sep="")
- prefix = "| "
+ if "_fallback" in kwdict:
+ for url in kwdict["_fallback"]:
+ print("|", url)
def handle_queue(self, url, _):
try:
@@ -625,9 +605,6 @@ class DataJob(Job):
def handle_url(self, url, kwdict):
self.data.append((Message.Url, url, self.filter(kwdict)))
- def handle_urllist(self, urls, kwdict):
- self.data.append((Message.Urllist, list(urls), self.filter(kwdict)))
-
def handle_directory(self, kwdict):
self.data.append((Message.Directory, self.filter(kwdict)))
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 9a716f9..8b06384 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -70,7 +70,7 @@ def ensure_http_scheme(url, scheme="https://"):
def filename_from_url(url):
"""Extract the last part of an URL to use as a filename"""
try:
- return urllib.parse.urlsplit(url).path.rpartition("/")[2]
+ return url.partition("?")[0].rpartition("/")[2]
except (TypeError, AttributeError):
return ""
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 3e91405..d85d2b3 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -13,6 +13,7 @@ import os
import sys
import json
import time
+import random
import shutil
import string
import _string
@@ -60,6 +61,10 @@ def raises(cls):
return wrap
+def generate_csrf_token():
+ return random.getrandbits(128).to_bytes(16, "big").hex()
+
+
def combine_dict(a, b):
"""Recursively combine the contents of 'b' into 'a'"""
for key, value in b.items():
@@ -490,6 +495,7 @@ class Formatter():
- "u": calls str.upper
- "c": calls str.capitalize
- "C": calls string.capwords
+ - "t": calls str.strip
- "U": calls urllib.parse.unquote
- "S": calls util.to_string()
- Example: {f!l} -> "example"; {f!u} -> "EXAMPLE"
@@ -520,6 +526,7 @@ class Formatter():
"u": str.upper,
"c": str.capitalize,
"C": string.capwords,
+ "t": str.strip,
"U": urllib.parse.unquote,
"S": to_string,
"s": str,
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 81976c2..b2e5a58 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.15.1"
+__version__ = "1.15.2"
diff --git a/test/test_results.py b/test/test_results.py
index a594032..d54017e 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -25,13 +25,14 @@ TRAVIS_SKIP = {
"exhentai", "mangafox", "dynastyscans", "nijie", "instagram", "ngomik",
"archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
"sankaku", "idolcomplex", "mangahere", "mangadex", "sankakucomplex",
- "warosu", "fuskator", "patreon", "komikcast",
+ "warosu", "fuskator", "patreon", "komikcast", "twitter",
}
# temporary issues, etc.
BROKEN = {
+ "imgbox",
"imagevenue",
- "ngomik",
+ "mangapanda",
"photobucket",
}
@@ -296,6 +297,7 @@ class TestFormatter(util.Formatter):
def setup_test_config():
name = "gallerydl"
email = "gallerydl@openaliasbox.org"
+ email2 = "gallerydl@protonmail.com"
config.clear()
config.set(("cache",), "file", None)
@@ -307,6 +309,7 @@ def setup_test_config():
config.set(("extractor", "nijie") , "username", email)
config.set(("extractor", "seiga") , "username", email)
+ config.set(("extractor", "pinterest") , "username", email2)
config.set(("extractor", "newgrounds"), "username", "d1618111")
config.set(("extractor", "newgrounds"), "password", "d1618111")
diff --git a/test/test_util.py b/test/test_util.py
index 1515814..08ecd64 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -265,6 +265,7 @@ class TestFormatter(unittest.TestCase):
"d": {"a": "foo", "b": 0, "c": None},
"l": ["a", "b", "c"],
"n": None,
+ "s": " \n\r\tSPACE ",
"u": "%27%3C%20/%20%3E%27",
"name": "Name",
"title1": "Title",
@@ -278,6 +279,7 @@ class TestFormatter(unittest.TestCase):
self._run_test("{a!u}", "HELLO WORLD")
self._run_test("{a!c}", "Hello world")
self._run_test("{a!C}", "Hello World")
+ self._run_test("{s!t}", "SPACE")
self._run_test("{a!U}", self.kwdict["a"])
self._run_test("{u!U}", "'< / >'")
self._run_test("{a!s}", self.kwdict["a"])