author     Unit 193 <unit193@unit193.net>    2021-01-28 16:01:35 -0500
committer  Unit 193 <unit193@unit193.net>    2021-01-28 16:01:35 -0500
commit     049f5338c920ac0530aa38d182bc33c42dad26a8 (patch)
tree       2667231344df1f34810638eea3d44b53b9926666
parent     07849be7436c5181a785cd0eb32a7160672812c0 (diff)
parent     2e29d2158d56879e5578dfabf9e8c0fa2e855ccf (diff)
Update upstream source from tag 'upstream/1.16.4'
Update to upstream version '1.16.4' with Debian dir e3506d469f714e8be0057bf8b8c9c226db148160
-rw-r--r--  CHANGELOG.md                         |  26
-rw-r--r--  PKG-INFO                             |   8
-rw-r--r--  README.rst                           |   6
-rw-r--r--  data/man/gallery-dl.1                |   2
-rw-r--r--  data/man/gallery-dl.conf.5           |  59
-rw-r--r--  gallery_dl.egg-info/PKG-INFO         |   8
-rw-r--r--  gallery_dl.egg-info/SOURCES.txt      |   3
-rw-r--r--  gallery_dl/extractor/2chan.py        |   6
-rw-r--r--  gallery_dl/extractor/__init__.py     |   5
-rw-r--r--  gallery_dl/extractor/derpibooru.py   |  11
-rw-r--r--  gallery_dl/extractor/deviantart.py   |   4
-rw-r--r--  gallery_dl/extractor/foolfuuka.py    |   2
-rw-r--r--  gallery_dl/extractor/furaffinity.py  |  15
-rw-r--r--  gallery_dl/extractor/imagehosts.py   |  16
-rw-r--r--  gallery_dl/extractor/instagram.py    |   6
-rw-r--r--  gallery_dl/extractor/kemonoparty.py  | 101
-rw-r--r--  gallery_dl/extractor/mangadex.py     |  11
-rw-r--r--  gallery_dl/extractor/mangakakalot.py |   3
-rw-r--r--  gallery_dl/extractor/newgrounds.py   |  24
-rw-r--r--  gallery_dl/extractor/nozomi.py       |   8
-rw-r--r--  gallery_dl/extractor/photovogue.py   |  84
-rw-r--r--  gallery_dl/extractor/pinterest.py    |   4
-rw-r--r--  gallery_dl/extractor/pixiv.py        |  18
-rw-r--r--  gallery_dl/extractor/sankaku.py      |  44
-rw-r--r--  gallery_dl/extractor/twitter.py      |  71
-rw-r--r--  gallery_dl/extractor/unsplash.py     | 208
-rw-r--r--  gallery_dl/extractor/webtoons.py     |   3
-rw-r--r--  gallery_dl/util.py                   |   7
-rw-r--r--  gallery_dl/version.py                |   2
-rw-r--r--  test/test_util.py                    |  13
30 files changed, 642 insertions(+), 136 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3b3060a..8629536 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,30 @@
# Changelog
+## 1.16.4 - 2021-01-23
+### Additions
+- [furaffinity] add `descriptions` option ([#1231](https://github.com/mikf/gallery-dl/issues/1231))
+- [kemonoparty] add `user` and `post` extractors ([#1216](https://github.com/mikf/gallery-dl/issues/1216))
+- [nozomi] add `num` enumeration index ([#1239](https://github.com/mikf/gallery-dl/issues/1239))
+- [photovogue] added portfolio extractor ([#1253](https://github.com/mikf/gallery-dl/issues/1253))
+- [twitter] match `/i/user/ID` URLs
+- [unsplash] add extractors ([#1197](https://github.com/mikf/gallery-dl/issues/1197))
+- [vipr] add image extractor ([#1258](https://github.com/mikf/gallery-dl/issues/1258))
+### Changes
+- [derpibooru] use "Everything" filter by default ([#862](https://github.com/mikf/gallery-dl/issues/862))
+### Fixes
+- [derpibooru] update `date` parsing
+- [foolfuuka] stop search when results are exhausted ([#1174](https://github.com/mikf/gallery-dl/issues/1174))
+- [instagram] fix regex for `/saved` URLs ([#1251](https://github.com/mikf/gallery-dl/issues/1251))
+- [mangadex] update API URLs
+- [mangakakalot] fix extraction
+- [newgrounds] fix flash file extraction ([#1257](https://github.com/mikf/gallery-dl/issues/1257))
+- [sankaku] simplify login process
+- [twitter] fix retries after hitting rate limit
+
## 1.16.3 - 2021-01-10
+### Fixes
+- fix crash when using a `dict` for `path-restrict`
+- [postprocessor:metadata] sanitize custom filenames
## 1.16.2 - 2021-01-09
### Additions
@@ -12,7 +36,7 @@
- [twitter] fetch media from pinned tweets ([#1203](https://github.com/mikf/gallery-dl/issues/1203))
- [wikiart] add extractor for single paintings ([#1233](https://github.com/mikf/gallery-dl/issues/1233))
- [downloader:http] add MIME type and signature for `.ico` files ([#1211](https://github.com/mikf/gallery-dl/issues/1211))
-- add a `d` format string conversion for timestamp values
+- add `d` format string conversion for timestamp values
- add `"ascii"` as a special `path-restrict` value
### Fixes
- [hentainexus] fix extraction ([#1234](https://github.com/mikf/gallery-dl/issues/1234))
diff --git a/PKG-INFO b/PKG-INFO
index 6b9724a..bdacf73 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.16.3
+Version: 1.16.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -332,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 4982919..1ddebcf 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -321,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index a260907..619e84f 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2021-01-10" "1.16.3" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2021-01-23" "1.16.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 609d1de..413a40b 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2021-01-10" "1.16.3" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2021-01-23" "1.16.4" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -822,10 +822,7 @@ to use your account's browsing settings and filters.
\f[I]integer\f[]
.IP "Default:" 9
-\f[I]null\f[]
-
-.IP "Example:" 4
-56027 (\f[I]Everything\f[] filter)
+\f[I]56027\f[] (\f[I]Everything\f[] filter)
.IP "Description:" 4
The content filter ID to use.
@@ -1111,6 +1108,22 @@ Sets the maximum allowed size for downloaded images.
\f[I]"l"\f[], ...) to use as an upper limit.
+.SS extractor.furaffinity.descriptions
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"text"\f[]
+
+.IP "Description:" 4
+Controls the format of \f[I]description\f[] metadata fields.
+
+.br
+* \f[I]"text"\f[]: Plain text with HTML tags removed
+.br
+* \f[I]"html"\f[]: Raw HTML content
+
+
.SS extractor.furaffinity.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1274,6 +1287,17 @@ If the selected format is not available,
the first in the list gets chosen (usually mp3).
+.SS extractor.newgrounds.flash
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download original Adobe Flash animations instead of pre-rendered videos.
+
+
.SS extractor.newgrounds.include
.IP "Type:" 6
\f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[]
@@ -1400,6 +1424,17 @@ Download from video pins.
Download user avatars.
+.SS extractor.pixiv.work.related
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Also download related artworks.
+
+
.SS extractor.pixiv.ugoira
.IP "Type:" 6
\f[I]bool\f[]
@@ -1776,6 +1811,20 @@ Control video download behavior.
* \f[I]false\f[]: Skip video Tweets
+.SS extractor.unsplash.format
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"raw"\f[]
+
+.IP "Description:" 4
+Name of the image format to download.
+
+Available formats are
+\f[I]"raw"\f[], \f[I]"full"\f[], \f[I]"regular"\f[], \f[I]"small"\f[], and \f[I]"thumb"\f[].
+
+
.SS extractor.vsco.videos
.IP "Type:" 6
\f[I]bool\f[]
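
Note (not part of the commit): the man-page hunks above document four new options, extractor.furaffinity.descriptions, extractor.newgrounds.flash, extractor.pixiv.work.related and extractor.unsplash.format, plus the new derpibooru filter default. The sketch below only illustrates what a gallery-dl.conf fragment using them might look like; the nesting mirrors the dotted option names, and configuration.rst remains the authoritative reference.

# Illustrative only: emit a gallery-dl.conf fragment exercising the options
# documented above. Values are the documented defaults or listed examples.
import json

config_fragment = {
    "extractor": {
        "furaffinity": {"descriptions": "html"},  # "text" (default) or "html"
        "newgrounds": {"flash": True},            # keep original Flash animations
        "pixiv": {"work": {"related": True}},     # also download related artworks
        "unsplash": {"format": "regular"},        # "raw", "full", "regular", "small", "thumb"
        "derpibooru": {"filter": 56027},          # "Everything" filter ID (new default)
    }
}

print(json.dumps(config_fragment, indent=4))
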
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index 2ce1d97..f1a1ebe 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.16.3
+Version: 1.16.4
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__
These executables include a Python interpreter
and all required Python packages.
@@ -332,7 +332,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index cb025ff..d4907de 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -89,6 +89,7 @@ gallery_dl/extractor/instagram.py
gallery_dl/extractor/issuu.py
gallery_dl/extractor/kabeuchi.py
gallery_dl/extractor/keenspot.py
+gallery_dl/extractor/kemonoparty.py
gallery_dl/extractor/khinsider.py
gallery_dl/extractor/komikcast.py
gallery_dl/extractor/lineblog.py
@@ -118,6 +119,7 @@ gallery_dl/extractor/oauth.py
gallery_dl/extractor/paheal.py
gallery_dl/extractor/patreon.py
gallery_dl/extractor/photobucket.py
+gallery_dl/extractor/photovogue.py
gallery_dl/extractor/piczel.py
gallery_dl/extractor/pinterest.py
gallery_dl/extractor/pixiv.py
@@ -146,6 +148,7 @@ gallery_dl/extractor/test.py
gallery_dl/extractor/tsumino.py
gallery_dl/extractor/tumblr.py
gallery_dl/extractor/twitter.py
+gallery_dl/extractor/unsplash.py
gallery_dl/extractor/vanillarock.py
gallery_dl/extractor/vsco.py
gallery_dl/extractor/wallhaven.py
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index 9813f2b..d34209f 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2017-2020 Mike Fährmann
+# Copyright 2017-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -22,8 +22,8 @@ class _2chanThreadExtractor(Extractor):
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)"
test = ("http://dec.2chan.net/70/res/4752.htm", {
- "url": "1c2d2ce8aea0fc71d94735cfc30009d628f33548",
- "keyword": "f508d6841ea2cb19ed799aac9dc580263ca50651",
+ "url": "20c211ae7c06b18ec345a057fe0b68dde979b051",
+ "keyword": "23a529b46313b927fc94b577e5e1fdb3aa164ac1",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 36107d9..a69bacc 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -58,6 +58,7 @@ modules = [
"issuu",
"kabeuchi",
"keenspot",
+ "kemonoparty",
"khinsider",
"komikcast",
"lineblog",
@@ -83,6 +84,7 @@ modules = [
"paheal",
"patreon",
"photobucket",
+ "photovogue",
"piczel",
"pinterest",
"pixiv",
@@ -108,6 +110,7 @@ modules = [
"tsumino",
"tumblr",
"twitter",
+ "unsplash",
"vanillarock",
"vsco",
"wallhaven",
diff --git a/gallery_dl/extractor/derpibooru.py b/gallery_dl/extractor/derpibooru.py
index 3b20fa5..94f3729 100644
--- a/gallery_dl/extractor/derpibooru.py
+++ b/gallery_dl/extractor/derpibooru.py
@@ -28,8 +28,7 @@ class DerpibooruExtractor(BooruExtractor):
@staticmethod
def _prepare(post):
- post["date"] = text.parse_datetime(
- post["created_at"], "%Y-%m-%dT%H:%M:%S")
+ post["date"] = text.parse_datetime(post["created_at"])
@staticmethod
def _extended_tags(post):
@@ -46,6 +45,8 @@ class DerpibooruExtractor(BooruExtractor):
filter_id = self.config("filter")
if filter_id:
params["filter_id"] = filter_id
+ elif not api_key:
+ params["filter_id"] = "56027" # "Everything" filter
while True:
data = self.request(url, params=params).json()
@@ -67,7 +68,7 @@ class DerpibooruPostExtractor(DerpibooruExtractor):
"animated": False,
"aspect_ratio": 1.0,
"comment_count": int,
- "created_at": "2012-01-02T03:12:33",
+ "created_at": "2012-01-02T03:12:33Z",
"date": "dt:2012-01-02 03:12:33",
"deletion_reason": None,
"description": "",
@@ -76,7 +77,7 @@ class DerpibooruPostExtractor(DerpibooruExtractor):
"duration": 0.04,
"extension": "png",
"faves": int,
- "first_seen_at": "2012-01-02T03:12:33",
+ "first_seen_at": "2012-01-02T03:12:33Z",
"format": "png",
"height": 900,
"hidden_from_users": False,
@@ -99,7 +100,7 @@ class DerpibooruPostExtractor(DerpibooruExtractor):
"tag_ids": list,
"tags": list,
"thumbnails_generated": True,
- "updated_at": "2020-05-28T13:14:07",
+ "updated_at": "2020-05-28T13:14:07Z",
"uploader": "Clover the Clever",
"uploader_id": 211188,
"upvotes": int,
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index a9c63a9..a58401e 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2020 Mike Fährmann
+# Copyright 2015-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -686,6 +686,8 @@ class DeviantartPopularExtractor(DeviantartExtractor):
}),
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
"options": (("original", False),),
+ "range": "1-30",
+ "count": 30,
}),
("https://www.deviantart.com/search?q=tree"),
("https://www.deviantart.com/search/deviations?order=popular-1-week"),
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 81f2bc2..319ebe2 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -167,6 +167,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
return
yield from posts
+ if len(posts) <= 3:
+ return
params["page"] += 1
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 752cd62..df5a73e 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -29,6 +29,9 @@ class FuraffinityExtractor(Extractor):
self.user = match.group(1)
self.offset = 0
+ if self.config("descriptions") == "html":
+ self._process_description = lambda x: x.strip()
+
def items(self):
metadata = self.metadata()
for post_id in util.advance(self.posts(), self.offset):
@@ -83,8 +86,8 @@ class FuraffinityExtractor(Extractor):
if tags:
# new site layout
data["tags"] = text.split_html(tags)
- data["description"] = text.unescape(rh(extr(
- 'class="section-body">', '</div>'), "", ""))
+ data["description"] = self._process_description(extr(
+ 'class="section-body">', '</div>'))
data["views"] = pi(rh(extr('class="views">', '</span>')))
data["favorites"] = pi(rh(extr('class="favorites">', '</span>')))
data["comments"] = pi(rh(extr('class="comments">', '</span>')))
@@ -109,12 +112,16 @@ class FuraffinityExtractor(Extractor):
data["tags"] = text.split_html(extr(
'id="keywords">', '</div>'))[::2]
data["rating"] = extr('<img alt="', ' ')
- data["description"] = text.unescape(text.remove_html(extr(
- "</table>", "</table>"), "", ""))
+ data["description"] = self._process_description(extr(
+ "</table>", "</table>"))
data["date"] = text.parse_timestamp(data["filename"].partition(".")[0])
return data
+ @staticmethod
+ def _process_description(description):
+ return text.unescape(text.remove_html(description, "", ""))
+
def _pagination(self):
num = 1
diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py
index 28af179..fe3afbb 100644
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@@ -249,3 +249,19 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
return url, url
+
+
+class ViprImageExtractor(ImagehostImageExtractor):
+ """Extractor for single images from vipr.im"""
+ category = "vipr"
+ pattern = r"(?:https?://)?(vipr\.im/(\w+))"
+ test = ("https://vipr.im/kcd5jcuhgs3v.html", {
+ "url": "88f6a3ecbf3356a11ae0868b518c60800e070202",
+ "keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771",
+ })
+ https = True
+ params = None
+
+ def get_info(self, page):
+ url = text.extract(page, '<img src="', '"')[0]
+ return url, url
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index c3e7fe4..84018a9 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2020 Leonardo Taccari
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -35,7 +35,7 @@ class InstagramExtractor(Extractor):
Extractor.__init__(self, match)
self.item = match.group(1)
self.www_claim = "0"
- self.csrf_token = util.generate_csrf_token()
+ self.csrf_token = util.generate_token()
self._find_tags = re.compile(r"#\w+").findall
self._cursor = None
@@ -424,7 +424,7 @@ class InstagramChannelExtractor(InstagramExtractor):
class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for ProfilePage saved media"""
subcategory = "saved"
- pattern = USER_PATTERN + r"([^/?#]+)/saved"
+ pattern = USER_PATTERN + r"/saved"
test = ("https://www.instagram.com/instagram/saved/",)
def posts(self):
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
new file mode 100644
index 0000000..a5b5e00
--- /dev/null
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://kemono.party/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class KemonopartyExtractor(Extractor):
+ """Base class for kemonoparty extractors"""
+ category = "kemonoparty"
+ root = "https://kemono.party"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{id}_{title}_{filename}.{extension}"
+ archive_fmt = "{user}_{id}_{filename}.{extension}"
+
+ def items(self):
+ for post in self.posts():
+
+ files = []
+ if post["file"]:
+ files.append(post["file"])
+ if post["attachments"]:
+ files.extend(post["attachments"])
+ post["date"] = text.parse_datetime(
+ post["published"], "%a, %d %b %Y %H:%M:%S %Z")
+ yield Message.Directory, post
+
+ for post["num"], file in enumerate(files, 1):
+ text.nameext_from_url(file["name"], post)
+ yield Message.Url, self.root + file["path"], post
+
+
+class KemonopartyUserExtractor(KemonopartyExtractor):
+ """Extractor for all posts from a kemono.party user listing"""
+ subcategory = "user"
+ pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/?(?:$|[?#])"
+ test = ("https://kemono.party/fanbox/user/6993449", {
+ "range": "1-25",
+ "count": 25,
+ })
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ service, user_id = match.groups()
+ self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id)
+
+ def posts(self):
+ url = self.api_url
+ params = {"o": 0}
+
+ while True:
+ posts = self.request(url, params=params).json()
+ yield from posts
+
+ if len(posts) < 25:
+ return
+ params["o"] += 25
+
+
+class KemonopartyPostExtractor(KemonopartyExtractor):
+ """Extractor for a single kemono.party post"""
+ subcategory = "post"
+ pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/post/(\d+)"
+ test = ("https://kemono.party/fanbox/user/6993449/post/506575", {
+ "pattern": r"https://kemono\.party/files/fanbox"
+ r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg",
+ "keyword": {
+ "added": "Wed, 06 May 2020 20:28:02 GMT",
+ "content": str,
+ "date": "dt:2019-08-11 02:09:04",
+ "edited": None,
+ "embed": dict,
+ "extension": "jpeg",
+ "filename": "P058kDFYus7DbqAkGlfWTlOr",
+ "id": "506575",
+ "num": 1,
+ "published": "Sun, 11 Aug 2019 02:09:04 GMT",
+ "service": "fanbox",
+ "shared_file": False,
+ "subcategory": "post",
+ "title": "c96取り置き",
+ "user": "6993449",
+ },
+ })
+
+ def __init__(self, match):
+ KemonopartyExtractor.__init__(self, match)
+ service, user_id, post_id = match.groups()
+ self.api_url = "{}/api/{}/user/{}/post/{}".format(
+ self.root, service, user_id, post_id)
+
+ def posts(self):
+ posts = self.request(self.api_url).json()
+ return (posts[0],) if len(posts) > 1 else posts
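
Note (illustration, not part of the commit): the user extractor above pages through the site API 25 posts at a time with an "o" offset parameter. The same loop outside gallery-dl, using the third-party requests package, would look roughly like this.

# Rough standalone equivalent of KemonopartyUserExtractor.posts(): page the
# /api/<service>/user/<id> endpoint with an "o" offset until a short page
# signals the end. Requires the "requests" package.
import requests

def kemonoparty_posts(service, user_id):
    url = "https://kemono.party/api/{}/user/{}".format(service, user_id)
    offset = 0
    while True:
        posts = requests.get(url, params={"o": offset}, timeout=30).json()
        yield from posts
        if len(posts) < 25:
            return
        offset += 25

# usage: for post in kemonoparty_posts("fanbox", "6993449"): ...
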
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index dca8995..2156ecf 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018-2020 Mike Fährmann
+# Copyright 2018-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,6 +17,7 @@ class MangadexExtractor(Extractor):
"""Base class for mangadex extractors"""
category = "mangadex"
root = "https://mangadex.org"
+ api_root = "https://api.mangadex.org"
# mangadex-to-iso639-1 codes
iso639_map = {
@@ -28,18 +29,18 @@ class MangadexExtractor(Extractor):
def chapter_data(self, chapter_id):
"""Request API results for 'chapter_id'"""
- url = "{}/api/v2/chapter/{}".format(self.root, chapter_id)
+ url = "{}/v2/chapter/{}".format(self.api_root, chapter_id)
return self.request(url).json()["data"]
@memcache(keyarg=1)
def manga_data(self, manga_id):
"""Request API results for 'manga_id'"""
- url = "{}/api/v2/manga/{}".format(self.root, manga_id)
+ url = "{}/v2/manga/{}".format(self.api_root, manga_id)
return self.request(url).json()["data"]
def manga_chapters(self, manga_id):
"""Request chapter list for 'manga_id'"""
- url = "{}/api/v2/manga/{}/chapters".format(self.root, manga_id)
+ url = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id)
data = self.request(url).json()["data"]
groups = {
@@ -76,7 +77,7 @@ class MangadexChapterExtractor(MangadexExtractor):
}),
# MANGA Plus (#1154)
("https://mangadex.org/chapter/1122815", {
- "excepion": exception.StopExtraction,
+ "exception": exception.HttpError,
}),
)
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
index 951a257..cab866a 100644
--- a/gallery_dl/extractor/mangakakalot.py
+++ b/gallery_dl/extractor/mangakakalot.py
@@ -78,7 +78,8 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
}
def images(self, page):
- page = text.extract(page, 'id="vungdoc"', '\n<div')[0]
+ page = text.extract(
+ page, 'class="container-chapter-reader', '\n<div')[0]
return [
(url, None)
for url in text.extract_iter(page, '<img src="', '"')
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index a6cc5fa..4fdfac9 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -29,6 +29,7 @@ class NewgroundsExtractor(Extractor):
Extractor.__init__(self, match)
self.user = match.group(1)
self.user_root = "https://{}.newgrounds.com".format(self.user)
+ self.flash = self.config("flash", True)
def items(self):
self.login()
@@ -92,18 +93,22 @@ class NewgroundsExtractor(Extractor):
}
def extract_post(self, post_url):
+
+ if "/art/view/" in post_url:
+ extract_data = self._extract_image_data
+ elif "/audio/listen/" in post_url:
+ extract_data = self._extract_audio_data
+ else:
+ extract_data = self._extract_media_data
+ if self.flash:
+ post_url += "/format/flash"
+
response = self.request(post_url, fatal=False)
if response.status_code >= 400:
return {}
page = response.text
extr = text.extract_from(page)
-
- if "/art/view/" in post_url:
- data = self._extract_image_data(extr, post_url)
- elif "/audio/listen/" in post_url:
- data = self._extract_audio_data(extr, post_url)
- else:
- data = self._extract_media_data(extr, post_url)
+ data = extract_data(extr, post_url)
data["_comment"] = extr('id="author_comments"', '</div>')
data["comment"] = text.unescape(text.remove_html(
@@ -313,6 +318,11 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
"user" : "zj",
},
}),
+ # flash animation (#1257)
+ ("https://www.newgrounds.com/portal/view/161181/format/flash", {
+ "pattern": r"https://uploads\.ungrounded\.net/161000"
+ r"/161181_ddautta_mask__550x281_\.swf\?f1081628129",
+ })
)
def __init__(self, match):
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
index 99e397b..4eb3ee6 100644
--- a/gallery_dl/extractor/nozomi.py
+++ b/gallery_dl/extractor/nozomi.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2019-2020 Mike Fährmann
+# Copyright 2019-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -62,7 +62,7 @@ class NozomiExtractor(Extractor):
del post[key]
yield Message.Directory, post
- for image in images:
+ for post["num"], image in enumerate(images, 1):
post["url"] = url = text.urljoin(self.root, image["imageurl"])
text.nameext_from_url(url, post)
post["is_video"] = bool(image.get("is_video"))
@@ -95,13 +95,11 @@ class NozomiPostExtractor(NozomiExtractor):
"dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5",
"date" : "dt:2016-07-26 02:32:03",
"extension": "jpg",
- "favorites": int,
"filename" : str,
"height" : 768,
"is_video" : False,
"postid" : 3649262,
"source" : "danbooru",
- "sourceid" : 2434215,
"tags" : list,
"type" : "jpg",
"url" : str,
@@ -111,7 +109,7 @@ class NozomiPostExtractor(NozomiExtractor):
# multiple images per post
("https://nozomi.la/post/25588032.html", {
"url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228",
- "keyword": "8c3a2561ccc9ad429be9850d1383a952d0b4a8ab",
+ "keyword": "f60e048df36308b6b25dfaac419b586895d360bc",
"count": 7,
}),
# empty 'date' (#1163)
diff --git a/gallery_dl/extractor/photovogue.py b/gallery_dl/extractor/photovogue.py
new file mode 100644
index 0000000..a5c788a
--- /dev/null
+++ b/gallery_dl/extractor/photovogue.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.vogue.it/en/photovogue/"""
+
+from .common import Extractor, Message
+from .. import text
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?vogue\.it/(?:en/)?photovogue"
+
+
+class PhotovogueUserExtractor(Extractor):
+ category = "photovogue"
+ subcategory = "user"
+ directory_fmt = ("{category}", "{photographer[id]} {photographer[name]}")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}"
+ pattern = BASE_PATTERN + r"/portfolio/?\?id=(\d+)"
+ test = (
+ ("https://www.vogue.it/en/photovogue/portfolio/?id=221252"),
+ ("https://vogue.it/photovogue/portfolio?id=221252", {
+ "pattern": r"https://images.vogue.it/Photovogue/[^/]+_gallery.jpg",
+ "keyword": {
+ "date": "type:datetime",
+ "favorite_count": int,
+ "favorited": list,
+ "id": int,
+ "image_id": str,
+ "is_favorite": False,
+ "orientation": "re:portrait|landscape",
+ "photographer": {
+ "biography": "Born in 1995. Live in Bologna.",
+ "city": "Bologna",
+ "country_id": 106,
+ "favoritedCount": int,
+ "id": 221252,
+ "isGold": bool,
+ "isPro": bool,
+ "latitude": str,
+ "longitude": str,
+ "name": "Arianna Mattarozzi",
+ "user_id": "38cb0601-4a85-453c-b7dc-7650a037f2ab",
+ "websites": list,
+ },
+ "photographer_id": 221252,
+ "tags": list,
+ "title": str,
+ },
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user_id = match.group(1)
+
+ def items(self):
+ for photo in self.photos():
+ url = photo["gallery_image"]
+ photo["title"] = photo["title"].strip()
+ photo["date"] = text.parse_datetime(
+ photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z")
+
+ yield Message.Directory, photo
+ yield Message.Url, url, text.nameext_from_url(url, photo)
+
+ def photos(self):
+ url = "https://api.vogue.it/production/photos"
+ params = {
+ "count": "50",
+ "order_by": "DESC",
+ "page": 0,
+ "photographer_id": self.user_id,
+ }
+
+ while True:
+ data = self.request(url, params=params).json()
+ yield from data["items"]
+
+ if not data["has_next"]:
+ break
+ params["page"] += 1
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index 334412d..e5a0486 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -305,7 +305,7 @@ class PinterestAPI():
def __init__(self, extractor):
self.extractor = extractor
- csrf_token = util.generate_csrf_token()
+ csrf_token = util.generate_token()
self.headers = self.HEADERS.copy()
self.headers["X-CSRFToken"] = csrf_token
self.cookies = {"csrftoken": csrf_token}
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8aee058..a872ada 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2020 Mike Fährmann
+# Copyright 2014-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -217,6 +217,12 @@ class PixivWorkExtractor(PixivExtractor):
"url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef",
"keywords": {"frames": list},
}),
+ # related works (#1237)
+ ("https://www.pixiv.net/artworks/966412", {
+ "options": (("related", True),),
+ "range": "1-10",
+ "count": ">= 10",
+ }),
("https://www.pixiv.net/en/artworks/966412"),
("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"),
("http://i1.pixiv.net/c/600x600/img-master"
@@ -233,7 +239,11 @@ class PixivWorkExtractor(PixivExtractor):
self.illust_id = match.group(1) or match.group(2)
def works(self):
- return (self.api.illust_detail(self.illust_id),)
+ works = (self.api.illust_detail(self.illust_id),)
+ if self.config("related", False):
+ related = self.api.illust_related(self.illust_id)
+ works = itertools.chain(works, related)
+ return works
class PixivFavoriteExtractor(PixivExtractor):
@@ -574,6 +584,10 @@ class PixivAppAPI():
params = {"mode": mode, "date": date}
return self._pagination("v1/illust/ranking", params)
+ def illust_related(self, illust_id):
+ params = {"illust_id": illust_id}
+ return self._pagination("v2/illust/related", params)
+
def search_illust(self, word, sort=None, target=None, duration=None):
params = {"word": word, "search_target": target,
"sort": sort, "duration": duration}
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 6a499a3..e98b630 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2014-2020 Mike Fährmann
+# Copyright 2014-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -264,49 +264,11 @@ class SankakuAPI():
@cache(maxage=365*24*3600, keyarg=1)
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
- headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
- # get initial access_token
- url = "https://login.sankakucomplex.com/auth/token"
+ url = "https://capi-v2.sankakucomplex.com/auth/token"
+ headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
data = {"login": username, "password": password}
- response = extr.request(
- url, method="POST", headers=headers, json=data, fatal=False)
- data = response.json()
- if response.status_code >= 400 or not data.get("success"):
- raise exception.AuthenticationError(data.get("error"))
- access_token = data["access_token"]
-
- # start openid auth
- url = "https://login.sankakucomplex.com/oidc/auth"
- params = {
- "response_type": "code",
- "scope" : "openid",
- "client_id" : "sankaku-web-app",
- "redirect_uri" : "https://sankaku.app/sso/callback",
- "state" : "return_uri=https://sankaku.app/",
- "theme" : "black",
- "lang" : "undefined",
- }
- page = extr.request(url, params=params).text
- submit_url = text.extract(page, 'submitUrl = "', '"')[0]
-
- # get code from initial access_token
- url = "https://login.sankakucomplex.com" + submit_url
- data = {
- "accessToken": access_token,
- "nonce" : "undefined",
- }
- response = extr.request(url, method="POST", data=data)
- query = text.parse_query(response.request.url.partition("?")[2])
-
- # get final access_token from code
- url = "https://capi-v2.sankakucomplex.com/sso/finalize?lang=en"
- data = {
- "code" : query["code"],
- "client_id" : "sankaku-web-app",
- "redirect_uri": "https://sankaku.app/sso/callback",
- }
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=False)
data = response.json()
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index b769912..4034732 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2016-2020 Mike Fährmann
+# Copyright 2016-2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -239,7 +239,7 @@ class TwitterExtractor(Extractor):
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
- token = util.generate_csrf_token()
+ token = util.generate_token()
self.session.cookies.clear()
self.request(self.root + "/login")
@@ -272,8 +272,8 @@ class TwitterExtractor(Extractor):
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
- pattern = BASE_PATTERN + \
- r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))"
+ pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
+ r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
@@ -281,14 +281,15 @@ class TwitterTimelineExtractor(TwitterExtractor):
}),
("https://mobile.twitter.com/supernaturepics?p=i"),
("https://www.twitter.com/id:2976459548"),
+ ("https://twitter.com/i/user/2976459548"),
("https://twitter.com/intent/user?user_id=2976459548"),
)
def __init__(self, match):
TwitterExtractor.__init__(self, match)
- uid = match.group(2)
- if uid:
- self.user = "id:" + uid
+ user_id = match.group(2)
+ if user_id:
+ self.user = "id:" + user_id
def tweets(self):
return TwitterAPI(self).timeline_profile(self.user)
@@ -355,8 +356,7 @@ class TwitterListMembersExtractor(TwitterExtractor):
self.login()
for user in TwitterAPI(self).list_members(self.user):
user["_extractor"] = TwitterTimelineExtractor
- url = "{}/intent/user?user_id={}".format(
- self.root, user["rest_id"])
+ url = "{}/i/user/{}".format(self.root, user["rest_id"])
yield Message.Queue, url, user
@@ -509,7 +509,7 @@ class TwitterAPI():
# CSRF
csrf_token = cookies.get("ct0", domain=cookiedomain)
if not csrf_token:
- csrf_token = util.generate_csrf_token()
+ csrf_token = util.generate_token()
cookies.set("ct0", csrf_token, domain=cookiedomain)
self.headers["x-csrf-token"] = csrf_token
@@ -617,31 +617,34 @@ class TwitterAPI():
def _call(self, endpoint, params, root=None, method="GET"):
if root is None:
root = self.root
- response = self.extractor.request(
- root + endpoint, method=method, params=params,
- headers=self.headers, fatal=None)
-
- # update 'x-csrf-token' header (#1170)
- csrf_token = response.cookies.get("ct0")
- if csrf_token:
- self.headers["x-csrf-token"] = csrf_token
-
- if response.status_code < 400:
- return response.json()
- if response.status_code == 429:
- until = response.headers.get("x-rate-limit-reset")
- self.extractor.wait(until=until, seconds=(None if until else 60))
- return self._call(endpoint, params, method)
- try:
- msg = ", ".join(
- '"' + error["message"] + '"'
- for error in response.json()["errors"]
- )
- except Exception:
- msg = response.text
- raise exception.StopExtraction(
- "%s %s (%s)", response.status_code, response.reason, msg)
+ while True:
+ response = self.extractor.request(
+ root + endpoint, method=method, params=params,
+ headers=self.headers, fatal=None)
+
+ # update 'x-csrf-token' header (#1170)
+ csrf_token = response.cookies.get("ct0")
+ if csrf_token:
+ self.headers["x-csrf-token"] = csrf_token
+
+ if response.status_code < 400:
+ return response.json()
+ if response.status_code == 429:
+ until = response.headers.get("x-rate-limit-reset")
+ seconds = None if until else 60
+ self.extractor.wait(until=until, seconds=seconds)
+ continue
+
+ try:
+ msg = ", ".join(
+ '"' + error["message"] + '"'
+ for error in response.json()["errors"]
+ )
+ except Exception:
+ msg = response.text
+ raise exception.StopExtraction(
+ "%s %s (%s)", response.status_code, response.reason, msg)
def _pagination(self, endpoint, params=None):
if params is None:
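
Note (illustration, not part of the commit): the hunk above turns the 429 handling from a recursive call into a flat retry loop; the old recursive call also passed method in the position of the root parameter, which the loop avoids. The same pattern in isolation, with placeholder names and the requests package:

# Generic retry-on-429 loop in the spirit of the rewritten _call(): wait until
# the server-provided reset time (or a fixed fallback) and try again.
# Placeholder helper; not gallery-dl API.
import time
import requests

def get_json_respecting_rate_limit(url, headers=None, fallback_wait=60):
    while True:
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code == 429:
            until = response.headers.get("x-rate-limit-reset")
            wait = max(float(until) - time.time(), 0.0) if until else fallback_wait
            time.sleep(wait)
            continue
        response.raise_for_status()
        return response.json()
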
diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py
new file mode 100644
index 0000000..545eb31
--- /dev/null
+++ b/gallery_dl/extractor/unsplash.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://unsplash.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?unsplash\.com"
+
+
+class UnsplashExtractor(Extractor):
+ """Base class for unsplash extractors"""
+ category = "unsplash"
+ directory_fmt = ("{category}", "{user[username]}")
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+ root = "https://unsplash.com"
+ page_start = 1
+ per_page = 20
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.item = match.group(1)
+
+ def items(self):
+ fmt = self.config("format") or "raw"
+ for photo in self.photos():
+ util.delete_items(
+ photo, ("current_user_collections", "related_collections"))
+ url = photo["urls"][fmt]
+ text.nameext_from_url(url, photo)
+
+ photo["extension"] = "jpg"
+ photo["date"] = text.parse_datetime(photo["created_at"])
+ if "tags" in photo:
+ photo["tags"] = [t["title"] for t in photo["tags"]]
+
+ yield Message.Directory, photo
+ yield Message.Url, url, photo
+
+ def skip(self, num):
+ pages = num // self.per_page
+ self.page_start += pages
+ return pages * self.per_page
+
+ def _pagination(self, url, params, results=False):
+ params["per_page"] = self.per_page
+ params["page"] = self.page_start
+
+ while True:
+ photos = self.request(url, params=params).json()
+ if results:
+ photos = photos["results"]
+ yield from photos
+
+ if len(photos) < self.per_page:
+ return
+ params["page"] += 1
+
+
+class UnsplashImageExtractor(UnsplashExtractor):
+ """Extractor for a single unsplash photo"""
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/photos/([^/?#]+)"
+ test = ("https://unsplash.com/photos/lsoogGC_5dg", {
+ "url": "00accb0a64d5a0df0db911f8b425892718dce524",
+ "keyword": {
+ "alt_description": "re:silhouette of trees near body of water ",
+ "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz",
+ "categories": list,
+ "color": "#f3c08c",
+ "created_at": "2020-04-08T08:29:42-04:00",
+ "date": "dt:2020-04-08 12:29:42",
+ "description": "The Island",
+ "downloads": int,
+ "exif": {
+ "aperture": "11",
+ "exposure_time": "30",
+ "focal_length": "70.0",
+ "iso": 200,
+ "make": "Canon",
+ "model": "Canon EOS 5D Mark IV"
+ },
+ "extension": "jpg",
+ "filename": "photo-1586348943529-beaae6c28db9",
+ "height": 6272,
+ "id": "lsoogGC_5dg",
+ "liked_by_user": False,
+ "likes": int,
+ "location": {
+ "city": "Beaver Dam",
+ "country": "United States",
+ "name": "Beaver Dam, WI 53916, USA",
+ "position": {
+ "latitude": 43.457769,
+ "longitude": -88.837329
+ },
+ "title": "Beaver Dam, WI 53916, USA"
+ },
+ "promoted_at": "2020-04-08T11:12:03-04:00",
+ "sponsorship": None,
+ "tags": list,
+ "updated_at": str,
+ "user": {
+ "accepted_tos": True,
+ "bio": str,
+ "first_name": "Dave",
+ "id": "uMJXuywXLiU",
+ "instagram_username": "just_midwest_rock",
+ "last_name": "Hoefler",
+ "location": "Madison, WI",
+ "name": "Dave Hoefler",
+ "portfolio_url": str,
+ "total_collections": int,
+ "total_likes": int,
+ "total_photos": int,
+ "twitter_username": None,
+ "updated_at": str,
+ "username": "johnwestrock"
+ },
+ "views": int,
+ "width": 4480,
+ },
+ })
+
+ def photos(self):
+ url = "{}/napi/photos/{}".format(self.root, self.item)
+ return (self.request(url).json(),)
+
+
+class UnsplashUserExtractor(UnsplashExtractor):
+ """Extractor for all photos of an unsplash user"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/@(\w+)/?$"
+ test = ("https://unsplash.com/@johnwestrock", {
+ "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def photos(self):
+ url = "{}/napi/users/{}/photos".format(self.root, self.item)
+ params = {"order_by": "latest"}
+ return self._pagination(url, params)
+
+
+class UnsplashFavoriteExtractor(UnsplashExtractor):
+ """Extractor for all likes of an unsplash user"""
+ subcategory = "favorite"
+ pattern = BASE_PATTERN + r"/@(\w+)/likes"
+ test = ("https://unsplash.com/@johnwestrock/likes", {
+ "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def photos(self):
+ url = "{}/napi/users/{}/likes".format(self.root, self.item)
+ params = {"order_by": "latest"}
+ return self._pagination(url, params)
+
+
+class UnsplashCollectionExtractor(UnsplashExtractor):
+ """Extractor for an unsplash collection"""
+ subcategory = "collection"
+ pattern = BASE_PATTERN + r"/collections/(\d+)"
+ test = ("https://unsplash.com/collections/3178572/winter", {
+ "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def photos(self):
+ url = "{}/napi/collections/{}/photos".format(self.root, self.item)
+ params = {"order_by": "latest"}
+ return self._pagination(url, params)
+
+
+class UnsplashSearchExtractor(UnsplashExtractor):
+ """Extractor for unsplash search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?"
+ test = ("https://unsplash.com/s/photos/nature", {
+ "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+"
+ r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$",
+ "range": "1-30",
+ "count": 30,
+ })
+
+ def __init__(self, match):
+ UnsplashExtractor.__init__(self, match)
+ self.query = match.group(2)
+
+ def photos(self):
+ url = self.root + "/napi/search/photos"
+ params = {"query": text.unquote(self.item)}
+ if self.query:
+ params.update(text.parse_query(self.query))
+ return self._pagination(url, params, True)
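
Note (illustration, not part of the commit): skipping ahead in the extractor above is pure page arithmetic on the /napi endpoints, with per_page fixed at 20. A worked example of the same calculation as in UnsplashExtractor.skip():

# Worked example of the skip() arithmetic above with per_page = 20: asking to
# skip 45 items advances two whole pages and reports 40 items skipped; the
# remaining 5 are left for the caller to handle.
PER_PAGE = 20

def skip(num, page_start=1):
    pages = num // PER_PAGE
    return page_start + pages, pages * PER_PAGE

assert skip(45) == (3, 40)
assert skip(10) == (1, 0)
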
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 5d3ca89..4449e19 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -47,7 +47,8 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor):
(("https://www.webtoons.com/en/comedy/safely-endangered"
"/ep-572-earth/viewer?title_no=352&episode_no=572"), {
"url": "11041d71a3f92728305c11a228e77cf0f7aa02ef",
- "content": "1ce950324f14018b691c42b0ede57fa25618abeb",
+ "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7",
+ "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"),
"count": 5,
}),
)
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index ffd686e..2161b9d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -18,6 +18,7 @@ import shutil
import string
import _string
import sqlite3
+import binascii
import datetime
import operator
import itertools
@@ -71,8 +72,10 @@ def raises(cls):
return wrap
-def generate_csrf_token():
- return random.getrandbits(128).to_bytes(16, "big").hex()
+def generate_token(size=16):
+ """Generate a random token with hexadecimal digits"""
+ data = random.getrandbits(size * 8).to_bytes(size, "big")
+ return binascii.hexlify(data).decode()
def combine_dict(a, b):
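
Note (illustration, not part of the commit): the renamed generate_token() returns 2 * size hexadecimal digits drawn from random.getrandbits. The standard library's secrets.token_hex produces output of the same shape from a cryptographically secure source, which matters if a token is ever used for anything security-sensitive.

# Shape comparison: both expressions yield 2 * size lowercase hex digits;
# only secrets.token_hex is cryptographically secure.
import random
import secrets

size = 16
token_a = random.getrandbits(size * 8).to_bytes(size, "big").hex()
token_b = secrets.token_hex(size)

assert len(token_a) == len(token_b) == size * 2
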
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 601eeed..572d3bb 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.16.3"
+__version__ = "1.16.4"
diff --git a/test/test_util.py b/test/test_util.py
index 159c4bc..8848ea0 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -474,6 +474,19 @@ class TestOther(unittest.TestCase):
with self.assertRaises(ValueError):
func(3)
+ def test_generate_token(self):
+ tokens = set()
+ for _ in range(100):
+ token = util.generate_token()
+ tokens.add(token)
+ self.assertEqual(len(token), 16 * 2)
+ self.assertRegex(token, r"^[0-9a-f]+$")
+ self.assertGreaterEqual(len(tokens), 99)
+
+ token = util.generate_token(80)
+ self.assertEqual(len(token), 80 * 2)
+ self.assertRegex(token, r"^[0-9a-f]+$")
+
def test_combine_dict(self):
self.assertEqual(
util.combine_dict({}, {}),