diff options
author | Unit 193 <unit193@unit193.net> | 2021-01-28 16:01:35 -0500 |
---|---|---|
committer | Unit 193 <unit193@unit193.net> | 2021-01-28 16:01:35 -0500 |
commit | 049f5338c920ac0530aa38d182bc33c42dad26a8 (patch) | |
tree | 2667231344df1f34810638eea3d44b53b9926666 | |
parent | 07849be7436c5181a785cd0eb32a7160672812c0 (diff) | |
parent | 2e29d2158d56879e5578dfabf9e8c0fa2e855ccf (diff) | |
download | gallery-dl-049f5338c920ac0530aa38d182bc33c42dad26a8.tar.bz2 gallery-dl-049f5338c920ac0530aa38d182bc33c42dad26a8.tar.xz gallery-dl-049f5338c920ac0530aa38d182bc33c42dad26a8.tar.zst |
Update upstream source from tag 'upstream/1.16.4'
Update to upstream version '1.16.4'
with Debian dir e3506d469f714e8be0057bf8b8c9c226db148160
30 files changed, 642 insertions, 136 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b3060a..8629536 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,30 @@ # Changelog +## 1.16.4 - 2021-01-23 +### Additions +- [furaffinity] add `descriptions` option ([#1231](https://github.com/mikf/gallery-dl/issues/1231)) +- [kemonoparty] add `user` and `post` extractors ([#1216](https://github.com/mikf/gallery-dl/issues/1216)) +- [nozomi] add `num` enumeration index ([#1239](https://github.com/mikf/gallery-dl/issues/1239)) +- [photovogue] added portfolio extractor ([#1253](https://github.com/mikf/gallery-dl/issues/1253)) +- [twitter] match `/i/user/ID` URLs +- [unsplash] add extractors ([#1197](https://github.com/mikf/gallery-dl/issues/1197)) +- [vipr] add image extractor ([#1258](https://github.com/mikf/gallery-dl/issues/1258)) +### Changes +- [derpibooru] use "Everything" filter by default ([#862](https://github.com/mikf/gallery-dl/issues/862)) +### Fixes +- [derpibooru] update `date` parsing +- [foolfuuka] stop search when results are exhausted ([#1174](https://github.com/mikf/gallery-dl/issues/1174)) +- [instagram] fix regex for `/saved` URLs ([#1251](https://github.com/mikf/gallery-dl/issues/1251)) +- [mangadex] update API URLs +- [mangakakalot] fix extraction +- [newgrounds] fix flash file extraction ([#1257](https://github.com/mikf/gallery-dl/issues/1257)) +- [sankaku] simplify login process +- [twitter] fix retries after hitting rate limit + ## 1.16.3 - 2021-01-10 +### Fixes +- fix crash when using a `dict` for `path-restrict` +- [postprocessor:metadata] sanitize custom filenames ## 1.16.2 - 2021-01-09 ### Additions @@ -12,7 +36,7 @@ - [twitter] fetch media from pinned tweets ([#1203](https://github.com/mikf/gallery-dl/issues/1203)) - [wikiart] add extractor for single paintings ([#1233](https://github.com/mikf/gallery-dl/issues/1233)) - [downloader:http] add MIME type and signature for `.ico` files ([#1211](https://github.com/mikf/gallery-dl/issues/1211)) -- add a `d` format string conversion for timestamp values +- add `d` format string conversion for timestamp values - add `"ascii"` as a special `path-restrict` value ### Fixes - [hentainexus] fix extraction ([#1234](https://github.com/mikf/gallery-dl/issues/1234)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.16.3 +Version: 1.16.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__ These executables include a Python interpreter and all required Python packages. @@ -332,7 +332,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__ These executables include a Python interpreter and all required Python packages. @@ -321,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index a260907..619e84f 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-01-10" "1.16.3" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-01-23" "1.16.4" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 609d1de..413a40b 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-01-10" "1.16.3" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-01-23" "1.16.4" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -822,10 +822,7 @@ to use your account's browsing settings and filters. \f[I]integer\f[] .IP "Default:" 9 -\f[I]null\f[] - -.IP "Example:" 4 -56027 (\f[I]Everything\f[] filter) +\f[I]56027\f[] (\f[I]Everything\f[] filter) .IP "Description:" 4 The content filter ID to use. @@ -1111,6 +1108,22 @@ Sets the maximum allowed size for downloaded images. \f[I]"l"\f[], ...) to use as an upper limit. +.SS extractor.furaffinity.descriptions +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"text"\f[] + +.IP "Description:" 4 +Controls the format of \f[I]description\f[] metadata fields. + +.br +* \f[I]"text"\f[]: Plain text with HTML tags removed +.br +* \f[I]"html"\f[]: Raw HTML content + + .SS extractor.furaffinity.include .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] @@ -1274,6 +1287,17 @@ If the selected format is not available, the first in the list gets chosen (usually mp3). +.SS extractor.newgrounds.flash +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]true\f[] + +.IP "Description:" 4 +Download original Adobe Flash animations instead of pre-rendered videos. + + .SS extractor.newgrounds.include .IP "Type:" 6 \f[I]string\f[] or \f[I]list\f[] of \f[I]strings\f[] @@ -1400,6 +1424,17 @@ Download from video pins. Download user avatars. +.SS extractor.pixiv.work.related +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Also download related artworks. + + .SS extractor.pixiv.ugoira .IP "Type:" 6 \f[I]bool\f[] @@ -1776,6 +1811,20 @@ Control video download behavior. * \f[I]false\f[]: Skip video Tweets +.SS extractor.unsplash.format +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"raw"\f[] + +.IP "Description:" 4 +Name of the image format to download. + +Available formats are +\f[I]"raw"\f[], \f[I]"full"\f[], \f[I]"regular"\f[], \f[I]"small"\f[], and \f[I]"thumb"\f[]. + + .SS extractor.vsco.videos .IP "Type:" 6 \f[I]bool\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 2ce1d97..f1a1ebe 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.16.3 +Version: 1.16.4 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.3/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__ These executables include a Python interpreter and all required Python packages. @@ -332,7 +332,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.3.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index cb025ff..d4907de 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -89,6 +89,7 @@ gallery_dl/extractor/instagram.py gallery_dl/extractor/issuu.py gallery_dl/extractor/kabeuchi.py gallery_dl/extractor/keenspot.py +gallery_dl/extractor/kemonoparty.py gallery_dl/extractor/khinsider.py gallery_dl/extractor/komikcast.py gallery_dl/extractor/lineblog.py @@ -118,6 +119,7 @@ gallery_dl/extractor/oauth.py gallery_dl/extractor/paheal.py gallery_dl/extractor/patreon.py gallery_dl/extractor/photobucket.py +gallery_dl/extractor/photovogue.py gallery_dl/extractor/piczel.py gallery_dl/extractor/pinterest.py gallery_dl/extractor/pixiv.py @@ -146,6 +148,7 @@ gallery_dl/extractor/test.py gallery_dl/extractor/tsumino.py gallery_dl/extractor/tumblr.py gallery_dl/extractor/twitter.py +gallery_dl/extractor/unsplash.py gallery_dl/extractor/vanillarock.py gallery_dl/extractor/vsco.py gallery_dl/extractor/wallhaven.py diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index 9813f2b..d34209f 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2020 Mike Fährmann +# Copyright 2017-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -22,8 +22,8 @@ class _2chanThreadExtractor(Extractor): url_fmt = "https://{server}.2chan.net/{board}/src/{filename}" pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)" test = ("http://dec.2chan.net/70/res/4752.htm", { - "url": "1c2d2ce8aea0fc71d94735cfc30009d628f33548", - "keyword": "f508d6841ea2cb19ed799aac9dc580263ca50651", + "url": "20c211ae7c06b18ec345a057fe0b68dde979b051", + "keyword": "23a529b46313b927fc94b577e5e1fdb3aa164ac1", }) def __init__(self, match): diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 36107d9..a69bacc 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -58,6 +58,7 @@ modules = [ "issuu", "kabeuchi", "keenspot", + "kemonoparty", "khinsider", "komikcast", "lineblog", @@ -83,6 +84,7 @@ modules = [ "paheal", "patreon", "photobucket", + "photovogue", "piczel", "pinterest", "pixiv", @@ -108,6 +110,7 @@ modules = [ "tsumino", "tumblr", "twitter", + "unsplash", "vanillarock", "vsco", "wallhaven", diff --git a/gallery_dl/extractor/derpibooru.py b/gallery_dl/extractor/derpibooru.py index 3b20fa5..94f3729 100644 --- a/gallery_dl/extractor/derpibooru.py +++ b/gallery_dl/extractor/derpibooru.py @@ -28,8 +28,7 @@ class DerpibooruExtractor(BooruExtractor): @staticmethod def _prepare(post): - post["date"] = text.parse_datetime( - post["created_at"], "%Y-%m-%dT%H:%M:%S") + post["date"] = text.parse_datetime(post["created_at"]) @staticmethod def _extended_tags(post): @@ -46,6 +45,8 @@ class DerpibooruExtractor(BooruExtractor): filter_id = self.config("filter") if filter_id: params["filter_id"] = filter_id + elif not api_key: + params["filter_id"] = "56027" # "Everything" filter while True: data = self.request(url, params=params).json() @@ -67,7 +68,7 @@ class DerpibooruPostExtractor(DerpibooruExtractor): "animated": False, "aspect_ratio": 1.0, "comment_count": int, - "created_at": "2012-01-02T03:12:33", + "created_at": "2012-01-02T03:12:33Z", "date": "dt:2012-01-02 03:12:33", "deletion_reason": None, "description": "", @@ -76,7 +77,7 @@ class DerpibooruPostExtractor(DerpibooruExtractor): "duration": 0.04, "extension": "png", "faves": int, - "first_seen_at": "2012-01-02T03:12:33", + "first_seen_at": "2012-01-02T03:12:33Z", "format": "png", "height": 900, "hidden_from_users": False, @@ -99,7 +100,7 @@ class DerpibooruPostExtractor(DerpibooruExtractor): "tag_ids": list, "tags": list, "thumbnails_generated": True, - "updated_at": "2020-05-28T13:14:07", + "updated_at": "2020-05-28T13:14:07Z", "uploader": "Clover the Clever", "uploader_id": 211188, "upvotes": int, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a9c63a9..a58401e 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -686,6 +686,8 @@ class DeviantartPopularExtractor(DeviantartExtractor): }), ("https://www.deviantart.com/popular-24-hours/?q=tree+house", { "options": (("original", False),), + "range": "1-30", + "count": 30, }), ("https://www.deviantart.com/search?q=tree"), ("https://www.deviantart.com/search/deviations?order=popular-1-week"), diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py index 81f2bc2..319ebe2 100644 --- a/gallery_dl/extractor/foolfuuka.py +++ b/gallery_dl/extractor/foolfuuka.py @@ -167,6 +167,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor): return yield from posts + if len(posts) <= 3: + return params["page"] += 1 diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 752cd62..df5a73e 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -29,6 +29,9 @@ class FuraffinityExtractor(Extractor): self.user = match.group(1) self.offset = 0 + if self.config("descriptions") == "html": + self._process_description = lambda x: x.strip() + def items(self): metadata = self.metadata() for post_id in util.advance(self.posts(), self.offset): @@ -83,8 +86,8 @@ class FuraffinityExtractor(Extractor): if tags: # new site layout data["tags"] = text.split_html(tags) - data["description"] = text.unescape(rh(extr( - 'class="section-body">', '</div>'), "", "")) + data["description"] = self._process_description(extr( + 'class="section-body">', '</div>')) data["views"] = pi(rh(extr('class="views">', '</span>'))) data["favorites"] = pi(rh(extr('class="favorites">', '</span>'))) data["comments"] = pi(rh(extr('class="comments">', '</span>'))) @@ -109,12 +112,16 @@ class FuraffinityExtractor(Extractor): data["tags"] = text.split_html(extr( 'id="keywords">', '</div>'))[::2] data["rating"] = extr('<img alt="', ' ') - data["description"] = text.unescape(text.remove_html(extr( - "</table>", "</table>"), "", "")) + data["description"] = self._process_description(extr( + "</table>", "</table>")) data["date"] = text.parse_timestamp(data["filename"].partition(".")[0]) return data + @staticmethod + def _process_description(description): + return text.unescape(text.remove_html(description, "", "")) + def _pagination(self): num = 1 diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 28af179..fe3afbb 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -249,3 +249,19 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor): def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("<img "))[0] return url, url + + +class ViprImageExtractor(ImagehostImageExtractor): + """Extractor for single images from vipr.im""" + category = "vipr" + pattern = r"(?:https?://)?(vipr\.im/(\w+))" + test = ("https://vipr.im/kcd5jcuhgs3v.html", { + "url": "88f6a3ecbf3356a11ae0868b518c60800e070202", + "keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771", + }) + https = True + params = None + + def get_info(self, page): + url = text.extract(page, '<img src="', '"')[0] + return url, url diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py index c3e7fe4..84018a9 100644 --- a/gallery_dl/extractor/instagram.py +++ b/gallery_dl/extractor/instagram.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Copyright 2018-2020 Leonardo Taccari -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -35,7 +35,7 @@ class InstagramExtractor(Extractor): Extractor.__init__(self, match) self.item = match.group(1) self.www_claim = "0" - self.csrf_token = util.generate_csrf_token() + self.csrf_token = util.generate_token() self._find_tags = re.compile(r"#\w+").findall self._cursor = None @@ -424,7 +424,7 @@ class InstagramChannelExtractor(InstagramExtractor): class InstagramSavedExtractor(InstagramExtractor): """Extractor for ProfilePage saved media""" subcategory = "saved" - pattern = USER_PATTERN + r"([^/?#]+)/saved" + pattern = USER_PATTERN + r"/saved" test = ("https://www.instagram.com/instagram/saved/",) def posts(self): diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py new file mode 100644 index 0000000..a5b5e00 --- /dev/null +++ b/gallery_dl/extractor/kemonoparty.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://kemono.party/""" + +from .common import Extractor, Message +from .. import text + + +class KemonopartyExtractor(Extractor): + """Base class for kemonoparty extractors""" + category = "kemonoparty" + root = "https://kemono.party" + directory_fmt = ("{category}", "{user}") + filename_fmt = "{id}_{title}_{filename}.{extension}" + archive_fmt = "{user}_{id}_{filename}.{extension}" + + def items(self): + for post in self.posts(): + + files = [] + if post["file"]: + files.append(post["file"]) + if post["attachments"]: + files.extend(post["attachments"]) + post["date"] = text.parse_datetime( + post["published"], "%a, %d %b %Y %H:%M:%S %Z") + yield Message.Directory, post + + for post["num"], file in enumerate(files, 1): + text.nameext_from_url(file["name"], post) + yield Message.Url, self.root + file["path"], post + + +class KemonopartyUserExtractor(KemonopartyExtractor): + """Extractor for all posts from a kemono.party user listing""" + subcategory = "user" + pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/?(?:$|[?#])" + test = ("https://kemono.party/fanbox/user/6993449", { + "range": "1-25", + "count": 25, + }) + + def __init__(self, match): + KemonopartyExtractor.__init__(self, match) + service, user_id = match.groups() + self.api_url = "{}/api/{}/user/{}".format(self.root, service, user_id) + + def posts(self): + url = self.api_url + params = {"o": 0} + + while True: + posts = self.request(url, params=params).json() + yield from posts + + if len(posts) < 25: + return + params["o"] += 25 + + +class KemonopartyPostExtractor(KemonopartyExtractor): + """Extractor for a single kemono.party post""" + subcategory = "post" + pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/post/(\d+)" + test = ("https://kemono.party/fanbox/user/6993449/post/506575", { + "pattern": r"https://kemono\.party/files/fanbox" + r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg", + "keyword": { + "added": "Wed, 06 May 2020 20:28:02 GMT", + "content": str, + "date": "dt:2019-08-11 02:09:04", + "edited": None, + "embed": dict, + "extension": "jpeg", + "filename": "P058kDFYus7DbqAkGlfWTlOr", + "id": "506575", + "num": 1, + "published": "Sun, 11 Aug 2019 02:09:04 GMT", + "service": "fanbox", + "shared_file": False, + "subcategory": "post", + "title": "c96取り置き", + "user": "6993449", + }, + }) + + def __init__(self, match): + KemonopartyExtractor.__init__(self, match) + service, user_id, post_id = match.groups() + self.api_url = "{}/api/{}/user/{}/post/{}".format( + self.root, service, user_id, post_id) + + def posts(self): + posts = self.request(self.api_url).json() + return (posts[0],) if len(posts) > 1 else posts diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index dca8995..2156ecf 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2020 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -17,6 +17,7 @@ class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" root = "https://mangadex.org" + api_root = "https://api.mangadex.org" # mangadex-to-iso639-1 codes iso639_map = { @@ -28,18 +29,18 @@ class MangadexExtractor(Extractor): def chapter_data(self, chapter_id): """Request API results for 'chapter_id'""" - url = "{}/api/v2/chapter/{}".format(self.root, chapter_id) + url = "{}/v2/chapter/{}".format(self.api_root, chapter_id) return self.request(url).json()["data"] @memcache(keyarg=1) def manga_data(self, manga_id): """Request API results for 'manga_id'""" - url = "{}/api/v2/manga/{}".format(self.root, manga_id) + url = "{}/v2/manga/{}".format(self.api_root, manga_id) return self.request(url).json()["data"] def manga_chapters(self, manga_id): """Request chapter list for 'manga_id'""" - url = "{}/api/v2/manga/{}/chapters".format(self.root, manga_id) + url = "{}/v2/manga/{}/chapters".format(self.api_root, manga_id) data = self.request(url).json()["data"] groups = { @@ -76,7 +77,7 @@ class MangadexChapterExtractor(MangadexExtractor): }), # MANGA Plus (#1154) ("https://mangadex.org/chapter/1122815", { - "excepion": exception.StopExtraction, + "exception": exception.HttpError, }), ) diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py index 951a257..cab866a 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -78,7 +78,8 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): } def images(self, page): - page = text.extract(page, 'id="vungdoc"', '\n<div')[0] + page = text.extract( + page, 'class="container-chapter-reader', '\n<div')[0] return [ (url, None) for url in text.extract_iter(page, '<img src="', '"') diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index a6cc5fa..4fdfac9 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -29,6 +29,7 @@ class NewgroundsExtractor(Extractor): Extractor.__init__(self, match) self.user = match.group(1) self.user_root = "https://{}.newgrounds.com".format(self.user) + self.flash = self.config("flash", True) def items(self): self.login() @@ -92,18 +93,22 @@ class NewgroundsExtractor(Extractor): } def extract_post(self, post_url): + + if "/art/view/" in post_url: + extract_data = self._extract_image_data + elif "/audio/listen/" in post_url: + extract_data = self._extract_audio_data + else: + extract_data = self._extract_media_data + if self.flash: + post_url += "/format/flash" + response = self.request(post_url, fatal=False) if response.status_code >= 400: return {} page = response.text extr = text.extract_from(page) - - if "/art/view/" in post_url: - data = self._extract_image_data(extr, post_url) - elif "/audio/listen/" in post_url: - data = self._extract_audio_data(extr, post_url) - else: - data = self._extract_media_data(extr, post_url) + data = extract_data(extr, post_url) data["_comment"] = extr('id="author_comments"', '</div>') data["comment"] = text.unescape(text.remove_html( @@ -313,6 +318,11 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): "user" : "zj", }, }), + # flash animation (#1257) + ("https://www.newgrounds.com/portal/view/161181/format/flash", { + "pattern": r"https://uploads\.ungrounded\.net/161000" + r"/161181_ddautta_mask__550x281_\.swf\?f1081628129", + }) ) def __init__(self, match): diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index 99e397b..4eb3ee6 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -62,7 +62,7 @@ class NozomiExtractor(Extractor): del post[key] yield Message.Directory, post - for image in images: + for post["num"], image in enumerate(images, 1): post["url"] = url = text.urljoin(self.root, image["imageurl"]) text.nameext_from_url(url, post) post["is_video"] = bool(image.get("is_video")) @@ -95,13 +95,11 @@ class NozomiPostExtractor(NozomiExtractor): "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5", "date" : "dt:2016-07-26 02:32:03", "extension": "jpg", - "favorites": int, "filename" : str, "height" : 768, "is_video" : False, "postid" : 3649262, "source" : "danbooru", - "sourceid" : 2434215, "tags" : list, "type" : "jpg", "url" : str, @@ -111,7 +109,7 @@ class NozomiPostExtractor(NozomiExtractor): # multiple images per post ("https://nozomi.la/post/25588032.html", { "url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228", - "keyword": "8c3a2561ccc9ad429be9850d1383a952d0b4a8ab", + "keyword": "f60e048df36308b6b25dfaac419b586895d360bc", "count": 7, }), # empty 'date' (#1163) diff --git a/gallery_dl/extractor/photovogue.py b/gallery_dl/extractor/photovogue.py new file mode 100644 index 0000000..a5c788a --- /dev/null +++ b/gallery_dl/extractor/photovogue.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.vogue.it/en/photovogue/""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?vogue\.it/(?:en/)?photovogue" + + +class PhotovogueUserExtractor(Extractor): + category = "photovogue" + subcategory = "user" + directory_fmt = ("{category}", "{photographer[id]} {photographer[name]}") + filename_fmt = "{id} {title}.{extension}" + archive_fmt = "{id}" + pattern = BASE_PATTERN + r"/portfolio/?\?id=(\d+)" + test = ( + ("https://www.vogue.it/en/photovogue/portfolio/?id=221252"), + ("https://vogue.it/photovogue/portfolio?id=221252", { + "pattern": r"https://images.vogue.it/Photovogue/[^/]+_gallery.jpg", + "keyword": { + "date": "type:datetime", + "favorite_count": int, + "favorited": list, + "id": int, + "image_id": str, + "is_favorite": False, + "orientation": "re:portrait|landscape", + "photographer": { + "biography": "Born in 1995. Live in Bologna.", + "city": "Bologna", + "country_id": 106, + "favoritedCount": int, + "id": 221252, + "isGold": bool, + "isPro": bool, + "latitude": str, + "longitude": str, + "name": "Arianna Mattarozzi", + "user_id": "38cb0601-4a85-453c-b7dc-7650a037f2ab", + "websites": list, + }, + "photographer_id": 221252, + "tags": list, + "title": str, + }, + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.user_id = match.group(1) + + def items(self): + for photo in self.photos(): + url = photo["gallery_image"] + photo["title"] = photo["title"].strip() + photo["date"] = text.parse_datetime( + photo["date"], "%Y-%m-%dT%H:%M:%S.%f%z") + + yield Message.Directory, photo + yield Message.Url, url, text.nameext_from_url(url, photo) + + def photos(self): + url = "https://api.vogue.it/production/photos" + params = { + "count": "50", + "order_by": "DESC", + "page": 0, + "photographer_id": self.user_id, + } + + while True: + data = self.request(url, params=params).json() + yield from data["items"] + + if not data["has_next"]: + break + params["page"] += 1 diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 334412d..e5a0486 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2020 Mike Fährmann +# Copyright 2016-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -305,7 +305,7 @@ class PinterestAPI(): def __init__(self, extractor): self.extractor = extractor - csrf_token = util.generate_csrf_token() + csrf_token = util.generate_token() self.headers = self.HEADERS.copy() self.headers["X-CSRFToken"] = csrf_token self.cookies = {"csrftoken": csrf_token} diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 8aee058..a872ada 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Mike Fährmann +# Copyright 2014-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -217,6 +217,12 @@ class PixivWorkExtractor(PixivExtractor): "url": "7267695a985c4db8759bebcf8d21dbdd2d2317ef", "keywords": {"frames": list}, }), + # related works (#1237) + ("https://www.pixiv.net/artworks/966412", { + "options": (("related", True),), + "range": "1-10", + "count": ">= 10", + }), ("https://www.pixiv.net/en/artworks/966412"), ("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=96641"), ("http://i1.pixiv.net/c/600x600/img-master" @@ -233,7 +239,11 @@ class PixivWorkExtractor(PixivExtractor): self.illust_id = match.group(1) or match.group(2) def works(self): - return (self.api.illust_detail(self.illust_id),) + works = (self.api.illust_detail(self.illust_id),) + if self.config("related", False): + related = self.api.illust_related(self.illust_id) + works = itertools.chain(works, related) + return works class PixivFavoriteExtractor(PixivExtractor): @@ -574,6 +584,10 @@ class PixivAppAPI(): params = {"mode": mode, "date": date} return self._pagination("v1/illust/ranking", params) + def illust_related(self, illust_id): + params = {"illust_id": illust_id} + return self._pagination("v2/illust/related", params) + def search_illust(self, word, sort=None, target=None, duration=None): params = {"word": word, "search_target": target, "sort": sort, "duration": duration} diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 6a499a3..e98b630 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Mike Fährmann +# Copyright 2014-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -264,49 +264,11 @@ class SankakuAPI(): @cache(maxage=365*24*3600, keyarg=1) def _authenticate_impl(extr, username, password): extr.log.info("Logging in as %s", username) - headers = {"Accept": "application/vnd.sankaku.api+json;v=2"} - # get initial access_token - url = "https://login.sankakucomplex.com/auth/token" + url = "https://capi-v2.sankakucomplex.com/auth/token" + headers = {"Accept": "application/vnd.sankaku.api+json;v=2"} data = {"login": username, "password": password} - response = extr.request( - url, method="POST", headers=headers, json=data, fatal=False) - data = response.json() - if response.status_code >= 400 or not data.get("success"): - raise exception.AuthenticationError(data.get("error")) - access_token = data["access_token"] - - # start openid auth - url = "https://login.sankakucomplex.com/oidc/auth" - params = { - "response_type": "code", - "scope" : "openid", - "client_id" : "sankaku-web-app", - "redirect_uri" : "https://sankaku.app/sso/callback", - "state" : "return_uri=https://sankaku.app/", - "theme" : "black", - "lang" : "undefined", - } - page = extr.request(url, params=params).text - submit_url = text.extract(page, 'submitUrl = "', '"')[0] - - # get code from initial access_token - url = "https://login.sankakucomplex.com" + submit_url - data = { - "accessToken": access_token, - "nonce" : "undefined", - } - response = extr.request(url, method="POST", data=data) - query = text.parse_query(response.request.url.partition("?")[2]) - - # get final access_token from code - url = "https://capi-v2.sankakucomplex.com/sso/finalize?lang=en" - data = { - "code" : query["code"], - "client_id" : "sankaku-web-app", - "redirect_uri": "https://sankaku.app/sso/callback", - } response = extr.request( url, method="POST", headers=headers, json=data, fatal=False) data = response.json() diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index b769912..4034732 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2020 Mike Fährmann +# Copyright 2016-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -239,7 +239,7 @@ class TwitterExtractor(Extractor): def _login_impl(self, username, password): self.log.info("Logging in as %s", username) - token = util.generate_csrf_token() + token = util.generate_token() self.session.cookies.clear() self.request(self.root + "/login") @@ -272,8 +272,8 @@ class TwitterExtractor(Extractor): class TwitterTimelineExtractor(TwitterExtractor): """Extractor for all images from a user's timeline""" subcategory = "timeline" - pattern = BASE_PATTERN + \ - r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])|intent/user\?user_id=(\d+))" + pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])" + r"|i(?:/user/|ntent/user\?user_id=)(\d+))") test = ( ("https://twitter.com/supernaturepics", { "range": "1-40", @@ -281,14 +281,15 @@ class TwitterTimelineExtractor(TwitterExtractor): }), ("https://mobile.twitter.com/supernaturepics?p=i"), ("https://www.twitter.com/id:2976459548"), + ("https://twitter.com/i/user/2976459548"), ("https://twitter.com/intent/user?user_id=2976459548"), ) def __init__(self, match): TwitterExtractor.__init__(self, match) - uid = match.group(2) - if uid: - self.user = "id:" + uid + user_id = match.group(2) + if user_id: + self.user = "id:" + user_id def tweets(self): return TwitterAPI(self).timeline_profile(self.user) @@ -355,8 +356,7 @@ class TwitterListMembersExtractor(TwitterExtractor): self.login() for user in TwitterAPI(self).list_members(self.user): user["_extractor"] = TwitterTimelineExtractor - url = "{}/intent/user?user_id={}".format( - self.root, user["rest_id"]) + url = "{}/i/user/{}".format(self.root, user["rest_id"]) yield Message.Queue, url, user @@ -509,7 +509,7 @@ class TwitterAPI(): # CSRF csrf_token = cookies.get("ct0", domain=cookiedomain) if not csrf_token: - csrf_token = util.generate_csrf_token() + csrf_token = util.generate_token() cookies.set("ct0", csrf_token, domain=cookiedomain) self.headers["x-csrf-token"] = csrf_token @@ -617,31 +617,34 @@ class TwitterAPI(): def _call(self, endpoint, params, root=None, method="GET"): if root is None: root = self.root - response = self.extractor.request( - root + endpoint, method=method, params=params, - headers=self.headers, fatal=None) - - # update 'x-csrf-token' header (#1170) - csrf_token = response.cookies.get("ct0") - if csrf_token: - self.headers["x-csrf-token"] = csrf_token - - if response.status_code < 400: - return response.json() - if response.status_code == 429: - until = response.headers.get("x-rate-limit-reset") - self.extractor.wait(until=until, seconds=(None if until else 60)) - return self._call(endpoint, params, method) - try: - msg = ", ".join( - '"' + error["message"] + '"' - for error in response.json()["errors"] - ) - except Exception: - msg = response.text - raise exception.StopExtraction( - "%s %s (%s)", response.status_code, response.reason, msg) + while True: + response = self.extractor.request( + root + endpoint, method=method, params=params, + headers=self.headers, fatal=None) + + # update 'x-csrf-token' header (#1170) + csrf_token = response.cookies.get("ct0") + if csrf_token: + self.headers["x-csrf-token"] = csrf_token + + if response.status_code < 400: + return response.json() + if response.status_code == 429: + until = response.headers.get("x-rate-limit-reset") + seconds = None if until else 60 + self.extractor.wait(until=until, seconds=seconds) + continue + + try: + msg = ", ".join( + '"' + error["message"] + '"' + for error in response.json()["errors"] + ) + except Exception: + msg = response.text + raise exception.StopExtraction( + "%s %s (%s)", response.status_code, response.reason, msg) def _pagination(self, endpoint, params=None): if params is None: diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py new file mode 100644 index 0000000..545eb31 --- /dev/null +++ b/gallery_dl/extractor/unsplash.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://unsplash.com/""" + +from .common import Extractor, Message +from .. import text, util + +BASE_PATTERN = r"(?:https?://)?unsplash\.com" + + +class UnsplashExtractor(Extractor): + """Base class for unsplash extractors""" + category = "unsplash" + directory_fmt = ("{category}", "{user[username]}") + filename_fmt = "{id}.{extension}" + archive_fmt = "{id}" + root = "https://unsplash.com" + page_start = 1 + per_page = 20 + + def __init__(self, match): + Extractor.__init__(self, match) + self.item = match.group(1) + + def items(self): + fmt = self.config("format") or "raw" + for photo in self.photos(): + util.delete_items( + photo, ("current_user_collections", "related_collections")) + url = photo["urls"][fmt] + text.nameext_from_url(url, photo) + + photo["extension"] = "jpg" + photo["date"] = text.parse_datetime(photo["created_at"]) + if "tags" in photo: + photo["tags"] = [t["title"] for t in photo["tags"]] + + yield Message.Directory, photo + yield Message.Url, url, photo + + def skip(self, num): + pages = num // self.per_page + self.page_start += pages + return pages * self.per_page + + def _pagination(self, url, params, results=False): + params["per_page"] = self.per_page + params["page"] = self.page_start + + while True: + photos = self.request(url, params=params).json() + if results: + photos = photos["results"] + yield from photos + + if len(photos) < self.per_page: + return + params["page"] += 1 + + +class UnsplashImageExtractor(UnsplashExtractor): + """Extractor for a single unsplash photo""" + subcategory = "image" + pattern = BASE_PATTERN + r"/photos/([^/?#]+)" + test = ("https://unsplash.com/photos/lsoogGC_5dg", { + "url": "00accb0a64d5a0df0db911f8b425892718dce524", + "keyword": { + "alt_description": "re:silhouette of trees near body of water ", + "blur_hash": "LZP4uQS4jboe%#o0WCa}2doJNaaz", + "categories": list, + "color": "#f3c08c", + "created_at": "2020-04-08T08:29:42-04:00", + "date": "dt:2020-04-08 12:29:42", + "description": "The Island", + "downloads": int, + "exif": { + "aperture": "11", + "exposure_time": "30", + "focal_length": "70.0", + "iso": 200, + "make": "Canon", + "model": "Canon EOS 5D Mark IV" + }, + "extension": "jpg", + "filename": "photo-1586348943529-beaae6c28db9", + "height": 6272, + "id": "lsoogGC_5dg", + "liked_by_user": False, + "likes": int, + "location": { + "city": "Beaver Dam", + "country": "United States", + "name": "Beaver Dam, WI 53916, USA", + "position": { + "latitude": 43.457769, + "longitude": -88.837329 + }, + "title": "Beaver Dam, WI 53916, USA" + }, + "promoted_at": "2020-04-08T11:12:03-04:00", + "sponsorship": None, + "tags": list, + "updated_at": str, + "user": { + "accepted_tos": True, + "bio": str, + "first_name": "Dave", + "id": "uMJXuywXLiU", + "instagram_username": "just_midwest_rock", + "last_name": "Hoefler", + "location": "Madison, WI", + "name": "Dave Hoefler", + "portfolio_url": str, + "total_collections": int, + "total_likes": int, + "total_photos": int, + "twitter_username": None, + "updated_at": str, + "username": "johnwestrock" + }, + "views": int, + "width": 4480, + }, + }) + + def photos(self): + url = "{}/napi/photos/{}".format(self.root, self.item) + return (self.request(url).json(),) + + +class UnsplashUserExtractor(UnsplashExtractor): + """Extractor for all photos of an unsplash user""" + subcategory = "user" + pattern = BASE_PATTERN + r"/@(\w+)/?$" + test = ("https://unsplash.com/@johnwestrock", { + "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" + r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", + "range": "1-30", + "count": 30, + }) + + def photos(self): + url = "{}/napi/users/{}/photos".format(self.root, self.item) + params = {"order_by": "latest"} + return self._pagination(url, params) + + +class UnsplashFavoriteExtractor(UnsplashExtractor): + """Extractor for all likes of an unsplash user""" + subcategory = "favorite" + pattern = BASE_PATTERN + r"/@(\w+)/likes" + test = ("https://unsplash.com/@johnwestrock/likes", { + "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" + r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", + "range": "1-30", + "count": 30, + }) + + def photos(self): + url = "{}/napi/users/{}/likes".format(self.root, self.item) + params = {"order_by": "latest"} + return self._pagination(url, params) + + +class UnsplashCollectionExtractor(UnsplashExtractor): + """Extractor for an unsplash collection""" + subcategory = "collection" + pattern = BASE_PATTERN + r"/collections/(\d+)" + test = ("https://unsplash.com/collections/3178572/winter", { + "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" + r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", + "range": "1-30", + "count": 30, + }) + + def photos(self): + url = "{}/napi/collections/{}/photos".format(self.root, self.item) + params = {"order_by": "latest"} + return self._pagination(url, params) + + +class UnsplashSearchExtractor(UnsplashExtractor): + """Extractor for unsplash search results""" + subcategory = "search" + pattern = BASE_PATTERN + r"/s/photos/([^/?#]+)(?:\?([^/?#]+))?" + test = ("https://unsplash.com/s/photos/nature", { + "pattern": r"https://images\.unsplash\.com/(photo-\d+-\w+" + r"|reserve/[^/?#]+)\?ixid=\w+&ixlib=rb-1\.2\.1$", + "range": "1-30", + "count": 30, + }) + + def __init__(self, match): + UnsplashExtractor.__init__(self, match) + self.query = match.group(2) + + def photos(self): + url = self.root + "/napi/search/photos" + params = {"query": text.unquote(self.item)} + if self.query: + params.update(text.parse_query(self.query)) + return self._pagination(url, params, True) diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index 5d3ca89..4449e19 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -47,7 +47,8 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor): (("https://www.webtoons.com/en/comedy/safely-endangered" "/ep-572-earth/viewer?title_no=352&episode_no=572"), { "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", - "content": "1ce950324f14018b691c42b0ede57fa25618abeb", + "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7", + "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"), "count": 5, }), ) diff --git a/gallery_dl/util.py b/gallery_dl/util.py index ffd686e..2161b9d 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -18,6 +18,7 @@ import shutil import string import _string import sqlite3 +import binascii import datetime import operator import itertools @@ -71,8 +72,10 @@ def raises(cls): return wrap -def generate_csrf_token(): - return random.getrandbits(128).to_bytes(16, "big").hex() +def generate_token(size=16): + """Generate a random token with hexadecimal digits""" + data = random.getrandbits(size * 8).to_bytes(size, "big") + return binascii.hexlify(data).decode() def combine_dict(a, b): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 601eeed..572d3bb 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.16.3" +__version__ = "1.16.4" diff --git a/test/test_util.py b/test/test_util.py index 159c4bc..8848ea0 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -474,6 +474,19 @@ class TestOther(unittest.TestCase): with self.assertRaises(ValueError): func(3) + def test_generate_token(self): + tokens = set() + for _ in range(100): + token = util.generate_token() + tokens.add(token) + self.assertEqual(len(token), 16 * 2) + self.assertRegex(token, r"^[0-9a-f]+$") + self.assertGreaterEqual(len(tokens), 99) + + token = util.generate_token(80) + self.assertEqual(len(token), 80 * 2) + self.assertRegex(token, r"^[0-9a-f]+$") + def test_combine_dict(self): self.assertEqual( util.combine_dict({}, {}), |