From fc83315c164afd74734adf27e0f7fec2011904aa Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Tue, 16 Feb 2021 21:35:52 -0500 Subject: New upstream version 1.16.5. --- CHANGELOG.md | 26 +++++++ PKG-INFO | 12 ++-- README.rst | 10 +-- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 15 +++- gallery_dl.egg-info/PKG-INFO | 12 ++-- gallery_dl.egg-info/SOURCES.txt | 2 + gallery_dl/downloader/http.py | 6 ++ gallery_dl/extractor/2chan.py | 4 +- gallery_dl/extractor/500px.py | 4 +- gallery_dl/extractor/__init__.py | 2 + gallery_dl/extractor/behance.py | 17 ++++- gallery_dl/extractor/deviantart.py | 46 ++++++++---- gallery_dl/extractor/erome.py | 131 +++++++++++++++++++++++++++++++++ gallery_dl/extractor/furaffinity.py | 20 +++--- gallery_dl/extractor/hentaifox.py | 3 +- gallery_dl/extractor/imagehosts.py | 48 +++++++------ gallery_dl/extractor/inkbunny.py | 11 ++- gallery_dl/extractor/kemonoparty.py | 83 +++++++++++++-------- gallery_dl/extractor/mangadex.py | 5 +- gallery_dl/extractor/nsfwalbum.py | 15 ++-- gallery_dl/extractor/oauth.py | 73 +++++++++++++++++-- gallery_dl/extractor/paheal.py | 8 ++- gallery_dl/extractor/patreon.py | 23 +++--- gallery_dl/extractor/pillowfort.py | 136 +++++++++++++++++++++++++++++++++++ gallery_dl/extractor/pixiv.py | 55 +++++++------- gallery_dl/extractor/sankaku.py | 2 +- gallery_dl/extractor/slideshare.py | 4 +- gallery_dl/extractor/twitter.py | 8 ++- gallery_dl/extractor/vanillarock.py | 6 +- gallery_dl/extractor/webtoons.py | 1 + gallery_dl/postprocessor/metadata.py | 2 +- gallery_dl/version.py | 4 +- 33 files changed, 631 insertions(+), 165 deletions(-) create mode 100644 gallery_dl/extractor/erome.py create mode 100644 gallery_dl/extractor/pillowfort.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 8629536..893b944 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changelog +## 1.16.5 - 2021-02-14 +### Additions +- [behance] support `video` modules ([#1282](https://github.com/mikf/gallery-dl/issues/1282)) +- [erome] add `album`, `user`, and `search` extractors ([#409](https://github.com/mikf/gallery-dl/issues/409)) +- [hentaifox] support searching by group ([#1294](https://github.com/mikf/gallery-dl/issues/1294)) +- [imgclick] add `image` extractor ([#1307](https://github.com/mikf/gallery-dl/issues/1307)) +- [kemonoparty] extract inline images ([#1286](https://github.com/mikf/gallery-dl/issues/1286)) +- [kemonoparty] support URLs with non-numeric user and post IDs ([#1303](https://github.com/mikf/gallery-dl/issues/1303)) +- [pillowfort] add `user` and `post` extractors ([#846](https://github.com/mikf/gallery-dl/issues/846)) +### Changes +- [kemonoparty] include `service` in directories and archive keys +- [pixiv] require a `refresh-token` to login ([#1304](https://github.com/mikf/gallery-dl/issues/1304)) +- [snap] use `core18` as base +### Fixes +- [500px] update query hashes +- [deviantart] update parameters for `/browse/popular` ([#1267](https://github.com/mikf/gallery-dl/issues/1267)) +- [deviantart] provide filename extension for original file downloads ([#1272](https://github.com/mikf/gallery-dl/issues/1272)) +- [deviantart] fix `folders` option ([#1302](https://github.com/mikf/gallery-dl/issues/1302)) +- [inkbunny] add `sid` parameter to private file downloads ([#1281](https://github.com/mikf/gallery-dl/issues/1281)) +- [kemonoparty] fix absolute file URLs +- [mangadex] revert to `https://mangadex.org/api/` and add `api-server` option ([#1310](https://github.com/mikf/gallery-dl/issues/1310)) +- [nsfwalbum] use fallback for deleted content ([#1259](https://github.com/mikf/gallery-dl/issues/1259)) +- [sankaku] update `invalid token` detection ([#1309](https://github.com/mikf/gallery-dl/issues/1309)) +- [slideshare] fix extraction +- [postprocessor:metadata] fix crash with `extension-format` ([#1285](https://github.com/mikf/gallery-dl/issues/1285)) + ## 1.16.4 - 2021-01-23 ### Additions - [furaffinity] add `descriptions` option ([#1231](https://github.com/mikf/gallery-dl/issues/1231)) diff --git a/PKG-INFO b/PKG-INFO index bdacf73..a89521e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.16.4 +Version: 1.16.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ These executables include a Python interpreter and all required Python packages. @@ -225,7 +225,7 @@ Description: ========== Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for - ``pixiv``, ``nijie``, and ``seiga`` + ``nijie`` and ``seiga`` and optional for ``aryion``, ``danbooru``, @@ -248,7 +248,7 @@ Description: ========== { "extractor": { - "pixiv": { + "seiga": { "username": "", "password": "" } @@ -332,7 +332,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/README.rst b/README.rst index 1ddebcf..cec53a4 100644 --- a/README.rst +++ b/README.rst @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ These executables include a Python interpreter and all required Python packages. @@ -214,7 +214,7 @@ Username & Password Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for -``pixiv``, ``nijie``, and ``seiga`` +``nijie`` and ``seiga`` and optional for ``aryion``, ``danbooru``, @@ -237,7 +237,7 @@ You can set the necessary information in your configuration file { "extractor": { - "pixiv": { + "seiga": { "username": "", "password": "" } @@ -321,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 619e84f..02639b8 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-01-23" "1.16.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-02-14" "1.16.5" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 413a40b..408cb61 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-01-23" "1.16.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-02-14" "1.16.5" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -320,8 +320,6 @@ another site. Specifying a username and password is required for -.br -* \f[I]pixiv\f[] .br * \f[I]nijie\f[] .br @@ -1391,6 +1389,17 @@ port than the default. Download subalbums. +.SS extractor.pillowfort.reblogs +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract media from reblogged posts. + + .SS extractor.pinterest.sections .IP "Type:" 6 \f[I]bool\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index f1a1ebe..b87c59d 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.16.4 +Version: 1.16.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH `__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows `__ - - `Linux `__ + - `Windows `__ + - `Linux `__ These executables include a Python interpreter and all required Python packages. @@ -225,7 +225,7 @@ Description: ========== Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for - ``pixiv``, ``nijie``, and ``seiga`` + ``nijie`` and ``seiga`` and optional for ``aryion``, ``danbooru``, @@ -248,7 +248,7 @@ Description: ========== { "extractor": { - "pixiv": { + "seiga": { "username": "", "password": "" } @@ -332,7 +332,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index d4907de..066ac90 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -56,6 +56,7 @@ gallery_dl/extractor/deviantart.py gallery_dl/extractor/directlink.py gallery_dl/extractor/dynastyscans.py gallery_dl/extractor/e621.py +gallery_dl/extractor/erome.py gallery_dl/extractor/exhentai.py gallery_dl/extractor/fallenangels.py gallery_dl/extractor/flickr.py @@ -121,6 +122,7 @@ gallery_dl/extractor/patreon.py gallery_dl/extractor/photobucket.py gallery_dl/extractor/photovogue.py gallery_dl/extractor/piczel.py +gallery_dl/extractor/pillowfort.py gallery_dl/extractor/pinterest.py gallery_dl/extractor/pixiv.py gallery_dl/extractor/pixnet.py diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 179a552..8d72dc2 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -134,6 +134,12 @@ class HttpDownloader(DownloaderBase): self.log.warning(msg) return False + # check for invalid responses + validate = pathfmt.kwdict.get("_http_validate") + if validate and not validate(response): + self.log.warning("Invalid response") + return False + # set missing filename extension from MIME type if not pathfmt.extension: pathfmt.set_extension(self._find_extension(response)) diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index d34209f..f5d2a4c 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -22,8 +22,8 @@ class _2chanThreadExtractor(Extractor): url_fmt = "https://{server}.2chan.net/{board}/src/{filename}" pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)" test = ("http://dec.2chan.net/70/res/4752.htm", { - "url": "20c211ae7c06b18ec345a057fe0b68dde979b051", - "keyword": "23a529b46313b927fc94b577e5e1fdb3aa164ac1", + "url": "f49aa31340e9a3429226af24e19e01f5b819ca1f", + "keyword": "44599c21b248e79692b2eb2da12699bd0ed5640a", }) def __init__(self, match): diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index df9941a..81b11fd 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -153,7 +153,7 @@ class _500pxGalleryExtractor(_500pxExtractor): def metadata(self): user = self._request_graphql( "ProfileRendererQuery", {"username": self.user_name}, - "5a17a9af1830b58b94a912995b7947b24f27f1301c6ea8ab71a9eb1a6a86585b", + "105058632482dd2786fd5775745908dc928f537b28e28356b076522757d65c19", )["profile"] self.user_id = str(user["legacyId"]) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index a69bacc..923a78b 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -28,6 +28,7 @@ modules = [ "deviantart", "dynastyscans", "e621", + "erome", "exhentai", "fallenangels", "flickr", @@ -86,6 +87,7 @@ modules = [ "photobucket", "photovogue", "piczel", + "pillowfort", "pinterest", "pixiv", "pixnet", diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index a817174..b081cc9 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -83,6 +83,11 @@ class BehanceGalleryExtractor(BehanceExtractor): "count": 20, "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f", }), + # 'video' modules (#1282) + ("https://www.behance.net/gallery/101185577/COLCCI", { + "pattern": r"ytdl:https://adobeprod-a\.akamaihd\.net/", + "count": 3, + }), ) def __init__(self, match): @@ -120,8 +125,7 @@ class BehanceGalleryExtractor(BehanceExtractor): page, 'id="beconfig-store_state">', '')[0]) return self._update(data["project"]["project"]) - @staticmethod - def get_images(data): + def get_images(self, data): """Extract image results from an API response""" result = [] append = result.append @@ -133,6 +137,13 @@ class BehanceGalleryExtractor(BehanceExtractor): url = module["sizes"]["original"] append((url, module)) + elif mtype == "video": + page = self.request(module["src"]).text + url = text.extract(page, 'Please wait a few moments", 0, 600) < 0: + return response + time.sleep(5) + + def _pagination(self, url, params): + for params["page"] in itertools.count(1): + page = self.request(url, params=params).text + + album_ids = EromeAlbumExtractor.pattern.findall(page) + yield from album_ids + + if len(album_ids) < 36: + return + + +class EromeAlbumExtractor(EromeExtractor): + """Extractor for albums on erome.com""" + subcategory = "album" + pattern = BASE_PATTERN + r"/a/(\w+)" + test = ("https://www.erome.com/a/UHUX1B73", { + "pattern": r"https://s\d+\.erome\.com/342/UHUX1B73/\w+", + "count": 5, + "keyword": { + "album_id": "UHUX1B73", + "num": int, + "title": "Ryan Ryans", + "user": "gutiquq", + }, + }) + + def albums(self): + return (self.item,) + + +class EromeUserExtractor(EromeExtractor): + subcategory = "user" + pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)" + test = ("https://www.erome.com/gutiquq", { + "range": "1-25", + "count": 25, + }) + + def albums(self): + url = "{}/{}".format(self.root, self.item) + return self._pagination(url, {}) + + +class EromeSearchExtractor(EromeExtractor): + subcategory = "search" + pattern = BASE_PATTERN + r"/search\?q=([^&#]+)" + test = ("https://www.erome.com/search?q=cute", { + "range": "1-25", + "count": 25, + }) + + def albums(self): + url = self.root + "/search" + params = {"q": text.unquote(self.item)} + return self._pagination(url, params) + + +@cache() +def _cookie_cache(): + return () diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index df5a73e..a7b0356 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -186,7 +186,8 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor): subcategory = "gallery" pattern = BASE_PATTERN + r"/gallery/([^/?#]+)" test = ("https://www.furaffinity.net/gallery/mirlinthloth/", { - "pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/mirlinthloth/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -198,7 +199,8 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "{user!l}", "Scraps") pattern = BASE_PATTERN + r"/scraps/([^/?#]+)" test = ("https://www.furaffinity.net/scraps/mirlinthloth/", { - "pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/[^/]+(/stories)?/\d+/\d+.\w+.", "count": ">= 3", }) @@ -209,7 +211,8 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "{user!l}", "Favorites") pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { - "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -224,7 +227,8 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "Search", "{search}") pattern = BASE_PATTERN + r"/search/?\?([^#]+)" test = ("https://www.furaffinity.net/search/?q=cute", { - "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -243,9 +247,9 @@ class FuraffinityPostExtractor(FuraffinityExtractor): pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)" test = ( ("https://www.furaffinity.net/view/21835115/", { - "pattern": r"https://d\d*\.facdn\.net/(download/)?art/mirlinthloth" - r"/music/1488278723/1480267446.mirlinthloth_dj_fennmink" - r"_-_bude_s_4_ever\.mp3", + "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/(download/)?art" + r"/mirlinthloth/music/1488278723/1480267446.mirlinthlot" + r"h_dj_fennmink_-_bude_s_4_ever\.mp3", "keyword": { "artist" : "mirlinthloth", "artist_url" : "mirlinthloth", @@ -256,7 +260,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor): "id" : 21835115, "tags" : list, "title" : "Bude's 4 Ever", - "url" : r"re:https://d\d?.facdn.net/art/mirlinthloth/m", + "url" : r"re:https://d\d?\.f(uraffinity|acdn)\.net/art", "user" : "mirlinthloth", "views" : int, "favorites" : int, diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index 84ad3af..093f3fe 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -82,12 +82,13 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor): """Extractor for search results and listings on hentaifox.com""" subcategory = "search" pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com" - r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)") + r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)") test = ( ("https://hentaifox.com/parody/touhou-project/"), ("https://hentaifox.com/character/reimu-hakurei/"), ("https://hentaifox.com/artist/distance/"), ("https://hentaifox.com/search/touhou/"), + ("https://hentaifox.com/group/v-slash/"), ("https://hentaifox.com/tag/heterochromia/", { "pattern": HentaifoxGalleryExtractor.pattern, "count": ">= 60", diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index fe3afbb..abb6d10 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2020 Mike Fährmann +# Copyright 2016-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,9 +19,8 @@ class ImagehostImageExtractor(Extractor): basecategory = "imagehost" subcategory = "image" archive_fmt = "{token}" - https = False - method = "post" - params = "simple" + https = True + params = None cookies = None encoding = None @@ -30,6 +29,7 @@ class ImagehostImageExtractor(Extractor): self.page_url = "http{}://{}".format( "s" if self.https else "", match.group(1)) self.token = match.group(2) + if self.params == "simple": self.params = { "imgContinue": "Continue+to+image+...+", @@ -42,14 +42,11 @@ class ImagehostImageExtractor(Extractor): "adb": "1", "next": "Continue+to+image+...+", } - else: - self.params = {} - self.method = "get" def items(self): page = self.request( self.page_url, - method=self.method, + method=("POST" if self.params else "GET"), data=self.params, cookies=self.cookies, encoding=self.encoding, @@ -91,7 +88,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor): "exception": exception.NotFoundError, }), ) - https = True + params = "simple" encoding = "utf-8" def __init__(self, match): @@ -122,7 +119,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor): "keyword": "a8bb9ab8b2f6844071945d31f8c6e04724051f37", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True + params = "simple" encoding = "utf-8" def get_info(self, page): @@ -143,7 +140,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor): "url": "46812995d557f2c6adf0ebd0e631e6e4e45facde", "content": "59ec819cbd972dd9a71f25866fbfc416f2f215b3", }) - params = None + https = False def get_info(self, page): url = text.extract(page, "SRC='", "'")[0] @@ -159,8 +156,6 @@ class ImagetwistImageExtractor(ImagehostImageExtractor): "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None @property @memcache(maxage=3*3600) @@ -182,8 +177,6 @@ class ImgspiceImageExtractor(ImagehostImageExtractor): "keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None def get_info(self, page): pos = page.find('id="imgpreview"') @@ -204,8 +197,6 @@ class PixhostImageExtractor(ImagehostImageExtractor): "keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None cookies = {"pixhostads": "1", "pixhosttest": "1"} def get_info(self, page): @@ -224,8 +215,6 @@ class PostimgImageExtractor(ImagehostImageExtractor): "keyword": "2d05808d04e4e83e33200db83521af06e3147a84", "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee", }) - https = True - params = None def get_info(self, page): url , pos = text.extract(page, 'id="main-image" src="', '"') @@ -243,8 +232,6 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor): "keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("02}.{extension}") + archive_fmt = "{id}" + + def __init__(self, match): + Extractor.__init__(self, match) + self.item = match.group(1) + self.reblogs = self.config("reblogs", False) + + def items(self): + for post in self.posts(): + + if "original_post" in post and not self.reblogs: + continue + + files = post["media"] + del post["media"] + + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + yield Message.Directory, post + + post["num"] = 0 + for file in files: + url = file["url"] + if url: + post.update(file) + post["num"] += 1 + post["date"] = text.parse_datetime( + file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + yield Message.Url, url, text.nameext_from_url(url, post) + + +class PillowfortPostExtractor(PillowfortExtractor): + """Extractor for a single pillowfort post""" + subcategory = "post" + pattern = BASE_PATTERN + r"/posts/(\d+)" + test = ("https://www.pillowfort.social/posts/27510", { + "pattern": r"https://img\d+\.pillowfort\.social/posts/\w+_out\d+\.png", + "count": 4, + "keyword": { + "avatar_url": str, + "col": 0, + "commentable": True, + "comments_count": int, + "community_id": None, + "content": str, + "created_at": str, + "date": "type:datetime", + "deleted": None, + "deleted_at": None, + "deleted_by_mod": None, + "deleted_for_flag_id": None, + "embed_code": None, + "id": int, + "last_activity": str, + "last_activity_elapsed": str, + "last_edited_at": None, + "likes_count": int, + "media_type": "picture", + "nsfw": False, + "num": int, + "original_post_id": None, + "original_post_user_id": None, + "picture_content_type": None, + "picture_file_name": None, + "picture_file_size": None, + "picture_updated_at": None, + "post_id": 27510, + "post_type": "picture", + "privacy": "public", + "reblog_copy_info": list, + "rebloggable": True, + "reblogged_from_post_id": None, + "reblogged_from_user_id": None, + "reblogs_count": int, + "row": int, + "small_image_url": None, + "tags": list, + "time_elapsed": str, + "timestamp": str, + "title": "What is Pillowfort.io? ", + "updated_at": str, + "url": r"re:https://img3.pillowfort.social/posts/.*\.png", + "user_id": 5, + "username": "Staff" + }, + }) + + def posts(self): + url = "{}/posts/{}/json/".format(self.root, self.item) + return (self.request(url).json(),) + + +class PillowfortUserExtractor(PillowfortExtractor): + """Extractor for all posts of a pillowfort user""" + subcategory = "user" + pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)" + test = ("https://www.pillowfort.social/Pome", { + "pattern": r"https://img\d+\.pillowfort\.social/posts/", + "range": "1-15", + "count": 15, + }) + + def posts(self): + url = "{}/{}/json/".format(self.root, self.item) + params = {"p": 1} + + while True: + posts = self.request(url, params=params).json()["posts"] + yield from posts + + if len(posts) < 20: + return + params["p"] += 1 diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index a872ada..be976e9 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -510,49 +510,48 @@ class PixivAppAPI(): def __init__(self, extractor): self.extractor = extractor self.log = extractor.log - self.username, self.password = extractor._get_auth_info() + self.username = extractor._get_auth_info()[0] self.user = None + extractor.session.headers.update({ + "App-OS" : "ios", + "App-OS-Version": "13.1.2", + "App-Version" : "7.7.6", + "User-Agent" : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)", + "Referer" : "https://app-api.pixiv.net/", + }) + self.client_id = extractor.config( "client-id", self.CLIENT_ID) self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) - extractor.session.headers.update({ - "App-OS": "ios", - "App-OS-Version": "10.3.1", - "App-Version": "6.7.1", - "User-Agent": "PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)", - "Referer": "https://app-api.pixiv.net/", - }) + + token = extractor.config("refresh-token") + if token is None or token == "cache": + token = _refresh_token_cache(self.username) + self.refresh_token = token def login(self): """Login and gain an access token""" - self.user, auth = self._login_impl(self.username, self.password) + self.user, auth = self._login_impl(self.username) self.extractor.session.headers["Authorization"] = auth @cache(maxage=3600, keyarg=1) - def _login_impl(self, username, password): - if not username or not password: + def _login_impl(self, username): + if not self.refresh_token: raise exception.AuthenticationError( - "Username and password required") + "'refresh-token' required.\n" + "Run `gallery-dl oauth:pixiv` to get one.") + self.log.info("Refreshing access token") url = "https://oauth.secure.pixiv.net/auth/token" data = { - "client_id": self.client_id, - "client_secret": self.client_secret, - "get_secure_url": 1, + "client_id" : self.client_id, + "client_secret" : self.client_secret, + "grant_type" : "refresh_token", + "refresh_token" : self.refresh_token, + "get_secure_url": "1", } - refresh_token = _refresh_token_cache(username) - - if refresh_token: - self.log.info("Refreshing access token") - data["grant_type"] = "refresh_token" - data["refresh_token"] = refresh_token - else: - self.log.info("Logging in as %s", username) - data["grant_type"] = "password" - data["username"] = username - data["password"] = password time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00") headers = { @@ -565,11 +564,9 @@ class PixivAppAPI(): url, method="POST", headers=headers, data=data, fatal=False) if response.status_code >= 400: self.log.debug(response.text) - raise exception.AuthenticationError() + raise exception.AuthenticationError("Invalid refresh token") data = response.json()["response"] - if not refresh_token: - _refresh_token_cache.update(username, data["refresh_token"]) return data["user"], "Bearer " + data["access_token"] def illust_detail(self, illust_id): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index e98b630..a5f0138 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -242,7 +242,7 @@ class SankakuAPI(): success = True if not success: code = data.get("code") - if code == "invalid_token": + if code and code.endswith(("invalid-token", "invalid_token")): _authenticate_impl.invalidate(self.username) continue raise exception.StopExtraction(code) diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index 8f668df..0b970cc 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann, Leonardo Taccari +# Copyright 2016-2021 Mike Fährmann, Leonardo Taccari # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -61,7 +61,7 @@ class SlidesharePresentationExtractor(Extractor): title, pos = text.extract( page, '', '', pos) views, pos = text.extract( - page, '', 'views<', pos) + page, '', 'views<', pos) published, pos = text.extract( page, '