diff options
author | Unit 193 <unit193@unit193.net> | 2021-02-16 21:36:00 -0500 |
---|---|---|
committer | Unit 193 <unit193@unit193.net> | 2021-02-16 21:36:00 -0500 |
commit | b99b946e32279961452a2c1143d9cc1b1c2db32b (patch) | |
tree | 4644c7bd326a5ded783609ac3aafa0c357815603 | |
parent | 039a989d1d4351612c15d117ce0c388eb816b0c1 (diff) | |
parent | fc83315c164afd74734adf27e0f7fec2011904aa (diff) | |
download | gallery-dl-b99b946e32279961452a2c1143d9cc1b1c2db32b.tar.bz2 gallery-dl-b99b946e32279961452a2c1143d9cc1b1c2db32b.tar.xz gallery-dl-b99b946e32279961452a2c1143d9cc1b1c2db32b.tar.zst |
Update upstream source from tag 'upstream/1.16.5'
Update to upstream version '1.16.5'
with Debian dir cf3daef99ed1e95e13091ce2976ff5f6c8dbecf6
33 files changed, 631 insertions, 165 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 8629536..893b944 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Changelog +## 1.16.5 - 2021-02-14 +### Additions +- [behance] support `video` modules ([#1282](https://github.com/mikf/gallery-dl/issues/1282)) +- [erome] add `album`, `user`, and `search` extractors ([#409](https://github.com/mikf/gallery-dl/issues/409)) +- [hentaifox] support searching by group ([#1294](https://github.com/mikf/gallery-dl/issues/1294)) +- [imgclick] add `image` extractor ([#1307](https://github.com/mikf/gallery-dl/issues/1307)) +- [kemonoparty] extract inline images ([#1286](https://github.com/mikf/gallery-dl/issues/1286)) +- [kemonoparty] support URLs with non-numeric user and post IDs ([#1303](https://github.com/mikf/gallery-dl/issues/1303)) +- [pillowfort] add `user` and `post` extractors ([#846](https://github.com/mikf/gallery-dl/issues/846)) +### Changes +- [kemonoparty] include `service` in directories and archive keys +- [pixiv] require a `refresh-token` to login ([#1304](https://github.com/mikf/gallery-dl/issues/1304)) +- [snap] use `core18` as base +### Fixes +- [500px] update query hashes +- [deviantart] update parameters for `/browse/popular` ([#1267](https://github.com/mikf/gallery-dl/issues/1267)) +- [deviantart] provide filename extension for original file downloads ([#1272](https://github.com/mikf/gallery-dl/issues/1272)) +- [deviantart] fix `folders` option ([#1302](https://github.com/mikf/gallery-dl/issues/1302)) +- [inkbunny] add `sid` parameter to private file downloads ([#1281](https://github.com/mikf/gallery-dl/issues/1281)) +- [kemonoparty] fix absolute file URLs +- [mangadex] revert to `https://mangadex.org/api/` and add `api-server` option ([#1310](https://github.com/mikf/gallery-dl/issues/1310)) +- [nsfwalbum] use fallback for deleted content ([#1259](https://github.com/mikf/gallery-dl/issues/1259)) +- [sankaku] update `invalid token` detection ([#1309](https://github.com/mikf/gallery-dl/issues/1309)) +- [slideshare] fix extraction +- [postprocessor:metadata] fix crash with `extension-format` ([#1285](https://github.com/mikf/gallery-dl/issues/1285)) + ## 1.16.4 - 2021-01-23 ### Additions - [furaffinity] add `descriptions` option ([#1231](https://github.com/mikf/gallery-dl/issues/1231)) @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.16.4 +Version: 1.16.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.bin>`__ These executables include a Python interpreter and all required Python packages. @@ -225,7 +225,7 @@ Description: ========== Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for - ``pixiv``, ``nijie``, and ``seiga`` + ``nijie`` and ``seiga`` and optional for ``aryion``, ``danbooru``, @@ -248,7 +248,7 @@ Description: ========== { "extractor": { - "pixiv": { + "seiga": { "username": "<username>", "password": "<password>" } @@ -332,7 +332,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ @@ -83,8 +83,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.bin>`__ These executables include a Python interpreter and all required Python packages. @@ -214,7 +214,7 @@ Username & Password Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for -``pixiv``, ``nijie``, and ``seiga`` +``nijie`` and ``seiga`` and optional for ``aryion``, ``danbooru``, @@ -237,7 +237,7 @@ You can set the necessary information in your configuration file { "extractor": { - "pixiv": { + "seiga": { "username": "<username>", "password": "<password>" } @@ -321,7 +321,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 619e84f..02639b8 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2021-01-23" "1.16.4" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2021-02-14" "1.16.5" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 413a40b..408cb61 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2021-01-23" "1.16.4" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2021-02-14" "1.16.5" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -321,8 +321,6 @@ another site. Specifying a username and password is required for .br -* \f[I]pixiv\f[] -.br * \f[I]nijie\f[] .br * \f[I]seiga\f[] @@ -1391,6 +1389,17 @@ port than the default. Download subalbums. +.SS extractor.pillowfort.reblogs +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Extract media from reblogged posts. + + .SS extractor.pinterest.sections .IP "Type:" 6 \f[I]bool\f[] diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index f1a1ebe..b87c59d 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.16.4 +Version: 1.16.5 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -94,8 +94,8 @@ Description: ========== put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). - - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.exe>`__ - - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.4/gallery-dl.bin>`__ + - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.exe>`__ + - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.16.5/gallery-dl.bin>`__ These executables include a Python interpreter and all required Python packages. @@ -225,7 +225,7 @@ Description: ========== Some extractors require you to provide valid login credentials in the form of a username & password pair. This is necessary for - ``pixiv``, ``nijie``, and ``seiga`` + ``nijie`` and ``seiga`` and optional for ``aryion``, ``danbooru``, @@ -248,7 +248,7 @@ Description: ========== { "extractor": { - "pixiv": { + "seiga": { "username": "<username>", "password": "<password>" } @@ -332,7 +332,7 @@ Description: ========== .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst - .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.4.tar.gz + .. _stable: https://github.com/mikf/gallery-dl/archive/v1.16.5.tar.gz .. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz .. _Python: https://www.python.org/downloads/ diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt index d4907de..066ac90 100644 --- a/gallery_dl.egg-info/SOURCES.txt +++ b/gallery_dl.egg-info/SOURCES.txt @@ -56,6 +56,7 @@ gallery_dl/extractor/deviantart.py gallery_dl/extractor/directlink.py gallery_dl/extractor/dynastyscans.py gallery_dl/extractor/e621.py +gallery_dl/extractor/erome.py gallery_dl/extractor/exhentai.py gallery_dl/extractor/fallenangels.py gallery_dl/extractor/flickr.py @@ -121,6 +122,7 @@ gallery_dl/extractor/patreon.py gallery_dl/extractor/photobucket.py gallery_dl/extractor/photovogue.py gallery_dl/extractor/piczel.py +gallery_dl/extractor/pillowfort.py gallery_dl/extractor/pinterest.py gallery_dl/extractor/pixiv.py gallery_dl/extractor/pixnet.py diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 179a552..8d72dc2 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -134,6 +134,12 @@ class HttpDownloader(DownloaderBase): self.log.warning(msg) return False + # check for invalid responses + validate = pathfmt.kwdict.get("_http_validate") + if validate and not validate(response): + self.log.warning("Invalid response") + return False + # set missing filename extension from MIME type if not pathfmt.extension: pathfmt.set_extension(self._find_extension(response)) diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py index d34209f..f5d2a4c 100644 --- a/gallery_dl/extractor/2chan.py +++ b/gallery_dl/extractor/2chan.py @@ -22,8 +22,8 @@ class _2chanThreadExtractor(Extractor): url_fmt = "https://{server}.2chan.net/{board}/src/{filename}" pattern = r"(?:https?://)?([^.]+)\.2chan\.net/([^/]+)/res/(\d+)" test = ("http://dec.2chan.net/70/res/4752.htm", { - "url": "20c211ae7c06b18ec345a057fe0b68dde979b051", - "keyword": "23a529b46313b927fc94b577e5e1fdb3aa164ac1", + "url": "f49aa31340e9a3429226af24e19e01f5b819ca1f", + "keyword": "44599c21b248e79692b2eb2da12699bd0ed5640a", }) def __init__(self, match): diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index df9941a..81b11fd 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -153,7 +153,7 @@ class _500pxGalleryExtractor(_500pxExtractor): def metadata(self): user = self._request_graphql( "ProfileRendererQuery", {"username": self.user_name}, - "5a17a9af1830b58b94a912995b7947b24f27f1301c6ea8ab71a9eb1a6a86585b", + "105058632482dd2786fd5775745908dc928f537b28e28356b076522757d65c19", )["profile"] self.user_id = str(user["legacyId"]) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index a69bacc..923a78b 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -28,6 +28,7 @@ modules = [ "deviantart", "dynastyscans", "e621", + "erome", "exhentai", "fallenangels", "flickr", @@ -86,6 +87,7 @@ modules = [ "photobucket", "photovogue", "piczel", + "pillowfort", "pinterest", "pixiv", "pixnet", diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index a817174..b081cc9 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -83,6 +83,11 @@ class BehanceGalleryExtractor(BehanceExtractor): "count": 20, "url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f", }), + # 'video' modules (#1282) + ("https://www.behance.net/gallery/101185577/COLCCI", { + "pattern": r"ytdl:https://adobeprod-a\.akamaihd\.net/", + "count": 3, + }), ) def __init__(self, match): @@ -120,8 +125,7 @@ class BehanceGalleryExtractor(BehanceExtractor): page, 'id="beconfig-store_state">', '</script>')[0]) return self._update(data["project"]["project"]) - @staticmethod - def get_images(data): + def get_images(self, data): """Extract image results from an API response""" result = [] append = result.append @@ -133,6 +137,13 @@ class BehanceGalleryExtractor(BehanceExtractor): url = module["sizes"]["original"] append((url, module)) + elif mtype == "video": + page = self.request(module["src"]).text + url = text.extract(page, '<source src="', '"')[0] + if text.ext_from_url(url) == "m3u8": + url = "ytdl:" + url + append((url, module)) + elif mtype == "media_collection": for component in module["components"]: url = component["sizes"]["source"] diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index a58401e..2eb3b28 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -176,10 +176,11 @@ class DeviantartExtractor(Extractor): @staticmethod def commit(deviation, target): url = target["src"] + name = target.get("filename") or url target = target.copy() target["filename"] = deviation["filename"] deviation["target"] = target - deviation["extension"] = target["extension"] = text.ext_from_url(url) + deviation["extension"] = target["extension"] = text.ext_from_url(name) return Message.Url, url, deviation def _commit_journal_html(self, deviation, journal): @@ -722,7 +723,7 @@ class DeviantartPopularExtractor(DeviantartExtractor): def deviations(self): return self.api.browse_popular( - self.search_term, self.time_range, self.category_path, self.offset) + self.search_term, self.time_range, self.offset) def prepare(self, deviation): DeviantartExtractor.prepare(self, deviation) @@ -917,13 +918,16 @@ class DeviantartOAuthAPI(): self.client_id, ) - def browse_popular(self, query=None, timerange=None, - category_path=None, offset=0): + def browse_popular(self, query=None, timerange=None, offset=0): """Yield popular deviations""" endpoint = "browse/popular" - params = {"q": query, "offset": offset, "limit": 120, - "timerange": timerange, "category_path": category_path, - "mature_content": self.mature} + params = { + "q" : query, + "limit" : 50 if self.metadata else 120, + "timerange" : timerange, + "offset" : offset, + "mature_content": self.mature, + } return self._pagination(endpoint, params) def browse_user_journals(self, username, offset=0): @@ -1127,13 +1131,31 @@ class DeviantartOAuthAPI(): self.log.info("Collecting folder information for '%s'", username) folders = self.gallery_folders(username) + # create 'folderid'-to-'folder' mapping + fmap = { + folder["folderid"]: folder + for folder in folders + } + # add parent names to folders, but ignore "Featured" as parent - fmap = {} featured = folders[0]["folderid"] - for folder in folders: - if folder["parent"] and folder["parent"] != featured: - folder["name"] = fmap[folder["parent"]] + "/" + folder["name"] - fmap[folder["folderid"]] = folder["name"] + done = False + + while not done: + done = True + for folder in folders: + parent = folder["parent"] + if not parent: + pass + elif parent == featured: + folder["parent"] = None + else: + parent = fmap[parent] + if parent["parent"]: + done = False + else: + folder["name"] = parent["name"] + "/" + folder["name"] + folder["parent"] = None # map deviationids to folder names dmap = collections.defaultdict(list) diff --git a/gallery_dl/extractor/erome.py b/gallery_dl/extractor/erome.py new file mode 100644 index 0000000..1c6ebb4 --- /dev/null +++ b/gallery_dl/extractor/erome.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.erome.com/""" + +from .common import Extractor, Message +from .. import text, util +from ..cache import cache +import itertools +import time + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?erome\.com" + + +class EromeExtractor(Extractor): + category = "erome" + directory_fmt = ("{category}", "{user}") + filename_fmt = "{album_id} {title} {num:>02}.{extension}" + archive_fmt = "{album_id}_{num}" + root = "https://www.erome.com" + + def __init__(self, match): + Extractor.__init__(self, match) + self.item = match.group(1) + self.__cookies = True + + def items(self): + for album_id in self.albums(): + url = "{}/a/{}".format(self.root, album_id) + page = self.request(url).text + + title, pos = text.extract( + page, 'property="og:title" content="', '"') + pos = page.index('<div class="user-profile', pos) + user, pos = text.extract( + page, 'href="https://www.erome.com/', '"', pos) + data = { + "album_id": album_id, + "title" : text.unescape(title), + "user" : text.unquote(user), + } + + yield Message.Directory, data + groups = page.split('<div class="media-group"') + for data["num"], group in enumerate(util.advance(groups, 1), 1): + url = (text.extract(group, '<source src="', '"')[0] or + text.extract(group, 'data-src="', '"')[0]) + yield Message.Url, url, text.nameext_from_url(url, data) + + def albums(self): + return () + + def request(self, url, **kwargs): + if self.__cookies: + self.__cookies = False + self.session.cookies.update(_cookie_cache()) + + for _ in range(5): + response = Extractor.request(self, url, **kwargs) + if response.cookies: + _cookie_cache.update("", response.cookies) + if response.content.find( + b"<title>Please wait a few moments</title>", 0, 600) < 0: + return response + time.sleep(5) + + def _pagination(self, url, params): + for params["page"] in itertools.count(1): + page = self.request(url, params=params).text + + album_ids = EromeAlbumExtractor.pattern.findall(page) + yield from album_ids + + if len(album_ids) < 36: + return + + +class EromeAlbumExtractor(EromeExtractor): + """Extractor for albums on erome.com""" + subcategory = "album" + pattern = BASE_PATTERN + r"/a/(\w+)" + test = ("https://www.erome.com/a/UHUX1B73", { + "pattern": r"https://s\d+\.erome\.com/342/UHUX1B73/\w+", + "count": 5, + "keyword": { + "album_id": "UHUX1B73", + "num": int, + "title": "Ryan Ryans", + "user": "gutiquq", + }, + }) + + def albums(self): + return (self.item,) + + +class EromeUserExtractor(EromeExtractor): + subcategory = "user" + pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)" + test = ("https://www.erome.com/gutiquq", { + "range": "1-25", + "count": 25, + }) + + def albums(self): + url = "{}/{}".format(self.root, self.item) + return self._pagination(url, {}) + + +class EromeSearchExtractor(EromeExtractor): + subcategory = "search" + pattern = BASE_PATTERN + r"/search\?q=([^&#]+)" + test = ("https://www.erome.com/search?q=cute", { + "range": "1-25", + "count": 25, + }) + + def albums(self): + url = self.root + "/search" + params = {"q": text.unquote(self.item)} + return self._pagination(url, params) + + +@cache() +def _cookie_cache(): + return () diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index df5a73e..a7b0356 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -186,7 +186,8 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor): subcategory = "gallery" pattern = BASE_PATTERN + r"/gallery/([^/?#]+)" test = ("https://www.furaffinity.net/gallery/mirlinthloth/", { - "pattern": r"https://d\d?.facdn.net/art/mirlinthloth/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/mirlinthloth/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -198,7 +199,8 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "{user!l}", "Scraps") pattern = BASE_PATTERN + r"/scraps/([^/?#]+)" test = ("https://www.furaffinity.net/scraps/mirlinthloth/", { - "pattern": r"https://d\d?.facdn.net/art/[^/]+(/stories)?/\d+/\d+.\w+.", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/[^/]+(/stories)?/\d+/\d+.\w+.", "count": ">= 3", }) @@ -209,7 +211,8 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "{user!l}", "Favorites") pattern = BASE_PATTERN + r"/favorites/([^/?#]+)" test = ("https://www.furaffinity.net/favorites/mirlinthloth/", { - "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -224,7 +227,8 @@ class FuraffinitySearchExtractor(FuraffinityExtractor): directory_fmt = ("{category}", "Search", "{search}") pattern = BASE_PATTERN + r"/search/?\?([^#]+)" test = ("https://www.furaffinity.net/search/?q=cute", { - "pattern": r"https://d\d?.facdn.net/art/[^/]+/\d+/\d+.\w+\.\w+", + "pattern": r"https://d\d?\.f(uraffinity|acdn)\.net" + r"/art/[^/]+/\d+/\d+.\w+\.\w+", "range": "45-50", "count": 6, }) @@ -243,9 +247,9 @@ class FuraffinityPostExtractor(FuraffinityExtractor): pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)" test = ( ("https://www.furaffinity.net/view/21835115/", { - "pattern": r"https://d\d*\.facdn\.net/(download/)?art/mirlinthloth" - r"/music/1488278723/1480267446.mirlinthloth_dj_fennmink" - r"_-_bude_s_4_ever\.mp3", + "pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/(download/)?art" + r"/mirlinthloth/music/1488278723/1480267446.mirlinthlot" + r"h_dj_fennmink_-_bude_s_4_ever\.mp3", "keyword": { "artist" : "mirlinthloth", "artist_url" : "mirlinthloth", @@ -256,7 +260,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor): "id" : 21835115, "tags" : list, "title" : "Bude's 4 Ever", - "url" : r"re:https://d\d?.facdn.net/art/mirlinthloth/m", + "url" : r"re:https://d\d?\.f(uraffinity|acdn)\.net/art", "user" : "mirlinthloth", "views" : int, "favorites" : int, diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index 84ad3af..093f3fe 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -82,12 +82,13 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor): """Extractor for search results and listings on hentaifox.com""" subcategory = "search" pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com" - r"(/(?:parody|tag|artist|character|search)/[^/?%#]+)") + r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)") test = ( ("https://hentaifox.com/parody/touhou-project/"), ("https://hentaifox.com/character/reimu-hakurei/"), ("https://hentaifox.com/artist/distance/"), ("https://hentaifox.com/search/touhou/"), + ("https://hentaifox.com/group/v-slash/"), ("https://hentaifox.com/tag/heterochromia/", { "pattern": HentaifoxGalleryExtractor.pattern, "count": ">= 60", diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index fe3afbb..abb6d10 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2020 Mike Fährmann +# Copyright 2016-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -19,9 +19,8 @@ class ImagehostImageExtractor(Extractor): basecategory = "imagehost" subcategory = "image" archive_fmt = "{token}" - https = False - method = "post" - params = "simple" + https = True + params = None cookies = None encoding = None @@ -30,6 +29,7 @@ class ImagehostImageExtractor(Extractor): self.page_url = "http{}://{}".format( "s" if self.https else "", match.group(1)) self.token = match.group(2) + if self.params == "simple": self.params = { "imgContinue": "Continue+to+image+...+", @@ -42,14 +42,11 @@ class ImagehostImageExtractor(Extractor): "adb": "1", "next": "Continue+to+image+...+", } - else: - self.params = {} - self.method = "get" def items(self): page = self.request( self.page_url, - method=self.method, + method=("POST" if self.params else "GET"), data=self.params, cookies=self.cookies, encoding=self.encoding, @@ -91,7 +88,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor): "exception": exception.NotFoundError, }), ) - https = True + params = "simple" encoding = "utf-8" def __init__(self, match): @@ -122,7 +119,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor): "keyword": "a8bb9ab8b2f6844071945d31f8c6e04724051f37", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True + params = "simple" encoding = "utf-8" def get_info(self, page): @@ -143,7 +140,7 @@ class ImagevenueImageExtractor(ImagehostImageExtractor): "url": "46812995d557f2c6adf0ebd0e631e6e4e45facde", "content": "59ec819cbd972dd9a71f25866fbfc416f2f215b3", }) - params = None + https = False def get_info(self, page): url = text.extract(page, "SRC='", "'")[0] @@ -159,8 +156,6 @@ class ImagetwistImageExtractor(ImagehostImageExtractor): "keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None @property @memcache(maxage=3*3600) @@ -182,8 +177,6 @@ class ImgspiceImageExtractor(ImagehostImageExtractor): "keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None def get_info(self, page): pos = page.find('id="imgpreview"') @@ -204,8 +197,6 @@ class PixhostImageExtractor(ImagehostImageExtractor): "keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None cookies = {"pixhostads": "1", "pixhosttest": "1"} def get_info(self, page): @@ -224,8 +215,6 @@ class PostimgImageExtractor(ImagehostImageExtractor): "keyword": "2d05808d04e4e83e33200db83521af06e3147a84", "content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee", }) - https = True - params = None def get_info(self, page): url , pos = text.extract(page, 'id="main-image" src="', '"') @@ -243,8 +232,6 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor): "keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca", "content": "0c8768055e4e20e7c7259608b67799171b691140", }) - https = True - params = None def get_info(self, page): url = text.extract(page, 'src="', '"', page.index("<img "))[0] @@ -259,9 +246,24 @@ class ViprImageExtractor(ImagehostImageExtractor): "url": "88f6a3ecbf3356a11ae0868b518c60800e070202", "keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771", }) - https = True - params = None def get_info(self, page): url = text.extract(page, '<img src="', '"')[0] return url, url + + +class ImgclickImageExtractor(ImagehostImageExtractor): + """Extractor for single images from imgclick.net""" + category = "imgclick" + pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))" + test = ("http://imgclick.net/4tbrre1oxew9/test-_-_.png.html", { + "url": "b967f2d372ffb9f5d3a927c6dd560e120b10a808", + "keyword": "6895256143eab955622fc149aa367777a8815ba3", + "content": "0c8768055e4e20e7c7259608b67799171b691140", + }) + params = "complex" + + def get_info(self, page): + url , pos = text.extract(page, '<br><img src="', '"') + filename, pos = text.extract(page, 'alt="', '"', pos) + return url, filename diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py index 6051db0..9b5331a 100644 --- a/gallery_dl/extractor/inkbunny.py +++ b/gallery_dl/extractor/inkbunny.py @@ -54,7 +54,11 @@ class InkbunnyExtractor(Extractor): post["date"] = text.parse_datetime( file["create_datetime"] + "00", "%Y-%m-%d %H:%M:%S.%f%z") text.nameext_from_url(file["file_name"], post) - yield Message.Url, file["file_url_full"], post + + url = file["file_url_full"] + if "/private_files/" in url: + url += "?sid=" + self.api.session_id + yield Message.Url, url, post class InkbunnyUserExtractor(InkbunnyExtractor): @@ -154,7 +158,10 @@ class InkbunnyPostExtractor(InkbunnyExtractor): self.submission_id = match.group(1) def posts(self): - return self.api.detail(({"submission_id": self.submission_id},)) + submissions = self.api.detail(({"submission_id": self.submission_id},)) + if submissions[0] is None: + raise exception.NotFoundError("submission") + return submissions class InkbunnyAPI(): diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index a5b5e00..377e00b 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -10,17 +10,22 @@ from .common import Extractor, Message from .. import text +import re + +BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)" class KemonopartyExtractor(Extractor): """Base class for kemonoparty extractors""" category = "kemonoparty" root = "https://kemono.party" - directory_fmt = ("{category}", "{user}") + directory_fmt = ("{category}", "{service}", "{user}") filename_fmt = "{id}_{title}_{filename}.{extension}" - archive_fmt = "{user}_{id}_{filename}.{extension}" + archive_fmt = "{service}_{user}_{id}_{filename}.{extension}" def items(self): + find_inline = re.compile(r'src="(/inline/[^"]+)').findall + for post in self.posts(): files = [] @@ -28,23 +33,32 @@ class KemonopartyExtractor(Extractor): files.append(post["file"]) if post["attachments"]: files.extend(post["attachments"]) + for path in find_inline(post["content"] or ""): + files.append({"path": path, "name": path}) + post["date"] = text.parse_datetime( post["published"], "%a, %d %b %Y %H:%M:%S %Z") yield Message.Directory, post for post["num"], file in enumerate(files, 1): + url = file["path"] + if url[0] == "/": + url = self.root + url text.nameext_from_url(file["name"], post) - yield Message.Url, self.root + file["path"], post + yield Message.Url, url, post class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing""" subcategory = "user" - pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/?(?:$|[?#])" - test = ("https://kemono.party/fanbox/user/6993449", { - "range": "1-25", - "count": 25, - }) + pattern = BASE_PATTERN + r"/?(?:$|[?#])" + test = ( + ("https://kemono.party/fanbox/user/6993449", { + "range": "1-25", + "count": 25, + }), + ("https://kemono.party/subscribestar/user/alcorart"), + ) def __init__(self, match): KemonopartyExtractor.__init__(self, match) @@ -67,28 +81,37 @@ class KemonopartyUserExtractor(KemonopartyExtractor): class KemonopartyPostExtractor(KemonopartyExtractor): """Extractor for a single kemono.party post""" subcategory = "post" - pattern = r"(?:https?://)?kemono\.party/([^/?#]+)/user/(\d+)/post/(\d+)" - test = ("https://kemono.party/fanbox/user/6993449/post/506575", { - "pattern": r"https://kemono\.party/files/fanbox" - r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg", - "keyword": { - "added": "Wed, 06 May 2020 20:28:02 GMT", - "content": str, - "date": "dt:2019-08-11 02:09:04", - "edited": None, - "embed": dict, - "extension": "jpeg", - "filename": "P058kDFYus7DbqAkGlfWTlOr", - "id": "506575", - "num": 1, - "published": "Sun, 11 Aug 2019 02:09:04 GMT", - "service": "fanbox", - "shared_file": False, - "subcategory": "post", - "title": "c96取り置き", - "user": "6993449", - }, - }) + pattern = BASE_PATTERN + r"/post/([^/?#]+)" + test = ( + ("https://kemono.party/fanbox/user/6993449/post/506575", { + "pattern": r"https://kemono\.party/files/fanbox" + r"/6993449/506575/P058kDFYus7DbqAkGlfWTlOr\.jpeg", + "keyword": { + "added": "Wed, 06 May 2020 20:28:02 GMT", + "content": str, + "date": "dt:2019-08-11 02:09:04", + "edited": None, + "embed": dict, + "extension": "jpeg", + "filename": "P058kDFYus7DbqAkGlfWTlOr", + "id": "506575", + "num": 1, + "published": "Sun, 11 Aug 2019 02:09:04 GMT", + "service": "fanbox", + "shared_file": False, + "subcategory": "post", + "title": "c96取り置き", + "user": "6993449", + }, + }), + # inline image (#1286) + ("https://kemono.party/fanbox/user/7356311/post/802343", { + "pattern": r"https://kemono\.party/inline/fanbox" + r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg", + }), + ("https://kemono.party/subscribestar/user/alcorart/post/184330"), + ("https://kemono.party/gumroad/user/trylsc/post/IURjT"), + ) def __init__(self, match): KemonopartyExtractor.__init__(self, match) diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 2156ecf..d59e5bb 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -17,7 +17,6 @@ class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" root = "https://mangadex.org" - api_root = "https://api.mangadex.org" # mangadex-to-iso639-1 codes iso639_map = { @@ -27,6 +26,10 @@ class MangadexExtractor(Extractor): "vn": "vi", } + def __init__(self, match): + Extractor.__init__(self, match) + self.api_root = self.config("api-server") or "https://mangadex.org/api" + def chapter_data(self, chapter_id): """Request API results for 'chapter_id'""" url = "{}/v2/chapter/{}".format(self.api_root, chapter_id) diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py index 8f1f3f2..9c4d686 100644 --- a/gallery_dl/extractor/nsfwalbum.py +++ b/gallery_dl/extractor/nsfwalbum.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019 Mike Fährmann +# Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -44,17 +44,24 @@ class NsfwalbumAlbumExtractor(GalleryExtractor): iframe = self.root + "/iframe_image.php?id=" backend = self.root + "/backend.php" for image_id in text.extract_iter(page, 'data-img-id="', '"'): - spirit = text.extract(self.request( - iframe + image_id).text, 'giraffe.annihilate("', '"')[0] - params = {"spirit": self._annihilate(spirit), "photo": image_id} + spirit = self._annihilate(text.extract(self.request( + iframe + image_id).text, 'giraffe.annihilate("', '"')[0]) + params = {"spirit": spirit, "photo": image_id} data = self.request(backend, params=params).json() yield data[0], { "id" : text.parse_int(image_id), "width" : text.parse_int(data[1]), "height": text.parse_int(data[2]), + "_http_validate": self._validate_response, + "_fallback": ("{}/imageProxy.php?photoId={}&spirit={}".format( + self.root, image_id, spirit),), } @staticmethod + def _validate_response(response): + return not response.request.url.endswith("/no_image.jpg") + + @staticmethod def _annihilate(value, base=6): return "".join( chr(ord(char) ^ base) diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py index 4bb2c48..2ec7165 100644 --- a/gallery_dl/extractor/oauth.py +++ b/gallery_dl/extractor/oauth.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2017-2020 Mike Fährmann +# Copyright 2017-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,10 +9,12 @@ """Utility classes to setup OAuth and link accounts to gallery-dl""" from .common import Extractor, Message -from . import deviantart, flickr, reddit, smugmug, tumblr +from . import deviantart, flickr, pixiv, reddit, smugmug, tumblr from .. import text, oauth, util, config, exception from ..cache import cache import urllib.parse +import hashlib +import base64 REDIRECT_URI_LOCALHOST = "http://localhost:6414/" REDIRECT_URI_HTTPS = "https://mikf.github.io/gallery-dl/oauth-redirect.html" @@ -62,14 +64,14 @@ class OAuthBase(Extractor): self.client.send(b"HTTP/1.1 200 OK\r\n\r\n" + msg.encode()) self.client.close() - def open(self, url, params): + def open(self, url, params, recv=None): """Open 'url' in browser amd return response parameters""" import webbrowser url += "?" + urllib.parse.urlencode(params) if not self.config("browser", True) or not webbrowser.open(url): print("Please open this URL in your browser:") print(url, end="\n\n", flush=True) - return self.recv() + return (recv or self.recv)() def _oauth1_authorization_flow( self, request_token_url, authorize_url, access_token_url): @@ -362,6 +364,69 @@ class OAuthMastodon(OAuthBase): return data +class OAuthPixiv(OAuthBase): + subcategory = "pixiv" + pattern = "oauth:pixiv$" + + def items(self): + yield Message.Version, 1 + + code_verifier = util.generate_token(32) + digest = hashlib.sha256(code_verifier.encode("ascii")).digest() + code_challenge = base64.urlsafe_b64encode( + digest).rstrip(b"=").decode("ascii") + + url = "https://app-api.pixiv.net/web/v1/login" + params = { + "code_challenge": code_challenge, + "code_challenge_method": "S256", + "client": "pixiv-android", + } + code = self.open(url, params, self._input) + + url = "https://oauth.secure.pixiv.net/auth/token" + headers = { + "User-Agent": "PixivAndroidApp/5.0.234 (Android 11; Pixel 5)", + } + data = { + "client_id" : self.oauth_config( + "client-id" , pixiv.PixivAppAPI.CLIENT_ID), + "client_secret" : self.oauth_config( + "client-secret", pixiv.PixivAppAPI.CLIENT_SECRET), + "code" : code, + "code_verifier" : code_verifier, + "grant_type" : "authorization_code", + "include_policy": "true", + "redirect_uri" : "https://app-api.pixiv.net" + "/web/v1/users/auth/pixiv/callback", + } + data = self.session.post(url, headers=headers, data=data).json() + + if "error" in data: + print(data) + if data["error"] == "invalid_request": + print("'code' expired, try again") + return + + token = data["refresh_token"] + if self.cache: + username = self.oauth_config("username") + pixiv._refresh_token_cache.update(username, token) + self.log.info("Writing 'refresh-token' to cache") + + print(self._generate_message(("refresh-token",), (token,))) + + def _input(self): + print(""" +1) Open your browser's Developer Tools (F12) and switch to the Network tab +2) Login +4) Select the last network monitor entry ('callback?state=...') +4) Copy its 'code' query parameter, paste it below, and press Enter +""") + code = input("code: ") + return code.rpartition("=")[2].strip() + + MASTODON_MSG_TEMPLATE = """ Your 'access-token' is diff --git a/gallery_dl/extractor/paheal.py b/gallery_dl/extractor/paheal.py index abcc33d..05cbcdf 100644 --- a/gallery_dl/extractor/paheal.py +++ b/gallery_dl/extractor/paheal.py @@ -115,10 +115,12 @@ class PahealPostExtractor(PahealExtractor): tags , pos = text.extract(page, ": ", "<") md5 , pos = text.extract(page, "/_thumbs/", "/", pos) url , pos = text.extract(page, "id='main_image' src='", "'", pos) - width , pos = text.extract(page, "data-width='", "'", pos) - height, pos = text.extract(page, "data-height='", "'", pos) + width , pos = text.extract(page, "data-width=", " ", pos) + height, pos = text.extract(page, "data-height=", " ", pos) return ({ "id": self.post_id, "md5": md5, "tags": tags, "file_url": url, - "width": width, "height": height, "size": 0, + "size" : 0, + "width" : width.strip("'\""), + "height": height.strip("'\""), },) diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index ad259f4..688c005 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -34,6 +34,10 @@ class PatreonExtractor(Extractor): PatreonExtractor._warning = False for post in self.posts(): + + if not post.get("current_user_can_view", True): + self.log.warning("Not allowed to view post %s", post["id"]) + continue post["num"] = 0 hashes = set() @@ -113,14 +117,17 @@ class PatreonExtractor(Extractor): """Process and extend a 'post' object""" attr = post["attributes"] attr["id"] = text.parse_int(post["id"]) - attr["images"] = self._files(post, included, "images") - attr["attachments"] = self._files(post, included, "attachments") - attr["date"] = text.parse_datetime( - attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") - user = post["relationships"]["user"] - attr["creator"] = ( - self._user(user["links"]["related"]) or - included["user"][user["data"]["id"]]) + + if post.get("current_user_can_view", True): + attr["images"] = self._files(post, included, "images") + attr["attachments"] = self._files(post, included, "attachments") + attr["date"] = text.parse_datetime( + attr["published_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + user = post["relationships"]["user"] + attr["creator"] = ( + self._user(user["links"]["related"]) or + included["user"][user["data"]["id"]]) + return attr @staticmethod diff --git a/gallery_dl/extractor/pillowfort.py b/gallery_dl/extractor/pillowfort.py new file mode 100644 index 0000000..cbd65d7 --- /dev/null +++ b/gallery_dl/extractor/pillowfort.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.pillowfort.social/""" + +from .common import Extractor, Message +from .. import text + +BASE_PATTERN = r"(?:https?://)?www\.pillowfort\.social" + + +class PillowfortExtractor(Extractor): + """Base class for pillowfort extractors""" + category = "pillowfort" + root = "https://www.pillowfort.social" + directory_fmt = ("{category}", "{username}") + filename_fmt = ("{post_id} {title|original_post[title]} " + "{num:>02}.{extension}") + archive_fmt = "{id}" + + def __init__(self, match): + Extractor.__init__(self, match) + self.item = match.group(1) + self.reblogs = self.config("reblogs", False) + + def items(self): + for post in self.posts(): + + if "original_post" in post and not self.reblogs: + continue + + files = post["media"] + del post["media"] + + post["date"] = text.parse_datetime( + post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + yield Message.Directory, post + + post["num"] = 0 + for file in files: + url = file["url"] + if url: + post.update(file) + post["num"] += 1 + post["date"] = text.parse_datetime( + file["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z") + yield Message.Url, url, text.nameext_from_url(url, post) + + +class PillowfortPostExtractor(PillowfortExtractor): + """Extractor for a single pillowfort post""" + subcategory = "post" + pattern = BASE_PATTERN + r"/posts/(\d+)" + test = ("https://www.pillowfort.social/posts/27510", { + "pattern": r"https://img\d+\.pillowfort\.social/posts/\w+_out\d+\.png", + "count": 4, + "keyword": { + "avatar_url": str, + "col": 0, + "commentable": True, + "comments_count": int, + "community_id": None, + "content": str, + "created_at": str, + "date": "type:datetime", + "deleted": None, + "deleted_at": None, + "deleted_by_mod": None, + "deleted_for_flag_id": None, + "embed_code": None, + "id": int, + "last_activity": str, + "last_activity_elapsed": str, + "last_edited_at": None, + "likes_count": int, + "media_type": "picture", + "nsfw": False, + "num": int, + "original_post_id": None, + "original_post_user_id": None, + "picture_content_type": None, + "picture_file_name": None, + "picture_file_size": None, + "picture_updated_at": None, + "post_id": 27510, + "post_type": "picture", + "privacy": "public", + "reblog_copy_info": list, + "rebloggable": True, + "reblogged_from_post_id": None, + "reblogged_from_user_id": None, + "reblogs_count": int, + "row": int, + "small_image_url": None, + "tags": list, + "time_elapsed": str, + "timestamp": str, + "title": "What is Pillowfort.io? ", + "updated_at": str, + "url": r"re:https://img3.pillowfort.social/posts/.*\.png", + "user_id": 5, + "username": "Staff" + }, + }) + + def posts(self): + url = "{}/posts/{}/json/".format(self.root, self.item) + return (self.request(url).json(),) + + +class PillowfortUserExtractor(PillowfortExtractor): + """Extractor for all posts of a pillowfort user""" + subcategory = "user" + pattern = BASE_PATTERN + r"/(?!posts/)([^/?#]+)" + test = ("https://www.pillowfort.social/Pome", { + "pattern": r"https://img\d+\.pillowfort\.social/posts/", + "range": "1-15", + "count": 15, + }) + + def posts(self): + url = "{}/{}/json/".format(self.root, self.item) + params = {"p": 1} + + while True: + posts = self.request(url, params=params).json()["posts"] + yield from posts + + if len(posts) < 20: + return + params["p"] += 1 diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index a872ada..be976e9 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -510,49 +510,48 @@ class PixivAppAPI(): def __init__(self, extractor): self.extractor = extractor self.log = extractor.log - self.username, self.password = extractor._get_auth_info() + self.username = extractor._get_auth_info()[0] self.user = None + extractor.session.headers.update({ + "App-OS" : "ios", + "App-OS-Version": "13.1.2", + "App-Version" : "7.7.6", + "User-Agent" : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)", + "Referer" : "https://app-api.pixiv.net/", + }) + self.client_id = extractor.config( "client-id", self.CLIENT_ID) self.client_secret = extractor.config( "client-secret", self.CLIENT_SECRET) - extractor.session.headers.update({ - "App-OS": "ios", - "App-OS-Version": "10.3.1", - "App-Version": "6.7.1", - "User-Agent": "PixivIOSApp/6.7.1 (iOS 10.3.1; iPhone8,1)", - "Referer": "https://app-api.pixiv.net/", - }) + + token = extractor.config("refresh-token") + if token is None or token == "cache": + token = _refresh_token_cache(self.username) + self.refresh_token = token def login(self): """Login and gain an access token""" - self.user, auth = self._login_impl(self.username, self.password) + self.user, auth = self._login_impl(self.username) self.extractor.session.headers["Authorization"] = auth @cache(maxage=3600, keyarg=1) - def _login_impl(self, username, password): - if not username or not password: + def _login_impl(self, username): + if not self.refresh_token: raise exception.AuthenticationError( - "Username and password required") + "'refresh-token' required.\n" + "Run `gallery-dl oauth:pixiv` to get one.") + self.log.info("Refreshing access token") url = "https://oauth.secure.pixiv.net/auth/token" data = { - "client_id": self.client_id, - "client_secret": self.client_secret, - "get_secure_url": 1, + "client_id" : self.client_id, + "client_secret" : self.client_secret, + "grant_type" : "refresh_token", + "refresh_token" : self.refresh_token, + "get_secure_url": "1", } - refresh_token = _refresh_token_cache(username) - - if refresh_token: - self.log.info("Refreshing access token") - data["grant_type"] = "refresh_token" - data["refresh_token"] = refresh_token - else: - self.log.info("Logging in as %s", username) - data["grant_type"] = "password" - data["username"] = username - data["password"] = password time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00") headers = { @@ -565,11 +564,9 @@ class PixivAppAPI(): url, method="POST", headers=headers, data=data, fatal=False) if response.status_code >= 400: self.log.debug(response.text) - raise exception.AuthenticationError() + raise exception.AuthenticationError("Invalid refresh token") data = response.json()["response"] - if not refresh_token: - _refresh_token_cache.update(username, data["refresh_token"]) return data["user"], "Bearer " + data["access_token"] def illust_detail(self, illust_id): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index e98b630..a5f0138 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -242,7 +242,7 @@ class SankakuAPI(): success = True if not success: code = data.get("code") - if code == "invalid_token": + if code and code.endswith(("invalid-token", "invalid_token")): _authenticate_impl.invalidate(self.username) continue raise exception.StopExtraction(code) diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index 8f668df..0b970cc 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann, Leonardo Taccari +# Copyright 2016-2021 Mike Fährmann, Leonardo Taccari # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -61,7 +61,7 @@ class SlidesharePresentationExtractor(Extractor): title, pos = text.extract( page, '<span class="j-title-breadcrumb">', '</span>', pos) views, pos = text.extract( - page, '<span class="notranslate pippin-data">', 'views<', pos) + page, '<span class="notranslate">', 'views<', pos) published, pos = text.extract( page, '<time datetime="', '"', pos) alt_descr, pos = text.extract( diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 4034732..7b6bf21 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -26,6 +26,7 @@ class TwitterExtractor(Extractor): filename_fmt = "{tweet_id}_{num}.{extension}" archive_fmt = "{tweet_id}_{retweet_id}_{num}" cookiedomain = ".twitter.com" + cookienames = ("auth_token",) root = "https://twitter.com" def __init__(self, match): @@ -231,9 +232,10 @@ class TwitterExtractor(Extractor): """Yield all relevant tweet objects""" def login(self): - username, password = self._get_auth_info() - if username: - self._update_cookies(self._login_impl(username, password)) + if not self._check_cookies(self.cookienames): + username, password = self._get_auth_info() + if username: + self._update_cookies(self._login_impl(username, password)) @cache(maxage=360*24*3600, keyarg=1) def _login_impl(self, username, password): diff --git a/gallery_dl/extractor/vanillarock.py b/gallery_dl/extractor/vanillarock.py index e10c642..32eaa36 100644 --- a/gallery_dl/extractor/vanillarock.py +++ b/gallery_dl/extractor/vanillarock.py @@ -37,7 +37,7 @@ class VanillarockPostExtractor(VanillarockExtractor): def items(self): extr = text.extract_from(self.request(self.root + self.path).text) - name = extr("<title>", "</title>") + name = extr('<h1 class="entry-title">', "<") imgs = [] while True: @@ -48,7 +48,7 @@ class VanillarockPostExtractor(VanillarockExtractor): data = { "count": len(imgs), - "title": text.unescape(name.rpartition(" | ")[0]), + "title": text.unescape(name), "path" : self.path.strip("/"), "date" : text.parse_datetime(extr( '<div class="date">', '</div>'), "%Y-%m-%d %H:%M"), @@ -76,7 +76,7 @@ class VanillarockTagExtractor(VanillarockExtractor): "%ad%e7%94%bb%e5%83%8f/%e8%90%8c%e3%81%88%e3%83%bb%e3%82%bd%e3%83%95" "%e3%83%88%e3%82%a8%e3%83%ad"), { "pattern": VanillarockPostExtractor.pattern, - "count": 3, + "count": ">= 5", }), ) diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py index 4449e19..1a26264 100644 --- a/gallery_dl/extractor/webtoons.py +++ b/gallery_dl/extractor/webtoons.py @@ -48,6 +48,7 @@ class WebtoonsEpisodeExtractor(WebtoonsExtractor): "/ep-572-earth/viewer?title_no=352&episode_no=572"), { "url": "11041d71a3f92728305c11a228e77cf0f7aa02ef", "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7", + "42055e44659f6ffc410b3fb6557346dfbb993df3", "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"), "count": 5, }), diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index 71a67c1..c08f111 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -91,7 +91,7 @@ class MetadataPP(PostProcessor): def _filename_extfmt(self, pathfmt): kwdict = pathfmt.kwdict - ext = kwdict["extension"] + ext = kwdict.get("extension") kwdict["extension"] = pathfmt.extension kwdict["extension"] = pathfmt.prefix + self._extension_fmt(kwdict) filename = pathfmt.build_filename() diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 572d3bb..8244a95 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2020 Mike Fährmann +# Copyright 2016-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.16.4" +__version__ = "1.16.5" |