From c6b88a96bd191711fc540d7babab3d2e09c68da8 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Fri, 29 Apr 2022 01:58:56 -0400 Subject: New upstream version 1.21.2. --- CHANGELOG.md | 19 ++++++++ PKG-INFO | 6 +-- README.rst | 4 +- data/man/gallery-dl.1 | 2 +- data/man/gallery-dl.conf.5 | 50 ++++++++++++++++++-- gallery_dl.egg-info/PKG-INFO | 6 +-- gallery_dl/extractor/common.py | 23 +++++---- gallery_dl/extractor/cyberdrop.py | 6 +-- gallery_dl/extractor/danbooru.py | 26 ++++++----- gallery_dl/extractor/deviantart.py | 28 +++++++---- gallery_dl/extractor/exhentai.py | 2 +- gallery_dl/extractor/issuu.py | 44 +++++++++--------- gallery_dl/extractor/lolisafe.py | 2 +- gallery_dl/extractor/mangadex.py | 8 +++- gallery_dl/extractor/nijie.py | 12 ++--- gallery_dl/extractor/photovogue.py | 12 ++--- gallery_dl/extractor/pixiv.py | 82 +++++++++++++++++++++++---------- gallery_dl/extractor/readcomiconline.py | 37 +++++++++++++-- gallery_dl/extractor/sexcom.py | 9 +++- gallery_dl/extractor/twitter.py | 14 +++++- gallery_dl/extractor/unsplash.py | 4 +- gallery_dl/extractor/vk.py | 78 ++++++++++++++++++------------- gallery_dl/extractor/weibo.py | 16 ++++++- gallery_dl/extractor/wikiart.py | 4 +- gallery_dl/postprocessor/ugoira.py | 14 ++++-- gallery_dl/util.py | 2 + gallery_dl/version.py | 2 +- test/test_util.py | 8 +++- 28 files changed, 359 insertions(+), 161 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 994d5f0..61e343d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## 1.21.2 - 2022-04-27 +### Additions +- [deviantart] implement `pagination` option ([#2488](https://github.com/mikf/gallery-dl/issues/2488)) +- [pixiv] implement `background` option ([#623](https://github.com/mikf/gallery-dl/issues/623), [#1124](https://github.com/mikf/gallery-dl/issues/1124), [#2495](https://github.com/mikf/gallery-dl/issues/2495)) +- [postprocessor:ugoira] report ffmpeg/mkvmerge errors ([#2487](https://github.com/mikf/gallery-dl/issues/2487)) +### Fixes +- [cyberdrop] match cyberdrop.to URLs ([#2496](https://github.com/mikf/gallery-dl/issues/2496)) +- [e621] fix 403 errors ([#2533](https://github.com/mikf/gallery-dl/issues/2533)) +- [issuu] fix extraction ([#2483](https://github.com/mikf/gallery-dl/issues/2483)) +- [mangadex] download from available chapters despite `externalUrl` ([#2503](https://github.com/mikf/gallery-dl/issues/2503)) +- [photovogue] update domain and api endpoint ([#2494](https://github.com/mikf/gallery-dl/issues/2494)) +- [sexcom] add fallback for empty files ([#2485](https://github.com/mikf/gallery-dl/issues/2485)) +- [twitter] improve syndication video selection ([#2354](https://github.com/mikf/gallery-dl/issues/2354)) +- [twitter] fix various syndication issues ([#2499](https://github.com/mikf/gallery-dl/issues/2499), [#2354](https://github.com/mikf/gallery-dl/issues/2354)) +- [weibo] fix infinite retries for deleted accounts ([#2521](https://github.com/mikf/gallery-dl/issues/2521)) +- [vk] fix extraction ([#2512](https://github.com/mikf/gallery-dl/issues/2512)) +- [postprocessor:ugoira] use compatible paths with mkvmerge ([#2487](https://github.com/mikf/gallery-dl/issues/2487)) +- [postprocessor:ugoira] do not auto-select the `image2` demuxer ([#2492](https://github.com/mikf/gallery-dl/issues/2492)) + ## 1.21.1 - 2022-04-08 ### Additions - [gofile] add gofile.io extractor ([#2364](https://github.com/mikf/gallery-dl/issues/2364)) diff --git a/PKG-INFO b/PKG-INFO index c3c66f3..63c6185 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery_dl -Version: 1.21.1 +Version: 1.21.2 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -99,8 +99,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/README.rst b/README.rst index a9dfe11..acdf738 100644 --- a/README.rst +++ b/README.rst @@ -65,8 +65,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1 index 6e3a965..18f4d64 100644 --- a/data/man/gallery-dl.1 +++ b/data/man/gallery-dl.1 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL" "1" "2022-04-08" "1.21.1" "gallery-dl Manual" +.TH "GALLERY-DL" "1" "2022-04-27" "1.21.2" "gallery-dl Manual" .\" disable hyphenation .nh diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5 index 950300e..aa998ee 100644 --- a/data/man/gallery-dl.conf.5 +++ b/data/man/gallery-dl.conf.5 @@ -1,4 +1,4 @@ -.TH "GALLERY-DL.CONF" "5" "2022-04-08" "1.21.1" "gallery-dl Manual" +.TH "GALLERY-DL.CONF" "5" "2022-04-27" "1.21.2" "gallery-dl Manual" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) @@ -1239,6 +1239,22 @@ files if they are images and falls back to preview versions for everything else (archives, etc.). +.SS extractor.deviantart.pagination +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"api"\f[] + +.IP "Description:" 4 +Controls when to stop paginating over API results. + +.br +* \f[I]"api"\f[]: Trust the API and stop when \f[I]has_more\f[] is \f[I]false\f[]. +.br +* \f[I]"manual"\f[]: Disregard \f[I]has_more\f[] and only stop when a batch of results is empty. + + .SS extractor.deviantart.refresh-token .IP "Type:" 6 \f[I]string\f[] @@ -2021,6 +2037,17 @@ Download from video pins. Download user avatars. +.SS extractor.pixiv.user.background +.IP "Type:" 6 +\f[I]bool\f[] + +.IP "Default:" 9 +\f[I]false\f[] + +.IP "Description:" 4 +Download user background banners. + + .SS extractor.pixiv.user.metadata .IP "Type:" 6 \f[I]bool\f[] @@ -2131,6 +2158,20 @@ Controls how to handle redirects to CAPTCHA pages. * \f[I]"wait\f[]: Ask the user to solve the CAPTCHA and wait. +.SS extractor.readcomiconline.quality +.IP "Type:" 6 +\f[I]string\f[] + +.IP "Default:" 9 +\f[I]"auto"\f[] + +.IP "Description:" 4 +Sets the \f[I]quality\f[] query parameter of issue pages. (\f[I]"lq"\f[] or \f[I]"hq"\f[]) + +\f[I]"auto"\f[] uses the quality parameter of the input URL +or \f[I]"hq"\f[] if not present. + + .SS extractor.reddit.comments .IP "Type:" 6 \f[I]integer\f[] @@ -3674,14 +3715,13 @@ Additional FFmpeg command-line arguments. FFmpeg demuxer to read and process input files with. Possible values are .br -* "\f[I]concat\f[]" (inaccurate frame timecodes) +* "\f[I]concat\f[]" (inaccurate frame timecodes for non-uniform frame delays) .br -* "\f[I]image2\f[]" (accurate timecodes, not usable on Windows) +* "\f[I]image2\f[]" (accurate timecodes, requires nanosecond file timestamps, i.e. no Windows or macOS) .br * "mkvmerge" (accurate timecodes, only WebM or MKV, requires \f[I]mkvmerge\f[]) -"auto" will select mkvmerge if possible and fall back to image2 or -concat depending on the local operating system. +"auto" will select mkvmerge if available and fall back to concat otherwise. .SS ugoira.ffmpeg-location diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO index 1c00d88..4b00b85 100644 --- a/gallery_dl.egg-info/PKG-INFO +++ b/gallery_dl.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: gallery-dl -Version: 1.21.1 +Version: 1.21.2 Summary: Command-line program to download image galleries and collections from several image hosting sites Home-page: https://github.com/mikf/gallery-dl Author: Mike Fährmann @@ -99,8 +99,8 @@ Standalone Executable Prebuilt executable files with a Python interpreter and required Python packages included are available for -- `Windows `__ -- `Linux `__ +- `Windows `__ +- `Linux `__ | Executables build from the latest commit can be found at | https://github.com/mikf/gallery-dl/actions/workflows/executables.yml diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index ff49d89..abb352c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -603,18 +603,21 @@ class BaseExtractor(Extractor): def __init__(self, match): if not self.category: - for index, group in enumerate(match.groups()): - if group is not None: - if index: - self.category, self.root = self.instances[index-1] - if not self.root: - self.root = text.root_from_url(match.group(0)) - else: - self.root = group - self.category = group.partition("://")[2] - break + self._init_category(match) Extractor.__init__(self, match) + def _init_category(self, match): + for index, group in enumerate(match.groups()): + if group is not None: + if index: + self.category, self.root = self.instances[index-1] + if not self.root: + self.root = text.root_from_url(match.group(0)) + else: + self.root = group + self.category = group.partition("://")[2] + break + @classmethod def update(cls, instances): extra_instances = config.get(("extractor",), cls.basecategory) diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index 6d6e192..1afaac8 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -10,10 +10,10 @@ from . import lolisafe from .. import text -class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): +class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor): category = "cyberdrop" root = "https://cyberdrop.me" - pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" + pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)" test = ( # images ("https://cyberdrop.me/a/keKRjm4t", { @@ -29,7 +29,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): }, }), # videos - ("https://cyberdrop.me/a/l8gIAXVD", { + ("https://cyberdrop.to/a/l8gIAXVD", { "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$", "count": 31, "keyword": { diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 710950a..f21817e 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -9,6 +9,7 @@ """Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" from .common import BaseExtractor, Message +from ..version import __version__ from .. import text import datetime @@ -22,16 +23,7 @@ class DanbooruExtractor(BaseExtractor): per_page = 200 def __init__(self, match): - BaseExtractor.__init__(self, match) - - self.ugoira = self.config("ugoira", False) - self.external = self.config("external", False) - self.extended_metadata = self.config("metadata", False) - - username, api_key = self._get_auth_info() - if username: - self.log.debug("Using HTTP Basic Auth for user '%s'", username) - self.session.auth = (username, api_key) + self._init_category(match) instance = INSTANCES.get(self.category) or {} iget = instance.get @@ -43,6 +35,17 @@ class DanbooruExtractor(BaseExtractor): self.request_interval_min = iget("request-interval-min", 0.0) self._pools = iget("pools") + BaseExtractor.__init__(self, match) + + self.ugoira = self.config("ugoira", False) + self.external = self.config("external", False) + self.extended_metadata = self.config("metadata", False) + + username, api_key = self._get_auth_info() + if username: + self.log.debug("Using HTTP Basic Auth for user '%s'", username) + self.session.auth = (username, api_key) + def request(self, url, **kwargs): kwargs["headers"] = self.headers return BaseExtractor.request(self, url, **kwargs) @@ -144,7 +147,8 @@ INSTANCES = { "e621": { "root": None, "pattern": r"e(?:621|926)\.net", - "headers": {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}, + "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format( + __version__)}, "pools": "sort", "page-limit": 750, "per-page": 320, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index fda7220..85ec0cf 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -417,8 +417,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor): pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(api-da\.wixmp\.com/_api/download/file" - r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", + "pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.(jpg|png)\?token=.+", "count": ">= 30", "keyword": { "allows_comments": bool, @@ -563,7 +563,8 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.png\?token=.+", "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f", "count": 1, }), @@ -574,7 +575,8 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.rar\?token=.+", "count": 1, }), # mixed folders and images (#659) @@ -863,8 +865,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { "options": (("comments", True),), - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "keyword": {"comments": list}, + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.jpg\?token=.+", }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { @@ -878,8 +881,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), # Flash animation with GIF preview (#1731) ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", { - "pattern": r"https://api-da\.wixmp\.com/_api/download" - r"/file\?downloadToken=.+", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.swf\?token=.+", "keyword": { "filename": "flash_comic_tutorial_by_yuumei-d3juatd", "extension": "swf", @@ -1015,6 +1018,7 @@ class DeviantartOAuthAPI(): self.folders = extractor.config("folders", False) self.metadata = extractor.extra or extractor.config("metadata", False) + self.strategy = extractor.config("pagination") self.client_id = extractor.config("client-id") if self.client_id: @@ -1306,14 +1310,20 @@ class DeviantartOAuthAPI(): self._folders(results) yield from results - if not data["has_more"]: + if not data["has_more"] and ( + self.strategy != "manual" or not results): return + if "next_cursor" in data: params["offset"] = None params["cursor"] = data["next_cursor"] - else: + elif data["next_offset"] is not None: params["offset"] = data["next_offset"] params["cursor"] = None + else: + if params.get("offset") is None: + return + params["offset"] = int(params["offset"]) + len(results) def _pagination_list(self, endpoint, params, key="results"): result = [] diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index c23c36f..36b89f7 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "date": "dt:2018-03-18 20:15:00", "eh_category": "Non-H", "expunged": False, - "favorites": "20", + "favorites": "21", "filecount": "4", "filesize": 1488978, "gid": 1200119, diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 88d57e5..ae4112b 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,7 @@ """Extractors for https://issuu.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text, util +from .. import text import json @@ -22,33 +22,30 @@ class IssuuBase(): class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): """Extractor for a single publication""" subcategory = "publication" - directory_fmt = ("{category}", "{document[userName]}", - "{document[originalPublishDate]} {document[title]}") + directory_fmt = ("{category}", "{document[username]}", + "{document[date]:%Y-%m-%d} {document[title]}") filename_fmt = "{num:>03}.{extension}" - archive_fmt = "{document[id]}_{num}" + archive_fmt = "{document[publicationId]}_{num}" pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)" test = ("https://issuu.com/issuu/docs/motions-1-2019/", { "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", "count" : 36, "keyword": { "document": { - "access" : "public", - "articleStories": list, - "contentRating" : dict, + "access" : "PUBLIC", + "contentRating" : { + "isAdsafe" : True, + "isExplicit": False, + "isReviewed": True, + }, "date" : "dt:2019-09-16 00:00:00", "description" : "re:Motions, the brand new publication by I", - "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", "documentName" : "motions-1-2019", - "downloadState" : "NOT_AVAILABLE", - "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", - "isConverting" : False, - "isQuarantined" : False, - "lang" : "en", - "language" : "English", + "downloadable" : False, "pageCount" : 36, "publicationId" : "d99ec95935f15091b040cb8060f05510", "title" : "Motions by Issuu - Issue 1", - "userName" : "issuu", + "username" : "issuu", }, "extension": "jpg", "filename" : r"re:page_\d+", @@ -58,17 +55,18 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): def metadata(self, page): data = json.loads(text.extract( - page, 'window.__INITIAL_STATE__ =', ';\n')[0]) + page, '