From c6b88a96bd191711fc540d7babab3d2e09c68da8 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Fri, 29 Apr 2022 01:58:56 -0400 Subject: New upstream version 1.21.2. --- gallery_dl/extractor/common.py | 23 +++++---- gallery_dl/extractor/cyberdrop.py | 6 +-- gallery_dl/extractor/danbooru.py | 26 ++++++----- gallery_dl/extractor/deviantart.py | 28 +++++++---- gallery_dl/extractor/exhentai.py | 2 +- gallery_dl/extractor/issuu.py | 44 +++++++++--------- gallery_dl/extractor/lolisafe.py | 2 +- gallery_dl/extractor/mangadex.py | 8 +++- gallery_dl/extractor/nijie.py | 12 ++--- gallery_dl/extractor/photovogue.py | 12 ++--- gallery_dl/extractor/pixiv.py | 82 +++++++++++++++++++++++---------- gallery_dl/extractor/readcomiconline.py | 37 +++++++++++++-- gallery_dl/extractor/sexcom.py | 9 +++- gallery_dl/extractor/twitter.py | 14 +++++- gallery_dl/extractor/unsplash.py | 4 +- gallery_dl/extractor/vk.py | 78 ++++++++++++++++++------------- gallery_dl/extractor/weibo.py | 16 ++++++- gallery_dl/extractor/wikiart.py | 4 +- gallery_dl/postprocessor/ugoira.py | 14 ++++-- gallery_dl/util.py | 2 + gallery_dl/version.py | 2 +- 21 files changed, 280 insertions(+), 145 deletions(-) (limited to 'gallery_dl') diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index ff49d89..abb352c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -603,18 +603,21 @@ class BaseExtractor(Extractor): def __init__(self, match): if not self.category: - for index, group in enumerate(match.groups()): - if group is not None: - if index: - self.category, self.root = self.instances[index-1] - if not self.root: - self.root = text.root_from_url(match.group(0)) - else: - self.root = group - self.category = group.partition("://")[2] - break + self._init_category(match) Extractor.__init__(self, match) + def _init_category(self, match): + for index, group in enumerate(match.groups()): + if group is not None: + if index: + self.category, self.root = self.instances[index-1] + if not self.root: + self.root = text.root_from_url(match.group(0)) + else: + self.root = group + self.category = group.partition("://")[2] + break + @classmethod def update(cls, instances): extra_instances = config.get(("extractor",), cls.basecategory) diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index 6d6e192..1afaac8 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -10,10 +10,10 @@ from . import lolisafe from .. import text -class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): +class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor): category = "cyberdrop" root = "https://cyberdrop.me" - pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" + pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)" test = ( # images ("https://cyberdrop.me/a/keKRjm4t", { @@ -29,7 +29,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): }, }), # videos - ("https://cyberdrop.me/a/l8gIAXVD", { + ("https://cyberdrop.to/a/l8gIAXVD", { "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$", "count": 31, "keyword": { diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 710950a..f21817e 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -9,6 +9,7 @@ """Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" from .common import BaseExtractor, Message +from ..version import __version__ from .. import text import datetime @@ -22,16 +23,7 @@ class DanbooruExtractor(BaseExtractor): per_page = 200 def __init__(self, match): - BaseExtractor.__init__(self, match) - - self.ugoira = self.config("ugoira", False) - self.external = self.config("external", False) - self.extended_metadata = self.config("metadata", False) - - username, api_key = self._get_auth_info() - if username: - self.log.debug("Using HTTP Basic Auth for user '%s'", username) - self.session.auth = (username, api_key) + self._init_category(match) instance = INSTANCES.get(self.category) or {} iget = instance.get @@ -43,6 +35,17 @@ class DanbooruExtractor(BaseExtractor): self.request_interval_min = iget("request-interval-min", 0.0) self._pools = iget("pools") + BaseExtractor.__init__(self, match) + + self.ugoira = self.config("ugoira", False) + self.external = self.config("external", False) + self.extended_metadata = self.config("metadata", False) + + username, api_key = self._get_auth_info() + if username: + self.log.debug("Using HTTP Basic Auth for user '%s'", username) + self.session.auth = (username, api_key) + def request(self, url, **kwargs): kwargs["headers"] = self.headers return BaseExtractor.request(self, url, **kwargs) @@ -144,7 +147,8 @@ INSTANCES = { "e621": { "root": None, "pattern": r"e(?:621|926)\.net", - "headers": {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}, + "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format( + __version__)}, "pools": "sort", "page-limit": 750, "per-page": 320, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index fda7220..85ec0cf 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -417,8 +417,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor): pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(api-da\.wixmp\.com/_api/download/file" - r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", + "pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.(jpg|png)\?token=.+", "count": ">= 30", "keyword": { "allows_comments": bool, @@ -563,7 +563,8 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.png\?token=.+", "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f", "count": 1, }), @@ -574,7 +575,8 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.rar\?token=.+", "count": 1, }), # mixed folders and images (#659) @@ -863,8 +865,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { "options": (("comments", True),), - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "keyword": {"comments": list}, + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.jpg\?token=.+", }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { @@ -878,8 +881,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), # Flash animation with GIF preview (#1731) ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", { - "pattern": r"https://api-da\.wixmp\.com/_api/download" - r"/file\?downloadToken=.+", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.swf\?token=.+", "keyword": { "filename": "flash_comic_tutorial_by_yuumei-d3juatd", "extension": "swf", @@ -1015,6 +1018,7 @@ class DeviantartOAuthAPI(): self.folders = extractor.config("folders", False) self.metadata = extractor.extra or extractor.config("metadata", False) + self.strategy = extractor.config("pagination") self.client_id = extractor.config("client-id") if self.client_id: @@ -1306,14 +1310,20 @@ class DeviantartOAuthAPI(): self._folders(results) yield from results - if not data["has_more"]: + if not data["has_more"] and ( + self.strategy != "manual" or not results): return + if "next_cursor" in data: params["offset"] = None params["cursor"] = data["next_cursor"] - else: + elif data["next_offset"] is not None: params["offset"] = data["next_offset"] params["cursor"] = None + else: + if params.get("offset") is None: + return + params["offset"] = int(params["offset"]) + len(results) def _pagination_list(self, endpoint, params, key="results"): result = [] diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index c23c36f..36b89f7 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "date": "dt:2018-03-18 20:15:00", "eh_category": "Non-H", "expunged": False, - "favorites": "20", + "favorites": "21", "filecount": "4", "filesize": 1488978, "gid": 1200119, diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 88d57e5..ae4112b 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,7 @@ """Extractors for https://issuu.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text, util +from .. import text import json @@ -22,33 +22,30 @@ class IssuuBase(): class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): """Extractor for a single publication""" subcategory = "publication" - directory_fmt = ("{category}", "{document[userName]}", - "{document[originalPublishDate]} {document[title]}") + directory_fmt = ("{category}", "{document[username]}", + "{document[date]:%Y-%m-%d} {document[title]}") filename_fmt = "{num:>03}.{extension}" - archive_fmt = "{document[id]}_{num}" + archive_fmt = "{document[publicationId]}_{num}" pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)" test = ("https://issuu.com/issuu/docs/motions-1-2019/", { "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", "count" : 36, "keyword": { "document": { - "access" : "public", - "articleStories": list, - "contentRating" : dict, + "access" : "PUBLIC", + "contentRating" : { + "isAdsafe" : True, + "isExplicit": False, + "isReviewed": True, + }, "date" : "dt:2019-09-16 00:00:00", "description" : "re:Motions, the brand new publication by I", - "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", "documentName" : "motions-1-2019", - "downloadState" : "NOT_AVAILABLE", - "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", - "isConverting" : False, - "isQuarantined" : False, - "lang" : "en", - "language" : "English", + "downloadable" : False, "pageCount" : 36, "publicationId" : "d99ec95935f15091b040cb8060f05510", "title" : "Motions by Issuu - Issue 1", - "userName" : "issuu", + "username" : "issuu", }, "extension": "jpg", "filename" : r"re:page_\d+", @@ -58,17 +55,18 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): def metadata(self, page): data = json.loads(text.extract( - page, 'window.__INITIAL_STATE__ =', ';\n')[0]) + page, '