diff options
Diffstat (limited to 'gallery_dl')
| -rw-r--r-- | gallery_dl/extractor/common.py | 23 | ||||
| -rw-r--r-- | gallery_dl/extractor/cyberdrop.py | 6 | ||||
| -rw-r--r-- | gallery_dl/extractor/danbooru.py | 26 | ||||
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 28 | ||||
| -rw-r--r-- | gallery_dl/extractor/exhentai.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/issuu.py | 44 | ||||
| -rw-r--r-- | gallery_dl/extractor/lolisafe.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/mangadex.py | 8 | ||||
| -rw-r--r-- | gallery_dl/extractor/nijie.py | 12 | ||||
| -rw-r--r-- | gallery_dl/extractor/photovogue.py | 12 | ||||
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 82 | ||||
| -rw-r--r-- | gallery_dl/extractor/readcomiconline.py | 37 | ||||
| -rw-r--r-- | gallery_dl/extractor/sexcom.py | 9 | ||||
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 14 | ||||
| -rw-r--r-- | gallery_dl/extractor/unsplash.py | 4 | ||||
| -rw-r--r-- | gallery_dl/extractor/vk.py | 78 | ||||
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 16 | ||||
| -rw-r--r-- | gallery_dl/extractor/wikiart.py | 4 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/ugoira.py | 14 | ||||
| -rw-r--r-- | gallery_dl/util.py | 2 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 |
21 files changed, 280 insertions, 145 deletions
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index ff49d89..abb352c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -603,18 +603,21 @@ class BaseExtractor(Extractor): def __init__(self, match): if not self.category: - for index, group in enumerate(match.groups()): - if group is not None: - if index: - self.category, self.root = self.instances[index-1] - if not self.root: - self.root = text.root_from_url(match.group(0)) - else: - self.root = group - self.category = group.partition("://")[2] - break + self._init_category(match) Extractor.__init__(self, match) + def _init_category(self, match): + for index, group in enumerate(match.groups()): + if group is not None: + if index: + self.category, self.root = self.instances[index-1] + if not self.root: + self.root = text.root_from_url(match.group(0)) + else: + self.root = group + self.category = group.partition("://")[2] + break + @classmethod def update(cls, instances): extra_instances = config.get(("extractor",), cls.basecategory) diff --git a/gallery_dl/extractor/cyberdrop.py b/gallery_dl/extractor/cyberdrop.py index 6d6e192..1afaac8 100644 --- a/gallery_dl/extractor/cyberdrop.py +++ b/gallery_dl/extractor/cyberdrop.py @@ -10,10 +10,10 @@ from . import lolisafe from .. import text -class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): +class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor): category = "cyberdrop" root = "https://cyberdrop.me" - pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)" + pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)" test = ( # images ("https://cyberdrop.me/a/keKRjm4t", { @@ -29,7 +29,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor): }, }), # videos - ("https://cyberdrop.me/a/l8gIAXVD", { + ("https://cyberdrop.to/a/l8gIAXVD", { "pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$", "count": 31, "keyword": { diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 710950a..f21817e 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -9,6 +9,7 @@ """Extractors for https://danbooru.donmai.us/ and other Danbooru instances""" from .common import BaseExtractor, Message +from ..version import __version__ from .. import text import datetime @@ -22,16 +23,7 @@ class DanbooruExtractor(BaseExtractor): per_page = 200 def __init__(self, match): - BaseExtractor.__init__(self, match) - - self.ugoira = self.config("ugoira", False) - self.external = self.config("external", False) - self.extended_metadata = self.config("metadata", False) - - username, api_key = self._get_auth_info() - if username: - self.log.debug("Using HTTP Basic Auth for user '%s'", username) - self.session.auth = (username, api_key) + self._init_category(match) instance = INSTANCES.get(self.category) or {} iget = instance.get @@ -43,6 +35,17 @@ class DanbooruExtractor(BaseExtractor): self.request_interval_min = iget("request-interval-min", 0.0) self._pools = iget("pools") + BaseExtractor.__init__(self, match) + + self.ugoira = self.config("ugoira", False) + self.external = self.config("external", False) + self.extended_metadata = self.config("metadata", False) + + username, api_key = self._get_auth_info() + if username: + self.log.debug("Using HTTP Basic Auth for user '%s'", username) + self.session.auth = (username, api_key) + def request(self, url, **kwargs): kwargs["headers"] = self.headers return BaseExtractor.request(self, url, **kwargs) @@ -144,7 +147,8 @@ INSTANCES = { "e621": { "root": None, "pattern": r"e(?:621|926)\.net", - "headers": {"User-Agent": "gallery-dl/1.14.0 (by mikf)"}, + "headers": {"User-Agent": "gallery-dl/{} (by mikf)".format( + __version__)}, "pools": "sort", "page-limit": 750, "per-page": 320, diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index fda7220..85ec0cf 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -417,8 +417,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor): pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(api-da\.wixmp\.com/_api/download/file" - r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", + "pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.(jpg|png)\?token=.+", "count": ">= 30", "keyword": { "allows_comments": bool, @@ -563,7 +563,8 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.png\?token=.+", "content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f", "count": 1, }), @@ -574,7 +575,8 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.rar\?token=.+", "count": 1, }), # mixed folders and images (#659) @@ -863,8 +865,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), (("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), { "options": (("comments", True),), - "pattern": r"https://api-da\.wixmp\.com/_api/download/file", "keyword": {"comments": list}, + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.jpg\?token=.+", }), # wixmp URL rewrite (("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), { @@ -878,8 +881,8 @@ class DeviantartDeviationExtractor(DeviantartExtractor): }), # Flash animation with GIF preview (#1731) ("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", { - "pattern": r"https://api-da\.wixmp\.com/_api/download" - r"/file\?downloadToken=.+", + "pattern": r"https://wixmp-[^.]+\.wixmp\.com" + r"/f/.+/.+\.swf\?token=.+", "keyword": { "filename": "flash_comic_tutorial_by_yuumei-d3juatd", "extension": "swf", @@ -1015,6 +1018,7 @@ class DeviantartOAuthAPI(): self.folders = extractor.config("folders", False) self.metadata = extractor.extra or extractor.config("metadata", False) + self.strategy = extractor.config("pagination") self.client_id = extractor.config("client-id") if self.client_id: @@ -1306,14 +1310,20 @@ class DeviantartOAuthAPI(): self._folders(results) yield from results - if not data["has_more"]: + if not data["has_more"] and ( + self.strategy != "manual" or not results): return + if "next_cursor" in data: params["offset"] = None params["cursor"] = data["next_cursor"] - else: + elif data["next_offset"] is not None: params["offset"] = data["next_offset"] params["cursor"] = None + else: + if params.get("offset") is None: + return + params["offset"] = int(params["offset"]) + len(results) def _pagination_list(self, endpoint, params, key="results"): result = [] diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index c23c36f..36b89f7 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -122,7 +122,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "date": "dt:2018-03-18 20:15:00", "eh_category": "Non-H", "expunged": False, - "favorites": "20", + "favorites": "21", "filecount": "4", "filesize": 1488978, "gid": 1200119, diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py index 88d57e5..ae4112b 100644 --- a/gallery_dl/extractor/issuu.py +++ b/gallery_dl/extractor/issuu.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2019-2020 Mike Fährmann +# Copyright 2019-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,7 +9,7 @@ """Extractors for https://issuu.com/""" from .common import GalleryExtractor, Extractor, Message -from .. import text, util +from .. import text import json @@ -22,33 +22,30 @@ class IssuuBase(): class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): """Extractor for a single publication""" subcategory = "publication" - directory_fmt = ("{category}", "{document[userName]}", - "{document[originalPublishDate]} {document[title]}") + directory_fmt = ("{category}", "{document[username]}", + "{document[date]:%Y-%m-%d} {document[title]}") filename_fmt = "{num:>03}.{extension}" - archive_fmt = "{document[id]}_{num}" + archive_fmt = "{document[publicationId]}_{num}" pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)" test = ("https://issuu.com/issuu/docs/motions-1-2019/", { "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg", "count" : 36, "keyword": { "document": { - "access" : "public", - "articleStories": list, - "contentRating" : dict, + "access" : "PUBLIC", + "contentRating" : { + "isAdsafe" : True, + "isExplicit": False, + "isReviewed": True, + }, "date" : "dt:2019-09-16 00:00:00", "description" : "re:Motions, the brand new publication by I", - "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510", "documentName" : "motions-1-2019", - "downloadState" : "NOT_AVAILABLE", - "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510", - "isConverting" : False, - "isQuarantined" : False, - "lang" : "en", - "language" : "English", + "downloadable" : False, "pageCount" : 36, "publicationId" : "d99ec95935f15091b040cb8060f05510", "title" : "Motions by Issuu - Issue 1", - "userName" : "issuu", + "username" : "issuu", }, "extension": "jpg", "filename" : r"re:page_\d+", @@ -58,17 +55,18 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): def metadata(self, page): data = json.loads(text.extract( - page, 'window.__INITIAL_STATE__ =', ';\n')[0]) + page, '<script data-json="', '"')[0].replace(""", '"')) - doc = data["document"] - doc["lang"] = doc["language"] - doc["language"] = util.code_to_language(doc["language"]) + doc = data["initialDocumentData"]["document"] doc["date"] = text.parse_datetime( - doc["originalPublishDate"], "%Y-%m-%d") + doc["originalPublishDateInISOString"], "%Y-%m-%dT%H:%M:%S.%fZ") self._cnt = text.parse_int(doc["pageCount"]) - self._tpl = "https://{}/{}/jpg/page_{{}}.jpg".format( - data["config"]["hosts"]["image"], doc["id"]) + self._tpl = "https://{}/{}-{}/jpg/page_{{}}.jpg".format( + data["config"]["hosts"]["image"], + doc["revisionId"], + doc["publicationId"], + ) return {"document": doc} diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py index 43377bd..ad7cd1d 100644 --- a/gallery_dl/extractor/lolisafe.py +++ b/gallery_dl/extractor/lolisafe.py @@ -25,7 +25,7 @@ BASE_PATTERN = LolisafeExtractor.update({ }) -class LolisafelbumExtractor(LolisafeExtractor): +class LolisafeAlbumExtractor(LolisafeExtractor): subcategory = "album" pattern = BASE_PATTERN + "/a/([^/?#]+)" test = ( diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py index 7194757..0bc3527 100644 --- a/gallery_dl/extractor/mangadex.py +++ b/gallery_dl/extractor/mangadex.py @@ -104,9 +104,13 @@ class MangadexChapterExtractor(MangadexExtractor): "keyword": "6abcbe1e24eeb1049dc931958853cd767ee483fb", }), # MANGA Plus (#1154) - ("https://mangadex.org/chapter/8d50ed68-8298-4ac9-b63d-cb2aea143dd0", { + ("https://mangadex.org/chapter/74149a55-e7c4-44ea-8a37-98e879c1096f", { "exception": exception.StopExtraction, }), + # 'externalUrl', but still downloadable (#2503) + ("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", { + "count": 39, + }), ) def items(self): @@ -116,7 +120,7 @@ class MangadexChapterExtractor(MangadexExtractor): chapter = self.api.chapter(self.uuid) data = self._transform(chapter) - if data.get("_external_url"): + if data.get("_external_url") and not data["count"]: raise exception.StopExtraction( "Chapter %s%s is not available on MangaDex and can instead be " "read on the official publisher's website at %s.", diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 6cb7c05..90ca01d 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2020 Mike Fährmann +# Copyright 2015-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://nijie.info/""" +"""Extractors for https://nijie.info/""" from .common import Extractor, Message, AsynchronousMixin from .. import text, exception @@ -151,7 +151,7 @@ class NijieIllustrationExtractor(NijieExtractor): pattern = BASE_PATTERN + r"/members_illust\.php\?id=(\d+)" test = ( ("https://nijie.info/members_illust.php?id=44", { - "url": "66c4ff94c6e77c0765dd88f2d8c663055fda573e", + "url": "1553e5144df50a676f5947d02469299b401ad6c0", "keyword": { "artist_id": 44, "artist_name": "ED", @@ -163,7 +163,7 @@ class NijieIllustrationExtractor(NijieExtractor): "num": int, "tags": list, "title": str, - "url": r"re:https://pic.nijie.net/\d+/nijie_picture/.*jpg$", + "url": r"re:https://pic.nijie.net/\d+/nijie/.*jpg$", "user_id": 44, "user_name": "ED", }, @@ -223,8 +223,8 @@ class NijieImageExtractor(NijieExtractor): pattern = BASE_PATTERN + r"/view(?:_popup)?\.php\?id=(\d+)" test = ( ("https://nijie.info/view.php?id=70720", { - "url": "5497f897311397dafa188521258624346a0af2a3", - "keyword": "fd12bca6f4402a0c996315d28c65f7914ad70c51", + "url": "3d654e890212ba823c9647754767336aebc0a743", + "keyword": "41da5d0e178b04f01fe72460185df52fadc3c91b", "content": "d85e3ea896ed5e4da0bca2390ad310a4df716ca6", }), ("https://nijie.info/view.php?id=70724", { diff --git a/gallery_dl/extractor/photovogue.py b/gallery_dl/extractor/photovogue.py index a5c788a..3c68fd5 100644 --- a/gallery_dl/extractor/photovogue.py +++ b/gallery_dl/extractor/photovogue.py @@ -4,12 +4,12 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for https://www.vogue.it/en/photovogue/""" +"""Extractors for https://www.vogue.com/photovogue/""" from .common import Extractor, Message from .. import text -BASE_PATTERN = r"(?:https?://)?(?:www\.)?vogue\.it/(?:en/)?photovogue" +BASE_PATTERN = r"(?:https?://)?(?:www\.)?vogue\.com/photovogue" class PhotovogueUserExtractor(Extractor): @@ -18,10 +18,10 @@ class PhotovogueUserExtractor(Extractor): directory_fmt = ("{category}", "{photographer[id]} {photographer[name]}") filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" - pattern = BASE_PATTERN + r"/portfolio/?\?id=(\d+)" + pattern = BASE_PATTERN + r"/photographers/(\d+)" test = ( - ("https://www.vogue.it/en/photovogue/portfolio/?id=221252"), - ("https://vogue.it/photovogue/portfolio?id=221252", { + ("https://www.vogue.com/photovogue/photographers/221252"), + ("https://vogue.com/photovogue/photographers/221252", { "pattern": r"https://images.vogue.it/Photovogue/[^/]+_gallery.jpg", "keyword": { "date": "type:datetime", @@ -67,7 +67,7 @@ class PhotovogueUserExtractor(Extractor): yield Message.Url, url, text.nameext_from_url(url, photo) def photos(self): - url = "https://api.vogue.it/production/photos" + url = "https://api.vogue.com/production/photos" params = { "count": "50", "order_by": "DESC", diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 8943747..a33df42 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -100,7 +100,7 @@ class PixivExtractor(Extractor): class PixivUserExtractor(PixivExtractor): - """Extractor for works of a pixiv-user""" + """Extractor for works of a pixiv user""" subcategory = "user" pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:" r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)" @@ -120,12 +120,18 @@ class PixivUserExtractor(PixivExtractor): "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), { "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658", }), - # avatar (#595, 623) + # avatar (#595, #623, #1124) ("https://www.pixiv.net/en/users/173530", { "options": (("avatar", True),), "content": "4e57544480cc2036ea9608103e8f024fa737fe66", "range": "1", }), + # background (#623, #1124, #2495) + ("https://www.pixiv.net/en/users/194921", { + "options": (("background", True),), + "content": "aeda3536003ea3002f70657cb93c5053f26f5843", + "range": "1", + }), # deleted account ("http://www.pixiv.net/member_illust.php?id=173531", { "options": (("metadata", True),), @@ -154,7 +160,7 @@ class PixivUserExtractor(PixivExtractor): def metadata(self): if self.config("metadata"): - return {"user": self.api.user_detail(self.user_id)} + return {"user": self.api.user_detail(self.user_id)["user"]} return {} def works(self): @@ -167,29 +173,55 @@ class PixivUserExtractor(PixivExtractor): if tag in [t["name"].lower() for t in work["tags"]] ) - if self.config("avatar"): - user = self.api.user_detail(self.user_id) - url = user["profile_image_urls"]["medium"].replace("_170.", ".") - avatar = { - "create_date" : None, - "height" : 0, - "id" : "avatar", - "image_urls" : None, - "meta_pages" : (), - "meta_single_page": {"original_image_url": url}, - "page_count" : 1, - "sanity_level" : 0, - "tags" : (), - "title" : "avatar", - "type" : "avatar", - "user" : user, - "width" : 0, - "x_restrict" : 0, - } - works = itertools.chain((avatar,), works) + avatar = self.config("avatar") + background = self.config("background") + if avatar or background: + work_list = [] + detail = self.api.user_detail(self.user_id) + user = detail["user"] + + if avatar: + url = user["profile_image_urls"]["medium"] + work_list.append((self._make_work( + "avatar", url.replace("_170.", "."), user),)) + + if background: + url = detail["profile"]["background_image_url"] + if url: + if "/c/" in url: + parts = url.split("/") + del parts[3:5] + url = "/".join(parts) + url = url.replace("_master1200.", ".") + work = self._make_work("background", url, user) + if url.endswith(".jpg"): + work["_fallback"] = (url[:-4] + ".png",) + work_list.append((work,)) + + work_list.append(works) + works = itertools.chain.from_iterable(work_list) return works + @staticmethod + def _make_work(kind, url, user): + return { + "create_date" : None, + "height" : 0, + "id" : kind, + "image_urls" : None, + "meta_pages" : (), + "meta_single_page": {"original_image_url": url}, + "page_count" : 1, + "sanity_level" : 0, + "tags" : (), + "title" : kind, + "type" : kind, + "user" : user, + "width" : 0, + "x_restrict" : 0, + } + class PixivMeExtractor(PixivExtractor): """Extractor for pixiv.me URLs""" @@ -350,7 +382,7 @@ class PixivFavoriteExtractor(PixivExtractor): def metadata(self): if self.user_id: - user = self.api.user_detail(self.user_id) + user = self.api.user_detail(self.user_id)["user"] else: self.api.login() user = self.api.user @@ -730,7 +762,7 @@ class PixivAppAPI(): def user_detail(self, user_id): params = {"user_id": user_id} - return self._call("v1/user/detail", params)["user"] + return self._call("v1/user/detail", params) def user_following(self, user_id, restrict="public"): params = {"user_id": user_id, "restrict": restrict} diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index e4075a2..c8b8c9a 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2021 Mike Fährmann +# Copyright 2016-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,6 +10,7 @@ from .common import Extractor, ChapterExtractor, MangaExtractor from .. import text, exception +import binascii import re BASE_PATTERN = r"(?i)(?:https?://)?(?:www\.)?readcomiconline\.(?:li|to)" @@ -22,6 +23,7 @@ class ReadcomiconlineBase(): filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}" archive_fmt = "{issue_id}_{page}" root = "https://readcomiconline.li" + browser = "firefox" def request(self, url, **kwargs): """Detect and handle redirects to CAPTCHA pages""" @@ -46,7 +48,7 @@ class ReadcomiconlineBase(): class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): """Extractor for comic-issues from readcomiconline.li""" subcategory = "issue" - pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?id=(\d+))" + pattern = BASE_PATTERN + r"(/Comic/[^/?#]+/[^/?#]+\?)([^#]+)" test = ("https://readcomiconline.li/Comic/W-i-t-c-h/Issue-130?id=22289", { "url": "30d29c5afc65043bfd384c010257ec2d0ecbafa6", "keyword": "2d9ec81ce1b11fac06ebf96ce33cdbfca0e85eb5", @@ -54,8 +56,18 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): def __init__(self, match): ChapterExtractor.__init__(self, match) - self.gallery_url += "&quality=hq" - self.issue_id = match.group(2) + + params = text.parse_query(match.group(2)) + quality = self.config("quality") + + if quality is None or quality == "auto": + if "quality" not in params: + params["quality"] = "hq" + else: + params["quality"] = str(quality) + + self.gallery_url += "&".join(k + "=" + v for k, v in params.items()) + self.issue_id = params.get("id") def metadata(self, page): comic, pos = text.extract(page, " - Read\r\n ", "\r\n") @@ -71,7 +83,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor): def images(self, page): return [ - (url, None) + (beau(url), None) for url in text.extract_iter( page, 'lstImages.push("', '"' ) @@ -114,3 +126,18 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor): "lang": "en", "language": "English", })) return results + + +def beau(url): + """https://readcomiconline.li/Scripts/rguard.min.js?v=1.1""" + if url.startswith("https"): + return url + + containsS0 = "=s0" in url + url = url[:-3 if containsS0 else -6] + url = url[4:22] + url[25:] + url = url[0:-6] + url[-2:] + url = binascii.a2b_base64(url).decode() + url = url[0:13] + url[17:] + url = url[0:-2] + ("=s0" if containsS0 else "=s1600") + return "https://2.bp.blogspot.com/" + url diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py index edf35da..830274a 100644 --- a/gallery_dl/extractor/sexcom.py +++ b/gallery_dl/extractor/sexcom.py @@ -87,7 +87,10 @@ class SexcomExtractor(Extractor): data["extension"] = None data["url"] = "ytdl:" + src else: - data["url"] = text.unescape(extr(' src="', '"').partition("?")[0]) + data["_http_validate"] = _check_empty + url = text.unescape(extr(' src="', '"')) + data["url"] = url.partition("?")[0] + data["_fallback"] = (url,) text.nameext_from_url(data["url"], data) data["uploader"] = extr('itemprop="author">', '<') @@ -247,3 +250,7 @@ class SexcomSearchExtractor(SexcomExtractor): def pins(self): url = "{}/{}".format(self.root, self.path) return self._pagination(url) + + +def _check_empty(response): + return response.headers.get("content-length") != "0" diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 4c46170..4c947e7 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -1291,10 +1291,21 @@ class TwitterAPI(): tweet["user"]["description"] = "" tweet["user"]["entities"] = {"description": {}} + tweet["user_id_str"] = tweet["user"]["id_str"] + + if tweet["id_str"] != tweet_id: + tweet["retweeted_status_id_str"] = tweet["id_str"] + tweet["id_str"] = retweet_id = tweet_id + else: + retweet_id = None if "video" in tweet: video = tweet["video"] - del video["variants"][:-1] + video["variants"] = (max( + (v for v in video["variants"] if v["type"] == "video/mp4"), + key=lambda v: text.parse_int( + v["src"].split("/")[-2].partition("x")[0]) + ),) video["variants"][0]["url"] = video["variants"][0]["src"] tweet["extended_entities"] = {"media": [{ "video_info" : video, @@ -1313,4 +1324,5 @@ class TwitterAPI(): "rest_id": tweet["id_str"], "legacy" : tweet, "user" : tweet["user"], + "_retweet_id_str": retweet_id, } diff --git a/gallery_dl/extractor/unsplash.py b/gallery_dl/extractor/unsplash.py index 6036322..ad1617c 100644 --- a/gallery_dl/extractor/unsplash.py +++ b/gallery_dl/extractor/unsplash.py @@ -115,9 +115,9 @@ class UnsplashImageExtractor(UnsplashExtractor): "id": "uMJXuywXLiU", "instagram_username": "just_midwest_rock", "last_name": "Hoefler", - "location": "Madison, WI", + "location": None, "name": "Dave Hoefler", - "portfolio_url": str, + "portfolio_url": None, "total_collections": int, "total_likes": int, "total_photos": int, diff --git a/gallery_dl/extractor/vk.py b/gallery_dl/extractor/vk.py index dd2eb4e..8fb9bbf 100644 --- a/gallery_dl/extractor/vk.py +++ b/gallery_dl/extractor/vk.py @@ -10,7 +10,6 @@ from .common import Extractor, Message from .. import text -import re BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com" @@ -25,47 +24,63 @@ class VkExtractor(Extractor): request_interval = 1.0 def items(self): + sizes = "wzyxrqpo" + data = self.metadata() yield Message.Directory, data + for photo in self.photos(): + + for size in sizes: + size += "_" + if size in photo: + break + else: + self.log.warning("no photo URL found (%s)", photo.get("id")) + continue + photo.update(data) - yield Message.Url, photo["url"], photo + photo["url"], photo["width"], photo["height"] = photo[size] + photo["id"] = photo["id"].rpartition("_")[2] - def _pagination(self, photos_url, user_id): - sub = re.compile(r"/imp[fg]/").sub - needle = 'data-id="{}_'.format(user_id) + text.nameext_from_url(photo["url"], photo) + yield Message.Url, photo["url"], photo + def _pagination(self, photos_id): + url = self.root + "/al_photos.php" headers = { "X-Requested-With": "XMLHttpRequest", "Origin" : self.root, - "Referer" : photos_url, + "Referer" : self.root + "/" + photos_id, } - params = { - "al" : "1", - "al_ad" : "0", - "offset": 0, - "part" : "1", + data = { + "act" : "show", + "al" : "1", + "direction": "1", + "list" : photos_id, + "offset" : 0, } while True: payload = self.request( - photos_url, method="POST", headers=headers, data=params + url, method="POST", headers=headers, data=data, ).json()["payload"][1] - offset = payload[0] - html = payload[1] + total = payload[1] + photos = payload[3] - cnt = 0 - for photo in text.extract_iter(html, needle, ')'): - cnt += 1 - pid = photo[:photo.find('"')] - url = photo[photo.rindex("(")+1:] - url = sub("/", url.partition("?")[0]) - yield text.nameext_from_url(url, {"url": url, "id": pid}) + data["offset"] += len(photos) + if data["offset"] >= total: + # the last chunk of photos also contains the first few photos + # again if 'total' is not a multiple of 10 + extra = total - data["offset"] + if extra: + del photos[extra:] - if cnt <= 20 or offset == params["offset"]: + yield from photos return - params["offset"] = offset + + yield from photos class VkPhotosExtractor(VkExtractor): @@ -76,8 +91,8 @@ class VkPhotosExtractor(VkExtractor): r"|(?!album-?\d+_)([^/?#]+))") test = ( ("https://vk.com/id398982326", { - "pattern": r"https://sun\d+-\d+\.userapi\.com/sun\d+-\d+" - r"/c\d+/v\d+/[0-9a-f]+/[\w-]+\.jpg", + "pattern": r"https://sun\d+-\d+\.userapi\.com/s/v1/if1" + r"/[\w-]+\.jpg\?size=\d+x\d+&quality=96&type=album", "count": ">= 35", "keywords": { "id": r"re:\d+", @@ -90,7 +105,7 @@ class VkPhotosExtractor(VkExtractor): }, }), ("https://vk.com/cosplayinrussia", { - "range": "75-100", + "range": "25-35", "keywords": { "id": r"re:\d+", "user": { @@ -112,8 +127,7 @@ class VkPhotosExtractor(VkExtractor): self.user_id, self.user_name = match.groups() def photos(self): - url = "{}/photos{}".format(self.root, self.user_id) - return self._pagination(url, self.user_id) + return self._pagination("photos" + self.user_id) def metadata(self): if self.user_id: @@ -146,8 +160,8 @@ class VkAlbumExtractor(VkExtractor): directory_fmt = ("{category}", "{user[id]}", "{album[id]}") pattern = BASE_PATTERN + r"/album(-?\d+)_(\d+)$" test = ( - ("https://vk.com/album221469416_0", { - "count": 3, + ("https://vk.com/album232175027_00", { + "count": 8, }), ("https://vk.com/album-165740836_281339889", { "count": 12, @@ -159,8 +173,8 @@ class VkAlbumExtractor(VkExtractor): self.user_id, self.album_id = match.groups() def photos(self): - url = "{}/album{}_{}".format(self.root, self.user_id, self.album_id) - return self._pagination(url, self.user_id) + return self._pagination("album{}_{}".format( + self.user_id, self.album_id)) def metadata(self): return { diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 81ca87f..1929f98 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -105,6 +105,10 @@ class WeiboUserExtractor(WeiboExtractor): ("https://m.weibo.cn/u/2314621010", { "range": "1-30", }), + # deleted (#2521) + ("https://weibo.com/u/7500315942", { + "count": 0, + }), ("https://m.weibo.cn/profile/2314621010"), ("https://m.weibo.cn/p/2304132314621010_-_WEIBO_SECOND_PROFILE_WEIBO"), ("https://www.weibo.com/p/1003062314621010/home"), @@ -132,14 +136,24 @@ class WeiboUserExtractor(WeiboExtractor): while True: response = self.request(url, params=params, headers=headers) headers["X-XSRF-TOKEN"] = response.cookies.get("XSRF-TOKEN") - data = response.json()["data"] + data = response.json() + if not data.get("ok"): + self.log.debug(response.content) + if "since_id" not in params: # first iteration + raise exception.StopExtraction( + '"%s"', data.get("msg") or "unknown error") + + data = data["data"] for card in data["cards"]: if "mblog" in card: yield card["mblog"] info = data.get("cardlistInfo") if not info: + # occasionally weibo returns an empty response + # repeating the same request usually/eventually yields + # the correct response. continue params["since_id"] = sid = info.get("since_id") diff --git a/gallery_dl/extractor/wikiart.py b/gallery_dl/extractor/wikiart.py index 05f27f1..0e06858 100644 --- a/gallery_dl/extractor/wikiart.py +++ b/gallery_dl/extractor/wikiart.py @@ -70,8 +70,8 @@ class WikiartArtistExtractor(WikiartExtractor): directory_fmt = ("{category}", "{artist[artistName]}") pattern = BASE_PATTERN + r"/(?!\w+-by-)([\w-]+)/?$" test = ("https://www.wikiart.org/en/thomas-cole", { - "url": "8514d743382720e6fdab7c9a73faf9e1ec940cfb", - "keyword": "58037afba35bfd7b4101c2316975a75d4ee92a68", + "url": "6844f207a5063c499fc1d5651b03127bc4fe2f73", + "keyword": "09230b5f504697119e267349bf92487e657a7384", }) def __init__(self, match): diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py index c5477d2..fb57e84 100644 --- a/gallery_dl/postprocessor/ugoira.py +++ b/gallery_dl/postprocessor/ugoira.py @@ -49,7 +49,7 @@ class UgoiraPP(PostProcessor): mkvmerge or shutil.which("mkvmerge")): demuxer = "mkvmerge" else: - demuxer = "concat" if util.WINDOWS else "image2" + demuxer = "concat" if demuxer == "mkvmerge": self._process = self._process_mkvmerge @@ -137,6 +137,8 @@ class UgoiraPP(PostProcessor): self.log.error("Unable to invoke FFmpeg (%s: %s)", exc.__class__.__name__, exc) pathfmt.realpath = pathfmt.temppath + except Exception: + pathfmt.realpath = pathfmt.temppath else: if self.mtime: mtime = pathfmt.kwdict.get("_mtime") @@ -150,7 +152,13 @@ class UgoiraPP(PostProcessor): def _exec(self, args): self.log.debug(args) out = None if self.output else subprocess.DEVNULL - return subprocess.Popen(args, stdout=out, stderr=out).wait() + retcode = subprocess.Popen(args, stdout=out, stderr=out).wait() + if retcode: + print() + self.log.error("Non-zero exit status when running %s (%s)", + args, retcode) + raise ValueError() + return retcode def _process_concat(self, pathfmt, tempdir): rate_in, rate_out = self.calculate_framerate(self._frames) @@ -215,7 +223,7 @@ class UgoiraPP(PostProcessor): def _finalize_mkvmerge(self, pathfmt, tempdir): args = [ self.mkvmerge, - "-o", self._realpath, + "-o", pathfmt.path, # mkvmerge does not support "raw" paths "--timecodes", "0:" + self._write_mkvmerge_timecodes(tempdir), ] if self.extension == "webm": diff --git a/gallery_dl/util.py b/gallery_dl/util.py index e8af358..4bb220a 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -461,6 +461,8 @@ def compile_expression(expr, name="<expr>", globals=GLOBALS): def build_duration_func(duration, min=0.0): if not duration: + if min: + return lambda: min return None if isinstance(duration, str): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index fe9a0f8..624f288 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.21.1" +__version__ = "1.21.2" |
