diff options
| author | 2020-05-03 00:06:40 -0400 | |
|---|---|---|
| committer | 2020-05-03 00:06:40 -0400 | |
| commit | 90e50db2e3c38f523bb5195d295290b06e5cedb0 (patch) | |
| tree | 4759dc0faea79f83fa5074e2d0bd82b18a9caaea /gallery_dl | |
| parent | d5b96ce44b7809f5ae01e3e9d70a1d58fe21ccf5 (diff) | |
New upstream version 1.13.6 (tag: upstream/1.13.6)
Diffstat (limited to 'gallery_dl')
| -rw-r--r-- | gallery_dl/cloudflare.py | 24 | ||||
| -rw-r--r-- | gallery_dl/downloader/http.py | 4 | ||||
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | gallery_dl/extractor/artstation.py | 5 | ||||
| -rw-r--r-- | gallery_dl/extractor/blogger.py | 8 | ||||
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 3 | ||||
| -rw-r--r-- | gallery_dl/extractor/newgrounds.py | 10 | ||||
| -rw-r--r-- | gallery_dl/extractor/patreon.py | 46 | ||||
| -rw-r--r-- | gallery_dl/extractor/realbooru.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/speakerdeck.py | 70 | ||||
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 10 | ||||
| -rw-r--r-- | gallery_dl/extractor/vsco.py | 15 | ||||
| -rw-r--r-- | gallery_dl/extractor/weibo.py | 4 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 |
14 files changed, 156 insertions, 48 deletions
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py index e3ebd1a..43ccdeb 100644 --- a/gallery_dl/cloudflare.py +++ b/gallery_dl/cloudflare.py @@ -144,11 +144,15 @@ def evaluate_expression(expr, page, netloc, *, # evaluate them, # and accumulate their values in 'result' result = "" - for subexpr in split_re.findall(expr) or (expr,): - result += str(sum( - VALUES[part] - for part in subexpr.split("[]") - )) + for subexpr in expr.strip("+()").split(")+("): + value = 0 + for part in subexpr.split("+"): + if "-" in part: + p1, _, p2 = part.partition("-") + value += VALUES[p1] - VALUES[p2] + else: + value += VALUES[part] + result += str(value) return int(result) @@ -158,12 +162,14 @@ OPERATORS = { "*": operator.mul, } + VALUES = { "": 0, - "+": 0, - "!+": 1, - "!!": 1, - "+!!": 1, + "!": 1, + "[]": 0, + "!![]": 1, + "(!![]": 1, + "(!![])": 1, } diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 64a2978..021dc16 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -243,6 +243,10 @@ MIMETYPE_MAP = { "image/webp": "webp", "image/svg+xml": "svg", + "image/vnd.adobe.photoshop": "psd", + "image/x-photoshop": "psd", + "application/x-photoshop": "psd", + "video/webm": "webm", "video/ogg": "ogg", "video/mp4": "mp4", diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 2c87eb3..85fbddb 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -105,6 +105,7 @@ modules = [ "slickpic", "slideshare", "smugmug", + "speakerdeck", "tsumino", "tumblr", "twitter", diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index ceda29c..c504dba 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -39,8 +39,9 @@ class ArtstationExtractor(Extractor): if adict["has_embedded_player"] and self.external: player = adict["player_embedded"] - url = text.extract(player, 'src="', '"')[0] - if not 
url.startswith(self.root): + url = text.extract(player, 'src="', '"')[0] or \ + text.extract(player, "src='", "'")[0] + if url and not url.startswith(self.root): asset["extension"] = None yield Message.Url, "ytdl:" + url, asset continue diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py index 2657b5d..331cfc2 100644 --- a/gallery_dl/extractor/blogger.py +++ b/gallery_dl/extractor/blogger.py @@ -109,7 +109,7 @@ class BloggerPostExtractor(BloggerExtractor): "posts" : int, "published" : "2010-11-21T10:19:42-08:00", "updated" : str, - "url" : "http://www.julianbunker.com/", + "url" : "http://julianbphotography.blogspot.com/", }, "post": { "author" : "Julian Bunker", @@ -128,9 +128,7 @@ class BloggerPostExtractor(BloggerExtractor): "url": str, }, }), - ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html", { - "url": "9928429fb62f712eb4de80f53625eccecc614aae", - }), + ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"), # video (#587) (("http://cfnmscenesinmovies.blogspot.com/2011/11/" "cfnm-scene-jenna-fischer-in-office.html"), { @@ -156,7 +154,7 @@ class BloggerBlogExtractor(BloggerExtractor): "count": 25, "pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg", }), - ("blogger:http://www.julianbunker.com/", { + ("blogger:https://www.kefblog.com.ng/", { "range": "1-25", "count": 25, }), diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index ca722b8..2631052 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -1006,7 +1006,8 @@ class DeviantartOAuthAPI(): msg = "API responded with {} {}".format( status, response.reason) if status == 429: - self.delay += 1 + if self.delay < 9: + self.delay += 1 self.log.warning("%s. 
Using %ds delay.", msg, 2 ** self.delay) else: self.log.error(msg) diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index bb87a69..17fe935 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -288,7 +288,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor): class NewgroundsArtExtractor(NewgroundsExtractor): """Extractor for all images of a newgrounds user""" subcategory = "art" - pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/art/?$" + pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$" test = ("https://tomfulp.newgrounds.com/art", { "pattern": NewgroundsImageExtractor.pattern, "count": ">= 3", @@ -298,7 +298,7 @@ class NewgroundsArtExtractor(NewgroundsExtractor): class NewgroundsAudioExtractor(NewgroundsExtractor): """Extractor for all audio submissions of a newgrounds user""" subcategory = "audio" - pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/audio/?$" + pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$" test = ("https://tomfulp.newgrounds.com/audio", { "pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3", "count": ">= 4", @@ -308,7 +308,7 @@ class NewgroundsAudioExtractor(NewgroundsExtractor): class NewgroundsMoviesExtractor(NewgroundsExtractor): """Extractor for all movies of a newgrounds user""" subcategory = "movies" - pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$" + pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$" test = ("https://tomfulp.newgrounds.com/movies", { "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+", "range": "1-10", @@ -319,7 +319,7 @@ class NewgroundsMoviesExtractor(NewgroundsExtractor): class NewgroundsUserExtractor(NewgroundsExtractor): """Extractor for a newgrounds user profile""" subcategory = "user" - pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/?$" + pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$" test = ( ("https://tomfulp.newgrounds.com", { "pattern": 
"https://tomfulp.newgrounds.com/art$", @@ -414,6 +414,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor): @staticmethod def _extract_favorites(page): return [ - "https://" + user.rpartition('"')[2] + "https://" + user.rpartition('"')[2].lstrip("/:") for user in text.extract_iter(page, 'class="item-user', '"><img') ] diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 18c10a6..570bd72 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -47,8 +47,8 @@ class PatreonExtractor(Extractor): self._attachments(post), self._content(post), ): - fhash = url.split("/")[9].partition("?")[0] - if fhash not in hashes: + fhash = self._filehash(url) + if fhash not in hashes or not fhash: hashes.add(fhash) post["hash"] = fhash post["type"] = kind @@ -158,12 +158,23 @@ class PatreonExtractor(Extractor): return attr def _filename(self, url): - """Fetch filename from its Content-Disposition header""" + """Fetch filename from an URL's Content-Disposition header""" response = self.request(url, method="HEAD", fatal=False) cd = response.headers.get("Content-Disposition") return text.extract(cd, 'filename="', '"')[0] @staticmethod + def _filehash(url): + """Extract MD5 hash from a download URL""" + parts = url.partition("?")[0].split("/") + parts.reverse() + + for part in parts: + if len(part) == 32: + return part + return "" + + @staticmethod def _build_url(endpoint, query): return ( "https://www.patreon.com/api/" + endpoint + @@ -194,7 +205,7 @@ class PatreonCreatorExtractor(PatreonExtractor): subcategory = "creator" pattern = (r"(?:https?://)?(?:www\.)?patreon\.com" r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))" - r"(?:user(?:/posts)?/?\?([^#]+)|([^/?&#]+)/?)") + r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?") test = ( ("https://www.patreon.com/koveliana", { "range": "1-25", @@ -213,6 +224,10 @@ class PatreonCreatorExtractor(PatreonExtractor): "title" : str, }, }), + 
("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", { + "count": 1, + "keyword": {"date": "dt:2020-03-30 21:21:44"}, + }), ("https://www.patreon.com/kovelianot", { "exception": exception.NotFoundError, }), @@ -222,26 +237,33 @@ class PatreonCreatorExtractor(PatreonExtractor): def __init__(self, match): PatreonExtractor.__init__(self, match) - self.query, self.creator = match.groups() + self.creator, self.query = match.groups() def posts(self): - if self.creator: - url = "{}/{}".format(self.root, self.creator.lower()) + query = text.parse_query(self.query) + + creator_id = query.get("u") + if creator_id: + url = "{}/user?u={}".format(self.root, creator_id) else: - query = text.parse_query(self.query) - url = "{}/user?u={}".format(self.root, query.get("u")) + url = "{}/{}".format(self.root, self.creator.lower()) page = self.request(url, notfound="creator").text campaign_id = text.extract(page, "/campaign/", "/")[0] - if not campaign_id: raise exception.NotFoundError("creator") + filters = "".join( + "&filter[{}={}".format(key[8:], text.escape(value)) + for key, value in query.items() + if key.startswith("filters[") + ) + url = self._build_url("posts", ( - "&sort=-published_at" + "&sort=" + query.get("sort", "-published_at") + "&filter[is_draft]=false" "&filter[contains_exclusive_posts]=true" - "&filter[campaign_id]=" + campaign_id + "&filter[campaign_id]=" + campaign_id + filters )) return self._pagination(url) diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py index f6bb4df..4841743 100644 --- a/gallery_dl/extractor/realbooru.py +++ b/gallery_dl/extractor/realbooru.py @@ -53,7 +53,7 @@ class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor): "options": (("tags", True),), "keyword": { "tags_general" : str, - "tags_metadata": "cute tagme", + "tags_metadata": str, "tags_model" : "jennifer_lawrence", }, }) diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py new file mode 100644 
index 0000000..1a9691c --- /dev/null +++ b/gallery_dl/extractor/speakerdeck.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Leonardo Taccari +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extract images from https://speakerdeck.com/""" + +from .common import Extractor, Message +from .. import text + + +class SpeakerdeckPresentationExtractor(Extractor): + """Extractor for images from a presentation on speakerdeck.com""" + category = "speakerdeck" + subcategory = "presentation" + directory_fmt = ("{category}", "{user}") + filename_fmt = "{presentation}-{num:>02}.{extension}" + archive_fmt = "{presentation}_{num}" + pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com" + r"/([^/?&#]+)/([^/?&#]+)") + test = ( + (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), { + "url": "e97d4a7d5c64267e921c13eb7946d7074794a0d2", + "content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a", + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self.user, self.presentation = match.groups() + self.presentation_id = None + + def items(self): + data = self.get_job_metadata() + imgs = self.get_image_urls() + data["count"] = len(imgs) + yield Message.Version, 1 + yield Message.Directory, data + for data["num"], url in enumerate(imgs, 1): + yield Message.Url, url, text.nameext_from_url(url, data) + + def get_job_metadata(self): + """Collect metadata for extractor-job""" + url = "https://speakerdeck.com/oembed.json" + params = { + "url": "https://speakerdeck.com/" + self.user + + "/" + self.presentation, + } + + data = self.request(url, params=params).json() + + self.presentation_id, pos = \ + text.extract(data["html"], 'src="//speakerdeck.com/player/', '"') + + return { + "user": self.user, + "presentation": self.presentation, + "presentation_id": self.presentation_id, + "title": 
data["title"], + "author": data["author_name"], + } + + def get_image_urls(self): + """Extract and return a list of all image-urls""" + page = self.request("https://speakerdeck.com/player/" + + self.presentation_id).text + return list(text.extract_iter(page, 'js-sd-slide" data-url="', '"')) diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 3a274c7..c409f54 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -33,6 +33,7 @@ class TwitterExtractor(Extractor): self._user_dict = None self.logged_in = False self.retweets = self.config("retweets", True) + self.replies = self.config("replies", True) self.twitpic = self.config("twitpic", False) self.content = self.config("content", False) self.videos = self.config("videos", True) @@ -48,7 +49,9 @@ class TwitterExtractor(Extractor): for tweet in self.tweets(): data = self._data_from_tweet(tweet) - if not data or not self.retweets and data["retweet_id"]: + if not data or \ + not self.retweets and data["retweet_id"] or \ + not self.replies and data["reply"]: continue data.update(metadata) @@ -370,6 +373,11 @@ class TwitterTweetExtractor(TwitterExtractor): "options": (("videos", "ytdl"),), "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528", }), + # 'replies' option (#705) + ("https://twitter.com/tyson_hesse/status/1103767554424598528", { + "options": (("replies", False),), + "count": 0, + }), # /i/web/ URL ("https://twitter.com/i/web/status/1155074198240292865", { "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig", diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py index 0306112..c9f0ec3 100644 --- a/gallery_dl/extractor/vsco.py +++ b/gallery_dl/extractor/vsco.py @@ -72,7 +72,7 @@ class VscoExtractor(Extractor): page = self.request(url, notfound=self.subcategory).text return json.loads(text.extract(page, "__PRELOADED_STATE__ = ", "<")[0]) - def _pagination(self, url, params, token, key, extra): + def 
_pagination(self, url, params, token, key, extra=None): headers = { "Referer" : "{}/{}".format(self.root, self.user), "Authorization" : "Bearer " + token, @@ -80,7 +80,8 @@ class VscoExtractor(Extractor): "X-Client-Build" : "1", } - yield from map(self._transform_media, extra) + if extra: + yield from map(self._transform_media, extra) while True: data = self.request(url, params=params, headers=headers).json() @@ -130,23 +131,17 @@ class VscoUserExtractor(VscoExtractor): def images(self): url = "{}/{}/gallery".format(self.root, self.user) data = self._extract_preload_state(url) - tkn = data["users"]["currentUser"]["tkn"] sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"]) - site = data["medias"]["bySiteId"][sid] url = "{}/api/3.0/medias/profile".format(self.root) params = { "site_id" : sid, "limit" : "14", - "show_only": "0", - "cursor" : site["nextCursor"], + "cursor" : None, } - return self._pagination(url, params, tkn, "media", ( - data["medias"]["byId"][media[media["type"]]]["media"] - for media in site["medias"] - )) + return self._pagination(url, params, tkn, "media") class VscoCollectionExtractor(VscoExtractor): diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 9539c2f..aa9bdae 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -23,6 +23,7 @@ class WeiboExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) self.retweets = self.config("retweets", True) + self.videos = self.config("videos", True) def items(self): yield Message.Version, 1 @@ -52,7 +53,7 @@ class WeiboExtractor(Extractor): yield Message.Url, image["url"], data num += 1 - if "page_info" in obj and "media_info" in obj["page_info"]: + if self.videos and "media_info" in obj.get("page_info", ()): info = obj["page_info"]["media_info"] url = info.get("stream_url_hd") or info.get("stream_url") @@ -70,6 +71,7 @@ class WeiboExtractor(Extractor): data["extension"] = "mp4" data["_ytdl_extra"] = 
{"protocol": "m3u8_native"} yield Message.Url, url, data + num += 1 if self.retweets and "retweeted_status" in obj: obj = obj["retweeted_status"] diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 73920c2..40b5c73 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.13.5" +__version__ = "1.13.6" |
