summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl')
-rw-r--r-- gallery_dl/cloudflare.py 24
-rw-r--r-- gallery_dl/downloader/http.py 4
-rw-r--r-- gallery_dl/extractor/__init__.py 1
-rw-r--r-- gallery_dl/extractor/artstation.py 5
-rw-r--r-- gallery_dl/extractor/blogger.py 8
-rw-r--r-- gallery_dl/extractor/deviantart.py 3
-rw-r--r-- gallery_dl/extractor/newgrounds.py 10
-rw-r--r-- gallery_dl/extractor/patreon.py 46
-rw-r--r-- gallery_dl/extractor/realbooru.py 2
-rw-r--r-- gallery_dl/extractor/speakerdeck.py 70
-rw-r--r-- gallery_dl/extractor/twitter.py 10
-rw-r--r-- gallery_dl/extractor/vsco.py 15
-rw-r--r-- gallery_dl/extractor/weibo.py 4
-rw-r--r-- gallery_dl/version.py 2
14 files changed, 156 insertions, 48 deletions
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py
index e3ebd1a..43ccdeb 100644
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -144,11 +144,15 @@ def evaluate_expression(expr, page, netloc, *,
# evaluate them,
# and accumulate their values in 'result'
result = ""
- for subexpr in split_re.findall(expr) or (expr,):
- result += str(sum(
- VALUES[part]
- for part in subexpr.split("[]")
- ))
+ for subexpr in expr.strip("+()").split(")+("):
+ value = 0
+ for part in subexpr.split("+"):
+ if "-" in part:
+ p1, _, p2 = part.partition("-")
+ value += VALUES[p1] - VALUES[p2]
+ else:
+ value += VALUES[part]
+ result += str(value)
return int(result)
@@ -158,12 +162,14 @@ OPERATORS = {
"*": operator.mul,
}
+
VALUES = {
"": 0,
- "+": 0,
- "!+": 1,
- "!!": 1,
- "+!!": 1,
+ "!": 1,
+ "[]": 0,
+ "!![]": 1,
+ "(!![]": 1,
+ "(!![])": 1,
}
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 64a2978..021dc16 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -243,6 +243,10 @@ MIMETYPE_MAP = {
"image/webp": "webp",
"image/svg+xml": "svg",
+ "image/vnd.adobe.photoshop": "psd",
+ "image/x-photoshop": "psd",
+ "application/x-photoshop": "psd",
+
"video/webm": "webm",
"video/ogg": "ogg",
"video/mp4": "mp4",
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 2c87eb3..85fbddb 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -105,6 +105,7 @@ modules = [
"slickpic",
"slideshare",
"smugmug",
+ "speakerdeck",
"tsumino",
"tumblr",
"twitter",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index ceda29c..c504dba 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -39,8 +39,9 @@ class ArtstationExtractor(Extractor):
if adict["has_embedded_player"] and self.external:
player = adict["player_embedded"]
- url = text.extract(player, 'src="', '"')[0]
- if not url.startswith(self.root):
+ url = text.extract(player, 'src="', '"')[0] or \
+ text.extract(player, "src='", "'")[0]
+ if url and not url.startswith(self.root):
asset["extension"] = None
yield Message.Url, "ytdl:" + url, asset
continue
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 2657b5d..331cfc2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -109,7 +109,7 @@ class BloggerPostExtractor(BloggerExtractor):
"posts" : int,
"published" : "2010-11-21T10:19:42-08:00",
"updated" : str,
- "url" : "http://www.julianbunker.com/",
+ "url" : "http://julianbphotography.blogspot.com/",
},
"post": {
"author" : "Julian Bunker",
@@ -128,9 +128,7 @@ class BloggerPostExtractor(BloggerExtractor):
"url": str,
},
}),
- ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html", {
- "url": "9928429fb62f712eb4de80f53625eccecc614aae",
- }),
+ ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
# video (#587)
(("http://cfnmscenesinmovies.blogspot.com/2011/11/"
"cfnm-scene-jenna-fischer-in-office.html"), {
@@ -156,7 +154,7 @@ class BloggerBlogExtractor(BloggerExtractor):
"count": 25,
"pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
}),
- ("blogger:http://www.julianbunker.com/", {
+ ("blogger:https://www.kefblog.com.ng/", {
"range": "1-25",
"count": 25,
}),
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index ca722b8..2631052 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1006,7 +1006,8 @@ class DeviantartOAuthAPI():
msg = "API responded with {} {}".format(
status, response.reason)
if status == 429:
- self.delay += 1
+ if self.delay < 9:
+ self.delay += 1
self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
else:
self.log.error(msg)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index bb87a69..17fe935 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -288,7 +288,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = "art"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/art/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
test = ("https://tomfulp.newgrounds.com/art", {
"pattern": NewgroundsImageExtractor.pattern,
"count": ">= 3",
@@ -298,7 +298,7 @@ class NewgroundsArtExtractor(NewgroundsExtractor):
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
subcategory = "audio"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/audio/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
test = ("https://tomfulp.newgrounds.com/audio", {
"pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3",
"count": ">= 4",
@@ -308,7 +308,7 @@ class NewgroundsAudioExtractor(NewgroundsExtractor):
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
subcategory = "movies"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
test = ("https://tomfulp.newgrounds.com/movies", {
"pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
"range": "1-10",
@@ -319,7 +319,7 @@ class NewgroundsMoviesExtractor(NewgroundsExtractor):
class NewgroundsUserExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
subcategory = "user"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
test = (
("https://tomfulp.newgrounds.com", {
"pattern": "https://tomfulp.newgrounds.com/art$",
@@ -414,6 +414,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
@staticmethod
def _extract_favorites(page):
return [
- "https://" + user.rpartition('"')[2]
+ "https://" + user.rpartition('"')[2].lstrip("/:")
for user in text.extract_iter(page, 'class="item-user', '"><img')
]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 18c10a6..570bd72 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -47,8 +47,8 @@ class PatreonExtractor(Extractor):
self._attachments(post),
self._content(post),
):
- fhash = url.split("/")[9].partition("?")[0]
- if fhash not in hashes:
+ fhash = self._filehash(url)
+ if fhash not in hashes or not fhash:
hashes.add(fhash)
post["hash"] = fhash
post["type"] = kind
@@ -158,12 +158,23 @@ class PatreonExtractor(Extractor):
return attr
def _filename(self, url):
- """Fetch filename from its Content-Disposition header"""
+ """Fetch filename from an URL's Content-Disposition header"""
response = self.request(url, method="HEAD", fatal=False)
cd = response.headers.get("Content-Disposition")
return text.extract(cd, 'filename="', '"')[0]
@staticmethod
+ def _filehash(url):
+ """Extract MD5 hash from a download URL"""
+ parts = url.partition("?")[0].split("/")
+ parts.reverse()
+
+ for part in parts:
+ if len(part) == 32:
+ return part
+ return ""
+
+ @staticmethod
def _build_url(endpoint, query):
return (
"https://www.patreon.com/api/" + endpoint +
@@ -194,7 +205,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
- r"(?:user(?:/posts)?/?\?([^#]+)|([^/?&#]+)/?)")
+ r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?")
test = (
("https://www.patreon.com/koveliana", {
"range": "1-25",
@@ -213,6 +224,10 @@ class PatreonCreatorExtractor(PatreonExtractor):
"title" : str,
},
}),
+ ("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {
+ "count": 1,
+ "keyword": {"date": "dt:2020-03-30 21:21:44"},
+ }),
("https://www.patreon.com/kovelianot", {
"exception": exception.NotFoundError,
}),
@@ -222,26 +237,33 @@ class PatreonCreatorExtractor(PatreonExtractor):
def __init__(self, match):
PatreonExtractor.__init__(self, match)
- self.query, self.creator = match.groups()
+ self.creator, self.query = match.groups()
def posts(self):
- if self.creator:
- url = "{}/{}".format(self.root, self.creator.lower())
+ query = text.parse_query(self.query)
+
+ creator_id = query.get("u")
+ if creator_id:
+ url = "{}/user?u={}".format(self.root, creator_id)
else:
- query = text.parse_query(self.query)
- url = "{}/user?u={}".format(self.root, query.get("u"))
+ url = "{}/{}".format(self.root, self.creator.lower())
page = self.request(url, notfound="creator").text
campaign_id = text.extract(page, "/campaign/", "/")[0]
-
if not campaign_id:
raise exception.NotFoundError("creator")
+ filters = "".join(
+ "&filter[{}={}".format(key[8:], text.escape(value))
+ for key, value in query.items()
+ if key.startswith("filters[")
+ )
+
url = self._build_url("posts", (
- "&sort=-published_at"
+ "&sort=" + query.get("sort", "-published_at") +
"&filter[is_draft]=false"
"&filter[contains_exclusive_posts]=true"
- "&filter[campaign_id]=" + campaign_id
+ "&filter[campaign_id]=" + campaign_id + filters
))
return self._pagination(url)
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
index f6bb4df..4841743 100644
--- a/gallery_dl/extractor/realbooru.py
+++ b/gallery_dl/extractor/realbooru.py
@@ -53,7 +53,7 @@ class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
"options": (("tags", True),),
"keyword": {
"tags_general" : str,
- "tags_metadata": "cute tagme",
+ "tags_metadata": str,
"tags_model" : "jennifer_lawrence",
},
})
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
new file mode 100644
index 0000000..1a9691c
--- /dev/null
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Leonardo Taccari
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://speakerdeck.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class SpeakerdeckPresentationExtractor(Extractor):
+ """Extractor for images from a presentation on speakerdeck.com"""
+ category = "speakerdeck"
+ subcategory = "presentation"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{presentation}-{num:>02}.{extension}"
+ archive_fmt = "{presentation}_{num}"
+ pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com"
+ r"/([^/?&#]+)/([^/?&#]+)")
+ test = (
+ (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), {
+ "url": "e97d4a7d5c64267e921c13eb7946d7074794a0d2",
+ "content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user, self.presentation = match.groups()
+ self.presentation_id = None
+
+ def items(self):
+ data = self.get_job_metadata()
+ imgs = self.get_image_urls()
+ data["count"] = len(imgs)
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for data["num"], url in enumerate(imgs, 1):
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+ def get_job_metadata(self):
+ """Collect metadata for extractor-job"""
+ url = "https://speakerdeck.com/oembed.json"
+ params = {
+ "url": "https://speakerdeck.com/" + self.user +
+ "/" + self.presentation,
+ }
+
+ data = self.request(url, params=params).json()
+
+ self.presentation_id, pos = \
+ text.extract(data["html"], 'src="//speakerdeck.com/player/', '"')
+
+ return {
+ "user": self.user,
+ "presentation": self.presentation,
+ "presentation_id": self.presentation_id,
+ "title": data["title"],
+ "author": data["author_name"],
+ }
+
+ def get_image_urls(self):
+ """Extract and return a list of all image-urls"""
+ page = self.request("https://speakerdeck.com/player/" +
+ self.presentation_id).text
+ return list(text.extract_iter(page, 'js-sd-slide" data-url="', '"'))
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 3a274c7..c409f54 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -33,6 +33,7 @@ class TwitterExtractor(Extractor):
self._user_dict = None
self.logged_in = False
self.retweets = self.config("retweets", True)
+ self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
self.content = self.config("content", False)
self.videos = self.config("videos", True)
@@ -48,7 +49,9 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
data = self._data_from_tweet(tweet)
- if not data or not self.retweets and data["retweet_id"]:
+ if not data or \
+ not self.retweets and data["retweet_id"] or \
+ not self.replies and data["reply"]:
continue
data.update(metadata)
@@ -370,6 +373,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("videos", "ytdl"),),
"pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
}),
+ # 'replies' option (#705)
+ ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
+ "options": (("replies", False),),
+ "count": 0,
+ }),
# /i/web/ URL
("https://twitter.com/i/web/status/1155074198240292865", {
"pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 0306112..c9f0ec3 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -72,7 +72,7 @@ class VscoExtractor(Extractor):
page = self.request(url, notfound=self.subcategory).text
return json.loads(text.extract(page, "__PRELOADED_STATE__ = ", "<")[0])
- def _pagination(self, url, params, token, key, extra):
+ def _pagination(self, url, params, token, key, extra=None):
headers = {
"Referer" : "{}/{}".format(self.root, self.user),
"Authorization" : "Bearer " + token,
@@ -80,7 +80,8 @@ class VscoExtractor(Extractor):
"X-Client-Build" : "1",
}
- yield from map(self._transform_media, extra)
+ if extra:
+ yield from map(self._transform_media, extra)
while True:
data = self.request(url, params=params, headers=headers).json()
@@ -130,23 +131,17 @@ class VscoUserExtractor(VscoExtractor):
def images(self):
url = "{}/{}/gallery".format(self.root, self.user)
data = self._extract_preload_state(url)
-
tkn = data["users"]["currentUser"]["tkn"]
sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"])
- site = data["medias"]["bySiteId"][sid]
url = "{}/api/3.0/medias/profile".format(self.root)
params = {
"site_id" : sid,
"limit" : "14",
- "show_only": "0",
- "cursor" : site["nextCursor"],
+ "cursor" : None,
}
- return self._pagination(url, params, tkn, "media", (
- data["medias"]["byId"][media[media["type"]]]["media"]
- for media in site["medias"]
- ))
+ return self._pagination(url, params, tkn, "media")
class VscoCollectionExtractor(VscoExtractor):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 9539c2f..aa9bdae 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -23,6 +23,7 @@ class WeiboExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.retweets = self.config("retweets", True)
+ self.videos = self.config("videos", True)
def items(self):
yield Message.Version, 1
@@ -52,7 +53,7 @@ class WeiboExtractor(Extractor):
yield Message.Url, image["url"], data
num += 1
- if "page_info" in obj and "media_info" in obj["page_info"]:
+ if self.videos and "media_info" in obj.get("page_info", ()):
info = obj["page_info"]["media_info"]
url = info.get("stream_url_hd") or info.get("stream_url")
@@ -70,6 +71,7 @@ class WeiboExtractor(Extractor):
data["extension"] = "mp4"
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
yield Message.Url, url, data
+ num += 1
if self.retweets and "retweeted_status" in obj:
obj = obj["retweeted_status"]
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 73920c2..40b5c73 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.13.5"
+__version__ = "1.13.6"