aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@gmail.com>2020-05-03 00:06:41 -0400
committerLibravatarUnit 193 <unit193@gmail.com>2020-05-03 00:06:41 -0400
commitf5a2f273c0ccfac264ddfd45384dadfe25d9d7a5 (patch)
tree92f4a00290fc49d27badcde094910f7a5bb8f783
parent6979a952d58bb018e3636276f141ae28c4599143 (diff)
parent90e50db2e3c38f523bb5195d295290b06e5cedb0 (diff)
downloadgallery-dl-f5a2f273c0ccfac264ddfd45384dadfe25d9d7a5.tar.bz2
gallery-dl-f5a2f273c0ccfac264ddfd45384dadfe25d9d7a5.tar.xz
gallery-dl-f5a2f273c0ccfac264ddfd45384dadfe25d9d7a5.tar.zst
Update upstream source from tag 'upstream/1.13.6'
Update to upstream version '1.13.6' with Debian dir 56019140fd27c135929da929f616e2bc3456deb9
-rw-r--r--PKG-INFO8
-rw-r--r--README.rst6
-rw-r--r--data/man/gallery-dl.12
-rw-r--r--data/man/gallery-dl.conf.534
-rw-r--r--gallery_dl.egg-info/PKG-INFO8
-rw-r--r--gallery_dl.egg-info/SOURCES.txt1
-rw-r--r--gallery_dl/cloudflare.py24
-rw-r--r--gallery_dl/downloader/http.py4
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/artstation.py5
-rw-r--r--gallery_dl/extractor/blogger.py8
-rw-r--r--gallery_dl/extractor/deviantart.py3
-rw-r--r--gallery_dl/extractor/newgrounds.py10
-rw-r--r--gallery_dl/extractor/patreon.py46
-rw-r--r--gallery_dl/extractor/realbooru.py2
-rw-r--r--gallery_dl/extractor/speakerdeck.py70
-rw-r--r--gallery_dl/extractor/twitter.py10
-rw-r--r--gallery_dl/extractor/vsco.py15
-rw-r--r--gallery_dl/extractor/weibo.py4
-rw-r--r--gallery_dl/version.py2
-rw-r--r--test/test_cache.py11
-rw-r--r--test/test_config.py31
-rw-r--r--test/test_cookies.py6
-rw-r--r--test/test_downloader.py23
-rw-r--r--test/test_extractor.py25
-rw-r--r--test/test_oauth.py7
-rw-r--r--test/test_postprocessor.py15
-rw-r--r--test/test_results.py7
-rw-r--r--test/test_text.py6
-rw-r--r--test/test_util.py7
30 files changed, 294 insertions, 107 deletions
diff --git a/PKG-INFO b/PKG-INFO
index b9a9444..085159b 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery_dl
-Version: 1.13.5
+Version: 1.13.6
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -247,7 +247,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.5.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.6.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/README.rst b/README.rst
index 50a6fae..0fd597a 100644
--- a/README.rst
+++ b/README.rst
@@ -83,8 +83,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -236,7 +236,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.5.tar.gz
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.6.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 9ef6a93..c115752 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2020-04-27" "1.13.5" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2020-05-02" "1.13.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index 6c48a70..a944167 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2020-04-27" "1.13.5" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2020-05-02" "1.13.6" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -1338,6 +1338,16 @@ You can use \f[I]"all"\f[] instead of listing all types separately.
.IP "Description:" 4
Extract tweet text as \f[I]content\f[] metadata.
+.SS extractor.twitter.replies
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract media from replies to other Tweets.
+
.SS extractor.twitter.retweets
.IP "Type:" 6
\f[I]bool\f[]
@@ -1346,7 +1356,7 @@ Extract tweet text as \f[I]content\f[] metadata.
\f[I]true\f[]
.IP "Description:" 4
-Extract images from retweets.
+Extract media from Retweets.
.SS extractor.twitter.twitpic
.IP "Type:" 6
@@ -1400,6 +1410,26 @@ your account's browsing settings and default filters when searching.
See https://wallhaven.cc/help/api for more information.
+.SS extractor.weibo.retweets
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Extract media from retweeted posts.
+
+.SS extractor.weibo.videos
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Download video files.
+
.SS extractor.[booru].tags
.IP "Type:" 6
\f[I]bool\f[]
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index a0c6286..a79e69f 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: gallery-dl
-Version: 1.13.5
+Version: 1.13.6
Summary: Command-line program to download image-galleries and -collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Author: Mike Fährmann
@@ -94,8 +94,8 @@ Description: ==========
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
- - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.exe>`__
- - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.5/gallery-dl.bin>`__
+ - `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.exe>`__
+ - `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.13.6/gallery-dl.bin>`__
These executables include a Python 3.8 interpreter
and all required Python packages.
@@ -247,7 +247,7 @@ Description: ==========
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
- .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.5.tar.gz
+ .. _stable: https://github.com/mikf/gallery-dl/archive/v1.13.6.tar.gz
.. _dev: https://github.com/mikf/gallery-dl/archive/master.tar.gz
.. _Python: https://www.python.org/downloads/
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 1df3675..5d3f07b 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -136,6 +136,7 @@ gallery_dl/extractor/simplyhentai.py
gallery_dl/extractor/slickpic.py
gallery_dl/extractor/slideshare.py
gallery_dl/extractor/smugmug.py
+gallery_dl/extractor/speakerdeck.py
gallery_dl/extractor/test.py
gallery_dl/extractor/tsumino.py
gallery_dl/extractor/tumblr.py
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py
index e3ebd1a..43ccdeb 100644
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -144,11 +144,15 @@ def evaluate_expression(expr, page, netloc, *,
# evaluate them,
# and accumulate their values in 'result'
result = ""
- for subexpr in split_re.findall(expr) or (expr,):
- result += str(sum(
- VALUES[part]
- for part in subexpr.split("[]")
- ))
+ for subexpr in expr.strip("+()").split(")+("):
+ value = 0
+ for part in subexpr.split("+"):
+ if "-" in part:
+ p1, _, p2 = part.partition("-")
+ value += VALUES[p1] - VALUES[p2]
+ else:
+ value += VALUES[part]
+ result += str(value)
return int(result)
@@ -158,12 +162,14 @@ OPERATORS = {
"*": operator.mul,
}
+
VALUES = {
"": 0,
- "+": 0,
- "!+": 1,
- "!!": 1,
- "+!!": 1,
+ "!": 1,
+ "[]": 0,
+ "!![]": 1,
+ "(!![]": 1,
+ "(!![])": 1,
}
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 64a2978..021dc16 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -243,6 +243,10 @@ MIMETYPE_MAP = {
"image/webp": "webp",
"image/svg+xml": "svg",
+ "image/vnd.adobe.photoshop": "psd",
+ "image/x-photoshop": "psd",
+ "application/x-photoshop": "psd",
+
"video/webm": "webm",
"video/ogg": "ogg",
"video/mp4": "mp4",
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 2c87eb3..85fbddb 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -105,6 +105,7 @@ modules = [
"slickpic",
"slideshare",
"smugmug",
+ "speakerdeck",
"tsumino",
"tumblr",
"twitter",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index ceda29c..c504dba 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -39,8 +39,9 @@ class ArtstationExtractor(Extractor):
if adict["has_embedded_player"] and self.external:
player = adict["player_embedded"]
- url = text.extract(player, 'src="', '"')[0]
- if not url.startswith(self.root):
+ url = text.extract(player, 'src="', '"')[0] or \
+ text.extract(player, "src='", "'")[0]
+ if url and not url.startswith(self.root):
asset["extension"] = None
yield Message.Url, "ytdl:" + url, asset
continue
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 2657b5d..331cfc2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -109,7 +109,7 @@ class BloggerPostExtractor(BloggerExtractor):
"posts" : int,
"published" : "2010-11-21T10:19:42-08:00",
"updated" : str,
- "url" : "http://www.julianbunker.com/",
+ "url" : "http://julianbphotography.blogspot.com/",
},
"post": {
"author" : "Julian Bunker",
@@ -128,9 +128,7 @@ class BloggerPostExtractor(BloggerExtractor):
"url": str,
},
}),
- ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html", {
- "url": "9928429fb62f712eb4de80f53625eccecc614aae",
- }),
+ ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
# video (#587)
(("http://cfnmscenesinmovies.blogspot.com/2011/11/"
"cfnm-scene-jenna-fischer-in-office.html"), {
@@ -156,7 +154,7 @@ class BloggerBlogExtractor(BloggerExtractor):
"count": 25,
"pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
}),
- ("blogger:http://www.julianbunker.com/", {
+ ("blogger:https://www.kefblog.com.ng/", {
"range": "1-25",
"count": 25,
}),
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index ca722b8..2631052 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1006,7 +1006,8 @@ class DeviantartOAuthAPI():
msg = "API responded with {} {}".format(
status, response.reason)
if status == 429:
- self.delay += 1
+ if self.delay < 9:
+ self.delay += 1
self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
else:
self.log.error(msg)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index bb87a69..17fe935 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -288,7 +288,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
class NewgroundsArtExtractor(NewgroundsExtractor):
"""Extractor for all images of a newgrounds user"""
subcategory = "art"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/art/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
test = ("https://tomfulp.newgrounds.com/art", {
"pattern": NewgroundsImageExtractor.pattern,
"count": ">= 3",
@@ -298,7 +298,7 @@ class NewgroundsArtExtractor(NewgroundsExtractor):
class NewgroundsAudioExtractor(NewgroundsExtractor):
"""Extractor for all audio submissions of a newgrounds user"""
subcategory = "audio"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/audio/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
test = ("https://tomfulp.newgrounds.com/audio", {
"pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3",
"count": ">= 4",
@@ -308,7 +308,7 @@ class NewgroundsAudioExtractor(NewgroundsExtractor):
class NewgroundsMoviesExtractor(NewgroundsExtractor):
"""Extractor for all movies of a newgrounds user"""
subcategory = "movies"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
test = ("https://tomfulp.newgrounds.com/movies", {
"pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
"range": "1-10",
@@ -319,7 +319,7 @@ class NewgroundsMoviesExtractor(NewgroundsExtractor):
class NewgroundsUserExtractor(NewgroundsExtractor):
"""Extractor for a newgrounds user profile"""
subcategory = "user"
- pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/?$"
+ pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
test = (
("https://tomfulp.newgrounds.com", {
"pattern": "https://tomfulp.newgrounds.com/art$",
@@ -414,6 +414,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
@staticmethod
def _extract_favorites(page):
return [
- "https://" + user.rpartition('"')[2]
+ "https://" + user.rpartition('"')[2].lstrip("/:")
for user in text.extract_iter(page, 'class="item-user', '"><img')
]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 18c10a6..570bd72 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -47,8 +47,8 @@ class PatreonExtractor(Extractor):
self._attachments(post),
self._content(post),
):
- fhash = url.split("/")[9].partition("?")[0]
- if fhash not in hashes:
+ fhash = self._filehash(url)
+ if fhash not in hashes or not fhash:
hashes.add(fhash)
post["hash"] = fhash
post["type"] = kind
@@ -158,12 +158,23 @@ class PatreonExtractor(Extractor):
return attr
def _filename(self, url):
- """Fetch filename from its Content-Disposition header"""
+ """Fetch filename from an URL's Content-Disposition header"""
response = self.request(url, method="HEAD", fatal=False)
cd = response.headers.get("Content-Disposition")
return text.extract(cd, 'filename="', '"')[0]
@staticmethod
+ def _filehash(url):
+ """Extract MD5 hash from a download URL"""
+ parts = url.partition("?")[0].split("/")
+ parts.reverse()
+
+ for part in parts:
+ if len(part) == 32:
+ return part
+ return ""
+
+ @staticmethod
def _build_url(endpoint, query):
return (
"https://www.patreon.com/api/" + endpoint +
@@ -194,7 +205,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
subcategory = "creator"
pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
- r"(?:user(?:/posts)?/?\?([^#]+)|([^/?&#]+)/?)")
+ r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?")
test = (
("https://www.patreon.com/koveliana", {
"range": "1-25",
@@ -213,6 +224,10 @@ class PatreonCreatorExtractor(PatreonExtractor):
"title" : str,
},
}),
+ ("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {
+ "count": 1,
+ "keyword": {"date": "dt:2020-03-30 21:21:44"},
+ }),
("https://www.patreon.com/kovelianot", {
"exception": exception.NotFoundError,
}),
@@ -222,26 +237,33 @@ class PatreonCreatorExtractor(PatreonExtractor):
def __init__(self, match):
PatreonExtractor.__init__(self, match)
- self.query, self.creator = match.groups()
+ self.creator, self.query = match.groups()
def posts(self):
- if self.creator:
- url = "{}/{}".format(self.root, self.creator.lower())
+ query = text.parse_query(self.query)
+
+ creator_id = query.get("u")
+ if creator_id:
+ url = "{}/user?u={}".format(self.root, creator_id)
else:
- query = text.parse_query(self.query)
- url = "{}/user?u={}".format(self.root, query.get("u"))
+ url = "{}/{}".format(self.root, self.creator.lower())
page = self.request(url, notfound="creator").text
campaign_id = text.extract(page, "/campaign/", "/")[0]
-
if not campaign_id:
raise exception.NotFoundError("creator")
+ filters = "".join(
+ "&filter[{}={}".format(key[8:], text.escape(value))
+ for key, value in query.items()
+ if key.startswith("filters[")
+ )
+
url = self._build_url("posts", (
- "&sort=-published_at"
+ "&sort=" + query.get("sort", "-published_at") +
"&filter[is_draft]=false"
"&filter[contains_exclusive_posts]=true"
- "&filter[campaign_id]=" + campaign_id
+ "&filter[campaign_id]=" + campaign_id + filters
))
return self._pagination(url)
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
index f6bb4df..4841743 100644
--- a/gallery_dl/extractor/realbooru.py
+++ b/gallery_dl/extractor/realbooru.py
@@ -53,7 +53,7 @@ class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
"options": (("tags", True),),
"keyword": {
"tags_general" : str,
- "tags_metadata": "cute tagme",
+ "tags_metadata": str,
"tags_model" : "jennifer_lawrence",
},
})
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
new file mode 100644
index 0000000..1a9691c
--- /dev/null
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Leonardo Taccari
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://speakerdeck.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class SpeakerdeckPresentationExtractor(Extractor):
+ """Extractor for images from a presentation on speakerdeck.com"""
+ category = "speakerdeck"
+ subcategory = "presentation"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{presentation}-{num:>02}.{extension}"
+ archive_fmt = "{presentation}_{num}"
+ pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com"
+ r"/([^/?&#]+)/([^/?&#]+)")
+ test = (
+ (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), {
+ "url": "e97d4a7d5c64267e921c13eb7946d7074794a0d2",
+ "content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user, self.presentation = match.groups()
+ self.presentation_id = None
+
+ def items(self):
+ data = self.get_job_metadata()
+ imgs = self.get_image_urls()
+ data["count"] = len(imgs)
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for data["num"], url in enumerate(imgs, 1):
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+ def get_job_metadata(self):
+ """Collect metadata for extractor-job"""
+ url = "https://speakerdeck.com/oembed.json"
+ params = {
+ "url": "https://speakerdeck.com/" + self.user +
+ "/" + self.presentation,
+ }
+
+ data = self.request(url, params=params).json()
+
+ self.presentation_id, pos = \
+ text.extract(data["html"], 'src="//speakerdeck.com/player/', '"')
+
+ return {
+ "user": self.user,
+ "presentation": self.presentation,
+ "presentation_id": self.presentation_id,
+ "title": data["title"],
+ "author": data["author_name"],
+ }
+
+ def get_image_urls(self):
+ """Extract and return a list of all image-urls"""
+ page = self.request("https://speakerdeck.com/player/" +
+ self.presentation_id).text
+ return list(text.extract_iter(page, 'js-sd-slide" data-url="', '"'))
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 3a274c7..c409f54 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -33,6 +33,7 @@ class TwitterExtractor(Extractor):
self._user_dict = None
self.logged_in = False
self.retweets = self.config("retweets", True)
+ self.replies = self.config("replies", True)
self.twitpic = self.config("twitpic", False)
self.content = self.config("content", False)
self.videos = self.config("videos", True)
@@ -48,7 +49,9 @@ class TwitterExtractor(Extractor):
for tweet in self.tweets():
data = self._data_from_tweet(tweet)
- if not data or not self.retweets and data["retweet_id"]:
+ if not data or \
+ not self.retweets and data["retweet_id"] or \
+ not self.replies and data["reply"]:
continue
data.update(metadata)
@@ -370,6 +373,11 @@ class TwitterTweetExtractor(TwitterExtractor):
"options": (("videos", "ytdl"),),
"pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
}),
+ # 'replies' option (#705)
+ ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
+ "options": (("replies", False),),
+ "count": 0,
+ }),
# /i/web/ URL
("https://twitter.com/i/web/status/1155074198240292865", {
"pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 0306112..c9f0ec3 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -72,7 +72,7 @@ class VscoExtractor(Extractor):
page = self.request(url, notfound=self.subcategory).text
return json.loads(text.extract(page, "__PRELOADED_STATE__ = ", "<")[0])
- def _pagination(self, url, params, token, key, extra):
+ def _pagination(self, url, params, token, key, extra=None):
headers = {
"Referer" : "{}/{}".format(self.root, self.user),
"Authorization" : "Bearer " + token,
@@ -80,7 +80,8 @@ class VscoExtractor(Extractor):
"X-Client-Build" : "1",
}
- yield from map(self._transform_media, extra)
+ if extra:
+ yield from map(self._transform_media, extra)
while True:
data = self.request(url, params=params, headers=headers).json()
@@ -130,23 +131,17 @@ class VscoUserExtractor(VscoExtractor):
def images(self):
url = "{}/{}/gallery".format(self.root, self.user)
data = self._extract_preload_state(url)
-
tkn = data["users"]["currentUser"]["tkn"]
sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"])
- site = data["medias"]["bySiteId"][sid]
url = "{}/api/3.0/medias/profile".format(self.root)
params = {
"site_id" : sid,
"limit" : "14",
- "show_only": "0",
- "cursor" : site["nextCursor"],
+ "cursor" : None,
}
- return self._pagination(url, params, tkn, "media", (
- data["medias"]["byId"][media[media["type"]]]["media"]
- for media in site["medias"]
- ))
+ return self._pagination(url, params, tkn, "media")
class VscoCollectionExtractor(VscoExtractor):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 9539c2f..aa9bdae 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -23,6 +23,7 @@ class WeiboExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.retweets = self.config("retweets", True)
+ self.videos = self.config("videos", True)
def items(self):
yield Message.Version, 1
@@ -52,7 +53,7 @@ class WeiboExtractor(Extractor):
yield Message.Url, image["url"], data
num += 1
- if "page_info" in obj and "media_info" in obj["page_info"]:
+ if self.videos and "media_info" in obj.get("page_info", ()):
info = obj["page_info"]["media_info"]
url = info.get("stream_url_hd") or info.get("stream_url")
@@ -70,6 +71,7 @@ class WeiboExtractor(Extractor):
data["extension"] = "mp4"
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
yield Message.Url, url, data
+ num += 1
if self.retweets and "retweeted_status" in obj:
obj = obj["retweeted_status"]
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 73920c2..40b5c73 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.13.5"
+__version__ = "1.13.6"
diff --git a/test/test_cache.py b/test/test_cache.py
index 31ece7e..e19896e 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -7,14 +7,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import os
+import sys
import unittest
-import tempfile
+
import time
+import tempfile
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import config, util # noqa E402
-from gallery_dl import config, util
dbpath = tempfile.mkstemp()[1]
config.set(("cache",), "file", dbpath)
-from gallery_dl import cache # noqa
+from gallery_dl import cache # noqa E402
def tearDownModule():
diff --git a/test/test_config.py b/test/test_config.py
index a9d3f54..cb202be 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -1,17 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2015-2019 Mike Fährmann
+# Copyright 2015-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-import unittest
-import gallery_dl.config as config
import os
+import sys
+import unittest
+
+import json
import tempfile
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import config # noqa E402
+
class TestConfig(unittest.TestCase):
@@ -137,5 +142,25 @@ class TestConfig(unittest.TestCase):
self.assertEqual(config.get(("b",), "e"), "foo")
+class TestConfigFiles(unittest.TestCase):
+
+ def test_default_config(self):
+ cfg = self._load("gallery-dl.conf")
+ self.assertIsInstance(cfg, dict)
+ self.assertTrue(cfg)
+
+ def test_example_config(self):
+ cfg = self._load("gallery-dl-example.conf")
+ self.assertIsInstance(cfg, dict)
+ self.assertTrue(cfg)
+
+ @staticmethod
+ def _load(name):
+ rootdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+ path = os.path.join(rootdir, "docs", name)
+ with open(path) as fp:
+ return json.load(fp)
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_cookies.py b/test/test_cookies.py
index c39a5e6..f691980 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -7,6 +7,8 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import os
+import sys
import unittest
from unittest import mock
@@ -14,8 +16,8 @@ import logging
import tempfile
from os.path import join
-import gallery_dl.config as config
-import gallery_dl.extractor as extractor
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import config, extractor # noqa E402
class TestCookiejar(unittest.TestCase):
diff --git a/test/test_downloader.py b/test/test_downloader.py
index c43b533..9393040 100644
--- a/test/test_downloader.py
+++ b/test/test_downloader.py
@@ -1,29 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2018 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-import re
+import os
import sys
+import unittest
+from unittest.mock import Mock, MagicMock, patch
+
+import re
import base64
import os.path
import tempfile
import threading
import http.server
-import unittest
-from unittest.mock import Mock, MagicMock, patch
-
-import gallery_dl.downloader as downloader
-import gallery_dl.extractor as extractor
-import gallery_dl.config as config
-from gallery_dl.downloader.common import DownloaderBase
-from gallery_dl.output import NullOutput
-from gallery_dl.util import PathFormat
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import downloader, extractor, config, util # noqa E402
+from gallery_dl.downloader.common import DownloaderBase # noqa E402
+from gallery_dl.output import NullOutput # noqa E402
class MockDownloaderModule(Mock):
@@ -119,7 +118,7 @@ class TestDownloaderBase(unittest.TestCase):
"filename": name,
"extension": extension,
}
- pathfmt = PathFormat(cls.extractor)
+ pathfmt = util.PathFormat(cls.extractor)
pathfmt.set_directory(kwdict)
pathfmt.set_filename(kwdict)
diff --git a/test/test_extractor.py b/test/test_extractor.py
index e6f4963..043bd52 100644
--- a/test/test_extractor.py
+++ b/test/test_extractor.py
@@ -7,18 +7,20 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import os
import sys
+import unittest
+from unittest.mock import patch
+
import time
import string
from datetime import datetime, timedelta
-import unittest
-from unittest.mock import patch
-
-from gallery_dl import extractor
-from gallery_dl.extractor import mastodon
-from gallery_dl.extractor.common import Extractor, Message
-from gallery_dl.extractor.directlink import DirectlinkExtractor as DLExtractor
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import extractor # noqa E402
+from gallery_dl.extractor import mastodon # noqa E402
+from gallery_dl.extractor.common import Extractor, Message # noqa E402
+from gallery_dl.extractor.directlink import DirectlinkExtractor # noqa E402
class FakeExtractor(Extractor):
@@ -78,7 +80,7 @@ class TestExtractorModule(unittest.TestCase):
test_uri = "test:"
fake_uri = "fake:"
- self.assertIsInstance(extractor.find(link_uri), DLExtractor)
+ self.assertIsInstance(extractor.find(link_uri), DirectlinkExtractor)
self.assertIsInstance(extractor.find(test_uri), Extractor)
self.assertIsNone(extractor.find(fake_uri))
@@ -87,12 +89,12 @@ class TestExtractorModule(unittest.TestCase):
self.assertIsInstance(extractor.find(test_uri), Extractor)
self.assertIsNone(extractor.find(fake_uri))
- with extractor.blacklist([], [DLExtractor, FakeExtractor]):
+ with extractor.blacklist([], [DirectlinkExtractor, FakeExtractor]):
self.assertIsNone(extractor.find(link_uri))
self.assertIsInstance(extractor.find(test_uri), Extractor)
self.assertIsNone(extractor.find(fake_uri))
- with extractor.blacklist(["test"], [DLExtractor]):
+ with extractor.blacklist(["test"], [DirectlinkExtractor]):
self.assertIsNone(extractor.find(link_uri))
self.assertIsNone(extractor.find(test_uri))
self.assertIsNone(extractor.find(fake_uri))
@@ -127,7 +129,8 @@ class TestExtractorModule(unittest.TestCase):
for extr2 in extractor._cache:
# skip DirectlinkExtractor pattern if it isn't tested
- if extr1 != DLExtractor and extr2 == DLExtractor:
+ if extr1 != DirectlinkExtractor and \
+ extr2 == DirectlinkExtractor:
continue
match = extr2.pattern.match(url)
diff --git a/test/test_oauth.py b/test/test_oauth.py
index 2ce5b43..58d4088 100644
--- a/test/test_oauth.py
+++ b/test/test_oauth.py
@@ -1,15 +1,18 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2018 Mike Fährmann
+# Copyright 2018-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import os
+import sys
import unittest
-from gallery_dl import oauth, text
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import oauth, text # noqa E402
TESTSERVER = "http://term.ie/oauth/example"
CONSUMER_KEY = "key"
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 629b0d7..354f9ff 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -1,23 +1,24 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2019 Mike Fährmann
+# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
import os
-import os.path
+import sys
+import unittest
+from unittest.mock import Mock, mock_open, patch
+
import zipfile
import tempfile
from datetime import datetime, timezone as tz
-import unittest
-from unittest.mock import Mock, mock_open, patch
-
-from gallery_dl import postprocessor, extractor, util, config
-from gallery_dl.postprocessor.common import PostProcessor
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import postprocessor, extractor, util, config # noqa E402
+from gallery_dl.postprocessor.common import PostProcessor # noqa E402
class MockPostprocessorModule(Mock):
diff --git a/test/test_results.py b/test/test_results.py
index bfed2ca..046efc5 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -9,12 +9,15 @@
import os
import sys
+import unittest
+
import re
import json
import hashlib
import datetime
-import unittest
-from gallery_dl import extractor, util, job, config, exception
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import extractor, util, job, config, exception # noqa E402
# these don't work on Travis CI
diff --git a/test/test_text.py b/test/test_text.py
index 0390823..4f31d81 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -7,10 +7,14 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+import os
+import sys
import unittest
+
import datetime
-from gallery_dl import text
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import text # noqa E402
INVALID = ((), [], {}, None, 1, 2.3)
diff --git a/test/test_util.py b/test/test_util.py
index ffabd37..5fbaa4e 100644
--- a/test/test_util.py
+++ b/test/test_util.py
@@ -7,14 +7,17 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-import unittest
+import os
import sys
+import unittest
+
import io
import random
import string
import http.cookiejar
-from gallery_dl import util, text, exception
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gallery_dl import util, text, exception # noqa E402
class TestRange(unittest.TestCase):