New upstream version 1.13.6 (upstream/1.13.6)

author: Unit 193 <unit193@gmail.com> 2020-05-03 00:06:40 -0400
committer: Unit 193 <unit193@gmail.com> 2020-05-03 00:06:40 -0400
commit: 90e50db2e3c38f523bb5195d295290b06e5cedb0 (patch)
tree: 4759dc0faea79f83fa5074e2d0bd82b18a9caaea /gallery_dl
parent: d5b96ce44b7809f5ae01e3e9d70a1d58fe21ccf5 (diff)
14 files changed, 156 insertions, 48 deletions
diff --git a/gallery_dl/cloudflare.py b/gallery_dl/cloudflare.py
index e3ebd1a..43ccdeb 100644
--- a/gallery_dl/cloudflare.py
+++ b/gallery_dl/cloudflare.py
@@ -144,11 +144,15 @@ def evaluate_expression(expr, page, netloc, *,
     # evaluate them,
     # and accumulate their values in 'result'
     result = ""
-    for subexpr in split_re.findall(expr) or (expr,):
-        result += str(sum(
-            VALUES[part]
-            for part in subexpr.split("[]")
-        ))
+    for subexpr in expr.strip("+()").split(")+("):
+        value = 0
+        for part in subexpr.split("+"):
+            if "-" in part:
+                p1, _, p2 = part.partition("-")
+                value += VALUES[p1] - VALUES[p2]
+            else:
+                value += VALUES[part]
+        result += str(value)
     return int(result)
 
 
@@ -158,12 +162,14 @@ OPERATORS = {
     "*": operator.mul,
 }
 
+
 VALUES = {
     "": 0,
-    "+": 0,
-    "!+": 1,
-    "!!": 1,
-    "+!!": 1,
+    "!": 1,
+    "[]": 0,
+    "!![]": 1,
+    "(!![]": 1,
+    "(!![])": 1,
 }
 
 
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index 64a2978..021dc16 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -243,6 +243,10 @@ MIMETYPE_MAP = {
     "image/webp": "webp",
     "image/svg+xml": "svg",
 
+    "image/vnd.adobe.photoshop": "psd",
+    "image/x-photoshop": "psd",
+    "application/x-photoshop": "psd",
+
     "video/webm": "webm",
     "video/ogg": "ogg",
     "video/mp4": "mp4",
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 2c87eb3..85fbddb 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -105,6 +105,7 @@ modules = [
     "slickpic",
     "slideshare",
     "smugmug",
+    "speakerdeck",
     "tsumino",
     "tumblr",
     "twitter",
diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py
index ceda29c..c504dba 100644
--- a/gallery_dl/extractor/artstation.py
+++ b/gallery_dl/extractor/artstation.py
@@ -39,8 +39,9 @@ class ArtstationExtractor(Extractor):
 
                 if adict["has_embedded_player"] and self.external:
                     player = adict["player_embedded"]
-                    url = text.extract(player, 'src="', '"')[0]
-                    if not url.startswith(self.root):
+                    url = text.extract(player, 'src="', '"')[0] or \
+                        text.extract(player, "src='", "'")[0]
+                    if url and not url.startswith(self.root):
                         asset["extension"] = None
                         yield Message.Url, "ytdl:" + url, asset
                         continue
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
index 2657b5d..331cfc2 100644
--- a/gallery_dl/extractor/blogger.py
+++ b/gallery_dl/extractor/blogger.py
@@ -109,7 +109,7 @@ class BloggerPostExtractor(BloggerExtractor):
                     "posts"      : int,
                     "published"  : "2010-11-21T10:19:42-08:00",
                     "updated"    : str,
-                    "url"        : "http://www.julianbunker.com/",
+                    "url"        : "http://julianbphotography.blogspot.com/",
                 },
                 "post": {
                     "author"     : "Julian Bunker",
@@ -128,9 +128,7 @@ class BloggerPostExtractor(BloggerExtractor):
                 "url": str,
             },
         }),
-        ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html", {
-            "url": "9928429fb62f712eb4de80f53625eccecc614aae",
-        }),
+        ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
         # video (#587)
         (("http://cfnmscenesinmovies.blogspot.com/2011/11/"
           "cfnm-scene-jenna-fischer-in-office.html"), {
@@ -156,7 +154,7 @@ class BloggerBlogExtractor(BloggerExtractor):
             "count": 25,
             "pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
         }),
-        ("blogger:http://www.julianbunker.com/", {
+        ("blogger:https://www.kefblog.com.ng/", {
             "range": "1-25",
             "count": 25,
         }),
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index ca722b8..2631052 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -1006,7 +1006,8 @@ class DeviantartOAuthAPI():
             msg = "API responded with {} {}".format(
                 status, response.reason)
             if status == 429:
-                self.delay += 1
+                if self.delay < 9:
+                    self.delay += 1
                 self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
             else:
                 self.log.error(msg)
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index bb87a69..17fe935 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -288,7 +288,7 @@ class NewgroundsMediaExtractor(NewgroundsExtractor):
 class NewgroundsArtExtractor(NewgroundsExtractor):
     """Extractor for all images of a newgrounds user"""
     subcategory = "art"
-    pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/art/?$"
+    pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/art/?$"
     test = ("https://tomfulp.newgrounds.com/art", {
         "pattern": NewgroundsImageExtractor.pattern,
         "count": ">= 3",
@@ -298,7 +298,7 @@ class NewgroundsArtExtractor(NewgroundsExtractor):
 class NewgroundsAudioExtractor(NewgroundsExtractor):
     """Extractor for all audio submissions of a newgrounds user"""
     subcategory = "audio"
-    pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/audio/?$"
+    pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/audio/?$"
     test = ("https://tomfulp.newgrounds.com/audio", {
         "pattern": r"https://audio.ngfiles.com/\d+/\d+_.+\.mp3",
         "count": ">= 4",
@@ -308,7 +308,7 @@ class NewgroundsAudioExtractor(NewgroundsExtractor):
 class NewgroundsMoviesExtractor(NewgroundsExtractor):
     """Extractor for all movies of a newgrounds user"""
     subcategory = "movies"
-    pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/movies/?$"
+    pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/movies/?$"
     test = ("https://tomfulp.newgrounds.com/movies", {
         "pattern": r"https://uploads.ungrounded.net(/alternate)?/\d+/\d+_.+",
         "range": "1-10",
@@ -319,7 +319,7 @@ class NewgroundsMoviesExtractor(NewgroundsExtractor):
 class NewgroundsUserExtractor(NewgroundsExtractor):
     """Extractor for a newgrounds user profile"""
     subcategory = "user"
-    pattern = r"(?:https?://)?([^.]+)\.newgrounds\.com/?$"
+    pattern = r"(?:https?://)?([\w-]+)\.newgrounds\.com/?$"
     test = (
         ("https://tomfulp.newgrounds.com", {
             "pattern": "https://tomfulp.newgrounds.com/art$",
@@ -414,6 +414,6 @@ class NewgroundsFollowingExtractor(NewgroundsFavoriteExtractor):
     @staticmethod
     def _extract_favorites(page):
         return [
-            "https://" + user.rpartition('"')[2]
+            "https://" + user.rpartition('"')[2].lstrip("/:")
             for user in text.extract_iter(page, 'class="item-user', '"><img')
         ]
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index 18c10a6..570bd72 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -47,8 +47,8 @@ class PatreonExtractor(Extractor):
                 self._attachments(post),
                 self._content(post),
             ):
-                fhash = url.split("/")[9].partition("?")[0]
-                if fhash not in hashes:
+                fhash = self._filehash(url)
+                if fhash not in hashes or not fhash:
                     hashes.add(fhash)
                     post["hash"] = fhash
                     post["type"] = kind
@@ -158,12 +158,23 @@ class PatreonExtractor(Extractor):
         return attr
 
     def _filename(self, url):
-        """Fetch filename from its Content-Disposition header"""
+        """Fetch filename from an URL's Content-Disposition header"""
         response = self.request(url, method="HEAD", fatal=False)
         cd = response.headers.get("Content-Disposition")
         return text.extract(cd, 'filename="', '"')[0]
 
     @staticmethod
+    def _filehash(url):
+        """Extract MD5 hash from a download URL"""
+        parts = url.partition("?")[0].split("/")
+        parts.reverse()
+
+        for part in parts:
+            if len(part) == 32:
+                return part
+        return ""
+
+    @staticmethod
     def _build_url(endpoint, query):
         return (
             "https://www.patreon.com/api/" + endpoint +
@@ -194,7 +205,7 @@ class PatreonCreatorExtractor(PatreonExtractor):
     subcategory = "creator"
     pattern = (r"(?:https?://)?(?:www\.)?patreon\.com"
                r"/(?!(?:home|join|posts|login|signup)(?:$|[/?&#]))"
-               r"(?:user(?:/posts)?/?\?([^#]+)|([^/?&#]+)/?)")
+               r"([^/?&#]+)(?:/posts)?/?(?:\?([^#]+))?")
     test = (
         ("https://www.patreon.com/koveliana", {
             "range": "1-25",
@@ -213,6 +224,10 @@ class PatreonCreatorExtractor(PatreonExtractor):
                 "title"        : str,
             },
         }),
+        ("https://www.patreon.com/koveliana/posts?filters[month]=2020-3", {
+            "count": 1,
+            "keyword": {"date": "dt:2020-03-30 21:21:44"},
+        }),
         ("https://www.patreon.com/kovelianot", {
             "exception": exception.NotFoundError,
         }),
@@ -222,26 +237,33 @@ class PatreonCreatorExtractor(PatreonExtractor):
 
     def __init__(self, match):
         PatreonExtractor.__init__(self, match)
-        self.query, self.creator = match.groups()
+        self.creator, self.query = match.groups()
 
     def posts(self):
-        if self.creator:
-            url = "{}/{}".format(self.root, self.creator.lower())
+        query = text.parse_query(self.query)
+
+        creator_id = query.get("u")
+        if creator_id:
+            url = "{}/user?u={}".format(self.root, creator_id)
         else:
-            query = text.parse_query(self.query)
-            url = "{}/user?u={}".format(self.root, query.get("u"))
+            url = "{}/{}".format(self.root, self.creator.lower())
 
         page = self.request(url, notfound="creator").text
         campaign_id = text.extract(page, "/campaign/", "/")[0]
-
         if not campaign_id:
             raise exception.NotFoundError("creator")
 
+        filters = "".join(
+            "&filter[{}={}".format(key[8:], text.escape(value))
+            for key, value in query.items()
+            if key.startswith("filters[")
+        )
+
         url = self._build_url("posts", (
-            "&sort=-published_at"
+            "&sort=" + query.get("sort", "-published_at") +
             "&filter[is_draft]=false"
             "&filter[contains_exclusive_posts]=true"
-            "&filter[campaign_id]=" + campaign_id
+            "&filter[campaign_id]=" + campaign_id + filters
         ))
         return self._pagination(url)
 
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py
index f6bb4df..4841743 100644
--- a/gallery_dl/extractor/realbooru.py
+++ b/gallery_dl/extractor/realbooru.py
@@ -53,7 +53,7 @@ class RealbooruPostExtractor(booru.PostMixin, RealbooruExtractor):
         "options": (("tags", True),),
         "keyword": {
             "tags_general" : str,
-            "tags_metadata": "cute tagme",
+            "tags_metadata": str,
             "tags_model"   : "jennifer_lawrence",
         },
     })
diff --git a/gallery_dl/extractor/speakerdeck.py b/gallery_dl/extractor/speakerdeck.py
new file mode 100644
index 0000000..1a9691c
--- /dev/null
+++ b/gallery_dl/extractor/speakerdeck.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Leonardo Taccari
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://speakerdeck.com/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class SpeakerdeckPresentationExtractor(Extractor):
+    """Extractor for images from a presentation on speakerdeck.com"""
+    category = "speakerdeck"
+    subcategory = "presentation"
+    directory_fmt = ("{category}", "{user}")
+    filename_fmt = "{presentation}-{num:>02}.{extension}"
+    archive_fmt = "{presentation}_{num}"
+    pattern = (r"(?:https?://)?(?:www\.)?speakerdeck\.com"
+               r"/([^/?&#]+)/([^/?&#]+)")
+    test = (
+        (("https://speakerdeck.com/speakerdeck/introduction-to-speakerdeck"), {
+            "url": "e97d4a7d5c64267e921c13eb7946d7074794a0d2",
+            "content": "75c7abf0969b0bcab23e0da9712c95ee5113db3a",
+        }),
+    )
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.user, self.presentation = match.groups()
+        self.presentation_id = None
+
+    def items(self):
+        data = self.get_job_metadata()
+        imgs = self.get_image_urls()
+        data["count"] = len(imgs)
+        yield Message.Version, 1
+        yield Message.Directory, data
+        for data["num"], url in enumerate(imgs, 1):
+            yield Message.Url, url, text.nameext_from_url(url, data)
+
+    def get_job_metadata(self):
+        """Collect metadata for extractor-job"""
+        url = "https://speakerdeck.com/oembed.json"
+        params = {
+            "url": "https://speakerdeck.com/" + self.user +
+                   "/" + self.presentation,
+        }
+
+        data = self.request(url, params=params).json()
+
+        self.presentation_id, pos = \
+            text.extract(data["html"], 'src="//speakerdeck.com/player/', '"')
+
+        return {
+            "user": self.user,
+            "presentation": self.presentation,
+            "presentation_id": self.presentation_id,
+            "title": data["title"],
+            "author": data["author_name"],
+        }
+
+    def get_image_urls(self):
+        """Extract and return a list of all image-urls"""
+        page = self.request("https://speakerdeck.com/player/" +
+                            self.presentation_id).text
+        return list(text.extract_iter(page, 'js-sd-slide" data-url="', '"'))
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 3a274c7..c409f54 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -33,6 +33,7 @@ class TwitterExtractor(Extractor):
         self._user_dict = None
         self.logged_in = False
         self.retweets = self.config("retweets", True)
+        self.replies = self.config("replies", True)
         self.twitpic = self.config("twitpic", False)
         self.content = self.config("content", False)
         self.videos = self.config("videos", True)
@@ -48,7 +49,9 @@ class TwitterExtractor(Extractor):
 
         for tweet in self.tweets():
             data = self._data_from_tweet(tweet)
-            if not data or not self.retweets and data["retweet_id"]:
+            if not data or \
+                    not self.retweets and data["retweet_id"] or \
+                    not self.replies and data["reply"]:
                 continue
             data.update(metadata)
 
@@ -370,6 +373,11 @@ class TwitterTweetExtractor(TwitterExtractor):
             "options": (("videos", "ytdl"),),
             "pattern": r"ytdl:https://twitter.com/i/web.+/1103767554424598528",
         }),
+        # 'replies' option (#705)
+        ("https://twitter.com/tyson_hesse/status/1103767554424598528", {
+            "options": (("replies", False),),
+            "count": 0,
+        }),
         # /i/web/ URL
         ("https://twitter.com/i/web/status/1155074198240292865", {
             "pattern": r"https://pbs.twimg.com/media/EAel0vUUYAAZ4Bq.jpg:orig",
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
index 0306112..c9f0ec3 100644
--- a/gallery_dl/extractor/vsco.py
+++ b/gallery_dl/extractor/vsco.py
@@ -72,7 +72,7 @@ class VscoExtractor(Extractor):
         page = self.request(url, notfound=self.subcategory).text
         return json.loads(text.extract(page, "__PRELOADED_STATE__ = ", "<")[0])
 
-    def _pagination(self, url, params, token, key, extra):
+    def _pagination(self, url, params, token, key, extra=None):
         headers = {
             "Referer"          : "{}/{}".format(self.root, self.user),
             "Authorization"    : "Bearer " + token,
@@ -80,7 +80,8 @@ class VscoExtractor(Extractor):
             "X-Client-Build"   : "1",
         }
 
-        yield from map(self._transform_media, extra)
+        if extra:
+            yield from map(self._transform_media, extra)
 
         while True:
             data = self.request(url, params=params, headers=headers).json()
@@ -130,23 +131,17 @@ class VscoUserExtractor(VscoExtractor):
     def images(self):
         url = "{}/{}/gallery".format(self.root, self.user)
         data = self._extract_preload_state(url)
-
         tkn = data["users"]["currentUser"]["tkn"]
         sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"])
-        site = data["medias"]["bySiteId"][sid]
 
         url = "{}/api/3.0/medias/profile".format(self.root)
         params = {
             "site_id"  : sid,
             "limit"    : "14",
-            "show_only": "0",
-            "cursor"   : site["nextCursor"],
+            "cursor"   : None,
         }
 
-        return self._pagination(url, params, tkn, "media", (
-            data["medias"]["byId"][media[media["type"]]]["media"]
-            for media in site["medias"]
-        ))
+        return self._pagination(url, params, tkn, "media")
 
 
 class VscoCollectionExtractor(VscoExtractor):
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 9539c2f..aa9bdae 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -23,6 +23,7 @@ class WeiboExtractor(Extractor):
     def __init__(self, match):
         Extractor.__init__(self, match)
         self.retweets = self.config("retweets", True)
+        self.videos = self.config("videos", True)
 
     def items(self):
         yield Message.Version, 1
@@ -52,7 +53,7 @@ class WeiboExtractor(Extractor):
                         yield Message.Url, image["url"], data
                         num += 1
 
-                if "page_info" in obj and "media_info" in obj["page_info"]:
+                if self.videos and "media_info" in obj.get("page_info", ()):
                     info = obj["page_info"]["media_info"]
                     url = info.get("stream_url_hd") or info.get("stream_url")
 
@@ -70,6 +71,7 @@ class WeiboExtractor(Extractor):
                             data["extension"] = "mp4"
                             data["_ytdl_extra"] = {"protocol": "m3u8_native"}
                         yield Message.Url, url, data
+                        num += 1
 
                 if self.retweets and "retweeted_status" in obj:
                     obj = obj["retweeted_status"]
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 73920c2..40b5c73 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.13.5"
+__version__ = "1.13.6"
author	Unit 193 <unit193@gmail.com>	2020-05-03 00:06:40 -0400
committer	Unit 193 <unit193@gmail.com>	2020-05-03 00:06:40 -0400
commit	90e50db2e3c38f523bb5195d295290b06e5cedb0 (patch)
tree	4759dc0faea79f83fa5074e2d0bd82b18a9caaea /gallery_dl
parent	d5b96ce44b7809f5ae01e3e9d70a1d58fe21ccf5 (diff)