author     Unit 193 <unit193@ubuntu.com>  2019-11-10 22:14:10 -0500
committer  Unit 193 <unit193@ubuntu.com>  2019-11-10 22:14:10 -0500
commit     0c73e982fa596da07f23b377621ab894a9e64884 (patch)
tree       96f6a40a5656c15a2ec7217a8a1efcff5827bcbb /gallery_dl
parent     40f5fe6edef268632d3bc484e85e5b37bad67bff (diff)

New upstream version 1.11.1 (upstream/1.11.1)
Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/__init__.py                |  10
-rw-r--r--  gallery_dl/__main__.py                |   4
-rw-r--r--  gallery_dl/downloader/http.py         |   2
-rw-r--r--  gallery_dl/downloader/ytdl.py         |   6
-rw-r--r--  gallery_dl/exception.py               |  81
-rw-r--r--  gallery_dl/extractor/2chan.py         |   4
-rw-r--r--  gallery_dl/extractor/3dbooru.py       |  14
-rw-r--r--  gallery_dl/extractor/4chan.py         |  43
-rw-r--r--  gallery_dl/extractor/8chan.py         |  29
-rw-r--r--  gallery_dl/extractor/__init__.py      |   5
-rw-r--r--  gallery_dl/extractor/adultempire.py   |   6
-rw-r--r--  gallery_dl/extractor/blogger.py       | 178
-rw-r--r--  gallery_dl/extractor/bobx.py          |   8
-rw-r--r--  gallery_dl/extractor/chan.py          |  61
-rw-r--r--  gallery_dl/extractor/common.py        |  93
-rw-r--r--  gallery_dl/extractor/deviantart.py    |  75
-rw-r--r--  gallery_dl/extractor/exhentai.py      |  19
-rw-r--r--  gallery_dl/extractor/flickr.py        |   9
-rw-r--r--  gallery_dl/extractor/foolfuuka.py     |   2
-rw-r--r--  gallery_dl/extractor/foolslide.py     |   7
-rw-r--r--  gallery_dl/extractor/fuskator.py      |   2
-rw-r--r--  gallery_dl/extractor/hbrowse.py       |   3
-rw-r--r--  gallery_dl/extractor/hentaicafe.py    |   4
-rw-r--r--  gallery_dl/extractor/hentaifox.py     |   2
-rw-r--r--  gallery_dl/extractor/hentainexus.py   |   2
-rw-r--r--  gallery_dl/extractor/hitomi.py        |  41
-rw-r--r--  gallery_dl/extractor/imgbb.py         |   2
-rw-r--r--  gallery_dl/extractor/imgur.py         | 269
-rw-r--r--  gallery_dl/extractor/instagram.py     |  41
-rw-r--r--  gallery_dl/extractor/issuu.py         | 109
-rw-r--r--  gallery_dl/extractor/kissmanga.py     |   3
-rw-r--r--  gallery_dl/extractor/luscious.py      | 524
-rw-r--r--  gallery_dl/extractor/naver.py         | 140
-rw-r--r--  gallery_dl/extractor/nijie.py         |   2
-rw-r--r--  gallery_dl/extractor/nozomi.py        | 185
-rw-r--r--  gallery_dl/extractor/nsfwalbum.py     |   4
-rw-r--r--  gallery_dl/extractor/oauth.py         |   4
-rw-r--r--  gallery_dl/extractor/patreon.py       |   2
-rw-r--r--  gallery_dl/extractor/photobucket.py   |  13
-rw-r--r--  gallery_dl/extractor/pinterest.py     |   3
-rw-r--r--  gallery_dl/extractor/pixiv.py         |  11
-rw-r--r--  gallery_dl/extractor/plurk.py         |   5
-rw-r--r--  gallery_dl/extractor/reddit.py        |  13
-rw-r--r--  gallery_dl/extractor/sankaku.py       |   5
-rw-r--r--  gallery_dl/extractor/sexcom.py        |   6
-rw-r--r--  gallery_dl/extractor/simplyhentai.py  |  17
-rw-r--r--  gallery_dl/extractor/smugmug.py       |   8
-rw-r--r--  gallery_dl/extractor/tsumino.py       |   9
-rw-r--r--  gallery_dl/extractor/tumblr.py        |  14
-rw-r--r--  gallery_dl/extractor/twitter.py       | 116
-rw-r--r--  gallery_dl/extractor/wallhaven.py     |   4
-rw-r--r--  gallery_dl/job.py                     | 127
-rw-r--r--  gallery_dl/oauth.py                   |   6
-rw-r--r--  gallery_dl/option.py                  |  21
-rw-r--r--  gallery_dl/postprocessor/common.py    |   4
-rw-r--r--  gallery_dl/postprocessor/exec.py      |  22
-rw-r--r--  gallery_dl/postprocessor/zip.py       |   2
-rw-r--r--  gallery_dl/util.py                    |  32
-rw-r--r--  gallery_dl/version.py                 |   2
59 files changed, 1751 insertions, 684 deletions
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 94a445a..9665823 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -234,6 +234,7 @@ def main():
if pformat and len(urls) > 1 and args.loglevel < logging.ERROR:
urls = progress(urls, pformat)
+ retval = 0
for url in urls:
try:
log.debug("Starting %s for '%s'", jobtype.__name__, url)
@@ -241,17 +242,20 @@ def main():
for key, value in url.gconfig:
config.set(key, value)
with config.apply(url.lconfig):
- jobtype(url.value).run()
+ retval |= jobtype(url.value).run()
else:
- jobtype(url).run()
+ retval |= jobtype(url).run()
except exception.NoExtractorError:
log.error("No suitable extractor found for '%s'", url)
+ retval |= 64
+ return retval
except KeyboardInterrupt:
sys.exit("\nKeyboardInterrupt")
except BrokenPipeError:
pass
- except IOError as exc:
+ except OSError as exc:
import errno
if exc.errno != errno.EPIPE:
raise
+ return 1
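main() now builds a process exit status instead of always returning None: each job's run() result is OR-ed into retval, a missing extractor contributes 64, and __main__.py (next hunk) hands the result to sys.exit(). A rough standalone sketch of the bit-flag scheme, with codes taken from the reworked exception.py further below; the run_one stub is hypothetical:

    import sys

    def run_all(urls, run_one):
        retval = 0
        for url in urls:
            retval |= run_one(url)  # OR keeps every error class that occurred
        return retval               # e.g. 4 | 64 == 68: HTTP error and no extractor

    if __name__ == "__main__":
        # stub: pretend one URL has no matching extractor (code 64)
        urls = ["https://example.org/a", "bad://url"]
        sys.exit(run_all(urls, lambda u: 64 if u.startswith("bad") else 0))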
diff --git a/gallery_dl/__main__.py b/gallery_dl/__main__.py
index 04ea9fe..637d463 100644
--- a/gallery_dl/__main__.py
+++ b/gallery_dl/__main__.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
-# Copyright 2017 Mike Fährmann
+# Copyright 2017-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,4 +17,4 @@ if __package__ is None and not hasattr(sys, "frozen"):
import gallery_dl
if __name__ == "__main__":
- gallery_dl.main()
+ sys.exit(gallery_dl.main())
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index bb45de2..1c78cfb 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -107,7 +107,7 @@ class HttpDownloader(DownloaderBase):
elif code == 416 and filesize: # Requested Range Not Satisfiable
break
else:
- msg = "{}: {} for url: {}".format(code, response.reason, url)
+ msg = "'{} {}' for '{}'".format(code, response.reason, url)
if code == 429 or 500 <= code < 600: # Server Error
continue
self.log.warning("%s", msg)
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index ce921e3..fe6c4bc 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -29,6 +29,7 @@ class YoutubeDLDownloader(DownloaderBase):
"nocheckcertificate": not self.config("verify", extractor._verify),
"nopart": not self.part,
"updatetime": self.config("mtime", True),
+ "proxy": extractor.session.proxies.get("http"),
}
options.update(self.config("raw-options") or {})
@@ -58,6 +59,11 @@ class YoutubeDLDownloader(DownloaderBase):
return self._download_playlist(pathfmt, info_dict)
else:
info_dict = info_dict["entries"][index]
+
+ extra = pathfmt.kwdict.get("_ytdl_extra")
+ if extra:
+ info_dict.update(extra)
+
return self._download_video(pathfmt, info_dict)
def _download_video(self, pathfmt, info_dict):
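The downloader also picks up a "_ytdl_extra" entry from the file's keyword dict and merges it into the youtube-dl info dict before downloading, so extractors can supplement or override metadata. A small sketch of that hand-off; "_ytdl_extra" is the real key, the field values are made up:

    info_dict = {"id": "abc123", "title": "placeholder title"}
    kwdict = {"_ytdl_extra": {"title": "Actual Title", "uploader": "someone"}}

    extra = kwdict.get("_ytdl_extra")
    if extra:
        info_dict.update(extra)  # extractor-supplied fields take precedence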
diff --git a/gallery_dl/exception.py b/gallery_dl/exception.py
index 3e86177..783e2b2 100644
--- a/gallery_dl/exception.py
+++ b/gallery_dl/exception.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2015-2018 Mike Fährmann
+# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -17,63 +17,90 @@ Exception
| +-- AuthorizationError
| +-- NotFoundError
| +-- HttpError
- +-- DownloadError
- | +-- DownloadComplete
- | +-- DownloadRetry
- +-- NoExtractorError
+-- FormatError
+ | +-- FilenameFormatError
+ | +-- DirectoryFormatError
+-- FilterError
+ +-- NoExtractorError
+-- StopExtraction
"""
class GalleryDLException(Exception):
"""Base class for GalleryDL exceptions"""
+ default = None
+ msgfmt = None
+ code = 1
+
+ def __init__(self, message=None):
+ if not message:
+ message = self.default
+ elif isinstance(message, Exception):
+ message = "{}: {}".format(message.__class__.__name__, message)
+ if self.msgfmt:
+ message = self.msgfmt.format(message)
+ Exception.__init__(self, message)
class ExtractionError(GalleryDLException):
"""Base class for exceptions during information extraction"""
-class AuthenticationError(ExtractionError):
- """Invalid or missing login information"""
-
-
-class AuthorizationError(ExtractionError):
- """Insufficient privileges to access a resource"""
+class HttpError(ExtractionError):
+ """HTTP request during data extraction failed"""
+ default = "HTTP request failed"
+ code = 4
class NotFoundError(ExtractionError):
- """Requested resource (gallery/image) does not exist"""
-
-
-class HttpError(ExtractionError):
- """HTTP request during extraction failed"""
+ """Requested resource (gallery/image) could not be found"""
+ msgfmt = "Requested {} could not be found"
+ default = "resource (gallery/image)"
+ code = 8
-class DownloadError(GalleryDLException):
- """Base class for exceptions during file downloads"""
+class AuthenticationError(ExtractionError):
+ """Invalid or missing login credentials"""
+ default = "Invalid or missing login credentials"
+ code = 16
-class DownloadRetry(DownloadError):
- """Download attempt failed and should be retried"""
+class AuthorizationError(ExtractionError):
+ """Insufficient privileges to access a resource"""
+ default = "Insufficient privileges to access the specified resource"
+ code = 16
-class DownloadComplete(DownloadError):
- """Output file of attempted download is already complete"""
+class FormatError(GalleryDLException):
+ """Error while building output paths"""
+ code = 32
-class NoExtractorError(GalleryDLException):
- """No extractor can handle the given URL"""
+class FilenameFormatError(FormatError):
+ """Error while building output filenames"""
+ msgfmt = "Applying filename format string failed ({})"
-class FormatError(GalleryDLException):
- """Error while building output path"""
+class DirectoryFormatError(FormatError):
+ """Error while building output directory paths"""
+ msgfmt = "Applying directory format string failed ({})"
class FilterError(GalleryDLException):
"""Error while evaluating a filter expression"""
+ msgfmt = "Evaluating filter expression failed ({})"
+ code = 32
+
+
+class NoExtractorError(GalleryDLException):
+ """No extractor can handle the given URL"""
+ code = 64
class StopExtraction(GalleryDLException):
- """Extraction should stop"""
+ """Stop data extraction"""
+
+ def __init__(self, message=None, *args):
+ GalleryDLException.__init__(self)
+ self.message = message % args if args else message
+ self.code = 1 if message else 0
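The rewritten hierarchy gives every exception a numeric code for the new exit status, plus class-level 'default' and 'msgfmt' hooks that build the message. Re-creating just that mechanism shows how the pieces interact (classes copied from the hunk above; the prints are illustrative):

    class GalleryDLException(Exception):
        default = None
        msgfmt = None
        code = 1

        def __init__(self, message=None):
            if not message:
                message = self.default
            elif isinstance(message, Exception):
                message = "{}: {}".format(message.__class__.__name__, message)
            if self.msgfmt:
                message = self.msgfmt.format(message)
            Exception.__init__(self, message)

    class NotFoundError(GalleryDLException):
        msgfmt = "Requested {} could not be found"
        default = "resource (gallery/image)"
        code = 8

    print(NotFoundError())        # Requested resource (gallery/image) could not be found
    print(NotFoundError("user"))  # Requested user could not be found

StopExtraction departs from this pattern: it takes printf-style arguments (message % args) and reports code 0 when raised without a message, so a clean stop no longer counts as an error.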
diff --git a/gallery_dl/extractor/2chan.py b/gallery_dl/extractor/2chan.py
index 8df8645..33e7929 100644
--- a/gallery_dl/extractor/2chan.py
+++ b/gallery_dl/extractor/2chan.py
@@ -6,13 +6,13 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://www.2chan.net/"""
+"""Extractors for https://www.2chan.net/"""
from .common import Extractor, Message
from .. import text
-class FutabaThreadExtractor(Extractor):
+class _2chanThreadExtractor(Extractor):
"""Extractor for images from threads on www.2chan.net"""
category = "2chan"
subcategory = "thread"
diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py
index 15f4207..febbb51 100644
--- a/gallery_dl/extractor/3dbooru.py
+++ b/gallery_dl/extractor/3dbooru.py
@@ -11,7 +11,7 @@
from . import booru
-class ThreedeebooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
+class _3dbooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
"""Base class for 3dbooru extractors"""
category = "3dbooru"
api_url = "http://behoimi.org/post/index.json"
@@ -26,8 +26,7 @@ class ThreedeebooruExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
})
-class ThreedeebooruTagExtractor(booru.TagMixin,
- ThreedeebooruExtractor):
+class _3dbooruTagExtractor(booru.TagMixin, _3dbooruExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
@@ -37,8 +36,7 @@ class ThreedeebooruTagExtractor(booru.TagMixin,
})
-class ThreedeebooruPoolExtractor(booru.PoolMixin,
- ThreedeebooruExtractor):
+class _3dbooruPoolExtractor(booru.PoolMixin, _3dbooruExtractor):
"""Extractor for image-pools from behoimi.org"""
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
test = ("http://behoimi.org/pool/show/27", {
@@ -47,8 +45,7 @@ class ThreedeebooruPoolExtractor(booru.PoolMixin,
})
-class ThreedeebooruPostExtractor(booru.PostMixin,
- ThreedeebooruExtractor):
+class _3dbooruPostExtractor(booru.PostMixin, _3dbooruExtractor):
"""Extractor for single images from behoimi.org"""
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
test = ("http://behoimi.org/post/show/140852", {
@@ -64,8 +61,7 @@ class ThreedeebooruPostExtractor(booru.PostMixin,
})
-class ThreedeebooruPopularExtractor(booru.MoebooruPopularMixin,
- ThreedeebooruExtractor):
+class _3dbooruPopularExtractor(booru.MoebooruPopularMixin, _3dbooruExtractor):
"""Extractor for popular images from behoimi.org"""
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py
index e387b33..36a0573 100644
--- a/gallery_dl/extractor/4chan.py
+++ b/gallery_dl/extractor/4chan.py
@@ -6,15 +6,19 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images and videos from https://www.4chan.org/"""
+"""Extractors for https://www.4chan.org/"""
-from . import chan
+from .common import Extractor, Message
from .. import text
-class FourchanThreadExtractor(chan.ChanThreadExtractor):
- """Extractor for images from threads from 4chan.org"""
+class _4chanThreadExtractor(Extractor):
+ """Extractor for 4chan threads"""
category = "4chan"
+ subcategory = "thread"
+ directory_fmt = ("{category}", "{board}", "{thread} {title}")
+ filename_fmt = "{tim} {filename}.{extension}"
+ archive_fmt = "{board}_{thread}_{tim}"
pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
r"/([^/]+)/thread/(\d+)")
test = (
@@ -28,9 +32,30 @@ class FourchanThreadExtractor(chan.ChanThreadExtractor):
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
}),
)
- api_url = "https://a.4cdn.org/{board}/thread/{thread}.json"
- file_url = "https://i.4cdn.org/{board}/{tim}{ext}"
- def update(self, post, data=None):
- chan.ChanThreadExtractor.update(self, post, data)
- post["filename"] = text.unescape(post["filename"])
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.board, self.thread = match.groups()
+
+ def items(self):
+ url = "https://a.4cdn.org/{}/thread/{}.json".format(
+ self.board, self.thread)
+ posts = self.request(url).json()["posts"]
+ title = posts[0].get("sub") or text.remove_html(posts[0]["com"])
+
+ data = {
+ "board" : self.board,
+ "thread": self.thread,
+ "title" : text.unescape(title)[:50],
+ }
+
+ yield Message.Version, 1
+ yield Message.Directory, data
+ for post in posts:
+ if "filename" in post:
+ post.update(data)
+ post["extension"] = post["ext"][1:]
+ post["filename"] = text.unescape(post["filename"])
+ url = "https://i.4cdn.org/{}/{}{}".format(
+ post["board"], post["tim"], post["ext"])
+ yield Message.Url, url, post
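The thread extractor is now self-contained instead of inheriting from the removed chan.py base (deleted below), and talks straight to 4chan's public JSON API. The same flow as a bare script, assuming requests is available; board and thread number are placeholders:

    import requests

    board, thread = "po", "570368"  # placeholder values
    url = "https://a.4cdn.org/{}/thread/{}.json".format(board, thread)
    posts = requests.get(url).json()["posts"]

    for post in posts:
        if "filename" in post:  # posts without an attachment lack this key
            print("https://i.4cdn.org/{}/{}{}".format(
                board, post["tim"], post["ext"]))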
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
deleted file mode 100644
index e526da3..0000000
--- a/gallery_dl/extractor/8chan.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2014-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extract images and videos from https://8ch.net/"""
-
-from . import chan
-
-
-class InfinitychanThreadExtractor(chan.ChanThreadExtractor):
- """Extractor for images from threads from 8ch.net"""
- category = "8chan"
- filename_fmt = "{time}-{filename}{ext}"
- pattern = r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"
- test = ("https://8ch.net/builders/res/3.html", {
- "url": "5d85c0509f907f217aea379f862b41bf3d01f645",
- "keyword": "0c497190c0c0f826925fde09815351d01869c783",
- })
- api_url = "https://8ch.net/{board}/res/{thread}.json"
- file_url = "https://media.8ch.net/{board}/src/{tim}{ext}"
- file_url_v2 = "https://media.8ch.net/file_store/{tim}{ext}"
-
- def build_url(self, post):
- fmt = self.file_url if len(post["tim"]) < 64 else self.file_url_v2
- return fmt.format_map(post)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 351c5df..b8f74d1 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -15,11 +15,11 @@ modules = [
"3dbooru",
"4chan",
"500px",
- "8chan",
"8muses",
"adultempire",
"artstation",
"behance",
+ "blogger",
"bobx",
"danbooru",
"deviantart",
@@ -49,6 +49,7 @@ modules = [
"imgth",
"imgur",
"instagram",
+ "issuu",
"keenspot",
"khinsider",
"kissmanga",
@@ -66,10 +67,12 @@ modules = [
"mangastream",
"mangoxo",
"myportfolio",
+ "naver",
"newgrounds",
"ngomik",
"nhentai",
"nijie",
+ "nozomi",
"nsfwalbum",
"paheal",
"patreon",
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 85d8266..8160e48 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -21,12 +21,12 @@ class AdultempireGalleryExtractor(GalleryExtractor):
test = (
("https://www.adultempire.com/5998/gallery.html", {
"range": "1",
- "keyword": "25c8171f5623678491a0d7bdf38a7a6ebfa4a361",
+ "keyword": "5b3266e69801db0d78c22181da23bc102886e027",
"content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
}),
("https://www.adultdvdempire.com/5683/gallery.html", {
"url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
- "keyword": "9634eb16cc6dbf347eb9dcdd9b2a499dfd04d167",
+ "keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5",
}),
)
@@ -55,4 +55,4 @@ class AdultempireGalleryExtractor(GalleryExtractor):
if len(urls) < 24:
return
params["page"] += 1
- page = self.request(self.chapter_url, params=params).text
+ page = self.request(self.gallery_url, params=params).text
diff --git a/gallery_dl/extractor/blogger.py b/gallery_dl/extractor/blogger.py
new file mode 100644
index 0000000..31bbaf8
--- /dev/null
+++ b/gallery_dl/extractor/blogger.py
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for Blogger blogs"""
+
+from .common import Extractor, Message
+from .. import text
+import re
+
+BASE_PATTERN = (
+ r"(?:blogger:(?:https?://)?([^/]+)|"
+ r"(?:https?://)?([^.]+\.blogspot\.com))")
+
+
+class BloggerExtractor(Extractor):
+ """Base class for blogger extractors"""
+ category = "blogger"
+ directory_fmt = ("{category}", "{blog[name]}",
+ "{post[date]:%Y-%m-%d} {post[title]}")
+ filename_fmt = "{num:>03}.{extension}"
+ archive_fmt = "{post[id]}_{num}"
+ root = "https://www.blogger.com"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.blog = match.group(1) or match.group(2)
+ self.api = BloggerAPI(self)
+
+ def items(self):
+ yield Message.Version, 1
+
+ blog = self.api.blog_by_url("http://" + self.blog)
+ blog["pages"] = blog["pages"]["totalItems"]
+ blog["posts"] = blog["posts"]["totalItems"]
+ blog["date"] = text.parse_datetime(blog["published"])
+ del blog["selfLink"]
+
+ sub = re.compile(r"/s\d+/").sub
+ findall = re.compile(
+ r'src="(https?://\d+\.bp\.blogspot\.com/[^"]+)"').findall
+
+ for post in self.posts(blog):
+ images = findall(post["content"])
+ if not images:
+ continue
+
+ post["author"] = post["author"]["displayName"]
+ post["replies"] = post["replies"]["totalItems"]
+ post["content"] = text.remove_html(post["content"])
+ post["date"] = text.parse_datetime(post["published"])
+ del post["selfLink"]
+ del post["blog"]
+
+ yield Message.Directory, {"blog": blog, "post": post}
+ for num, url in enumerate(images, 1):
+ url = sub("/s0/", url).replace("http:", "https:", 1)
+ yield Message.Url, url, text.nameext_from_url(url, {
+ "blog": blog,
+ "post": post,
+ "url" : url,
+ "num" : num,
+ })
+
+ def posts(self, blog):
+ """Return an iterable with all relevant post objects"""
+
+
+class BloggerPostExtractor(BloggerExtractor):
+ """Extractor for a single blog post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?&#]+\.html)"
+ test = (
+ ("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", {
+ "url": "9928429fb62f712eb4de80f53625eccecc614aae",
+ "pattern": r"https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
+ "keyword": {
+ "blog": {
+ "date" : "type:datetime",
+ "description": "",
+ "id" : "5623928067739466034",
+ "kind" : "blogger#blog",
+ "locale" : dict,
+ "name" : "Julian Bunker Photography",
+ "pages" : int,
+ "posts" : int,
+ "published" : "2010-11-21T10:19:42-08:00",
+ "updated" : str,
+ "url" : "http://www.julianbunker.com/",
+ },
+ "post": {
+ "author" : "Julian Bunker",
+ "content" : str,
+ "date" : "type:datetime",
+ "etag" : str,
+ "id" : "6955139236418998998",
+ "kind" : "blogger#post",
+ "published" : "2010-12-25T17:08:00-08:00",
+ "replies" : "0",
+ "title" : "Moon Rise",
+ "updated" : "2011-12-06T05:21:24-08:00",
+ "url" : "re:.+/2010/12/moon-rise.html$",
+ },
+ "num": int,
+ "url": str,
+ },
+ }),
+ ("blogger:http://www.julianbunker.com/2010/12/moon-rise.html", {
+ "url": "9928429fb62f712eb4de80f53625eccecc614aae",
+ }),
+ )
+
+ def __init__(self, match):
+ BloggerExtractor.__init__(self, match)
+ self.path = match.group(3)
+
+ def posts(self, blog):
+ return (self.api.post_by_path(blog["id"], self.path),)
+
+
+class BloggerBlogExtractor(BloggerExtractor):
+ """Extractor for an entire Blogger blog"""
+ subcategory = "blog"
+ pattern = BASE_PATTERN + "/?$"
+ test = (
+ ("https://julianbphotography.blogspot.com/", {
+ "range": "1-25",
+ "count": 25,
+ "pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
+ }),
+ ("blogger:http://www.julianbunker.com/", {
+ "range": "1-25",
+ "count": 25,
+ }),
+ )
+
+ def posts(self, blog):
+ return self.api.blog_posts(blog["id"])
+
+
+class BloggerAPI():
+ """Minimal interface for the Blogger v3 API
+
+ Ref: https://developers.google.com/blogger
+ """
+ API_KEY = "AIzaSyCN9ax34oMMyM07g_M-5pjeDp_312eITK8"
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.api_key = extractor.config("api-key", self.API_KEY)
+
+ def blog_by_url(self, url):
+ return self._call("blogs/byurl", {"url": url})
+
+ def blog_posts(self, blog_id):
+ return self._pagination("blogs/{}/posts".format(blog_id), {})
+
+ def post_by_path(self, blog_id, path):
+ endpoint = "blogs/{}/posts/bypath".format(blog_id)
+ return self._call(endpoint, {"path": path})
+
+ def _call(self, endpoint, params):
+ url = "https://www.googleapis.com/blogger/v3/" + endpoint
+ params["key"] = self.api_key
+ return self.extractor.request(url, params=params).json()
+
+ def _pagination(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["items"]
+
+ if "nextPageToken" not in data:
+ return
+ params["pageToken"] = data["nextPageToken"]
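BloggerAPI follows the standard Google API pagination contract: request, consume items, resend with pageToken until nextPageToken disappears. The same loop as a standalone generator; the key is the public default baked in above, and the blog id comes from the test data:

    import requests

    def blog_posts(blog_id, api_key="AIzaSyCN9ax34oMMyM07g_M-5pjeDp_312eITK8"):
        url = "https://www.googleapis.com/blogger/v3/blogs/{}/posts".format(blog_id)
        params = {"key": api_key}
        while True:
            data = requests.get(url, params=params).json()
            yield from data["items"]
            if "nextPageToken" not in data:
                return
            params["pageToken"] = data["nextPageToken"]

    # for post in blog_posts("5623928067739466034"): ...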
diff --git a/gallery_dl/extractor/bobx.py b/gallery_dl/extractor/bobx.py
index 67427a7..dba5fe7 100644
--- a/gallery_dl/extractor/bobx.py
+++ b/gallery_dl/extractor/bobx.py
@@ -94,7 +94,8 @@ class BobxIdolExtractor(BobxExtractor):
subcategory = "idol"
pattern = r"(?:https?://)?(?:www\.)?bobx\.com/([^/]+/[^/?&#]+)/?$"
test = ("http://www.bobx.com/idol/rin-okabe/", {
- "url": "74d80bfcd53b738b31909bb42e5cc97c41b475b8",
+ "pattern": BobxGalleryExtractor.pattern,
+ "count": ">= 6",
})
def items(self):
@@ -107,6 +108,5 @@ class BobxIdolExtractor(BobxExtractor):
for part in text.extract_iter(page, '="photoset/', '"'):
# skip every other entry
skip = not skip
- if skip:
- continue
- yield Message.Queue, "{}photoset/{}".format(url, part), data
+ if not skip:
+ yield Message.Queue, "{}photoset/{}".format(url, part), data
diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py
deleted file mode 100644
index 5e44fd9..0000000
--- a/gallery_dl/extractor/chan.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2015-2019 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Base classes for extractors for different Futaba Channel-like boards"""
-
-from .common import Extractor, Message
-from .. import text
-
-
-class ChanThreadExtractor(Extractor):
- """Base class for extractors for Futaba Channel-like boards"""
- category = "chan"
- subcategory = "thread"
- directory_fmt = ("{category}", "{board}", "{thread} - {title}")
- filename_fmt = "{tim}-{filename}.{extension}"
- archive_fmt = "{board}_{thread}_{tim}"
- api_url = ""
- file_url = ""
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.metadata = {
- "board": match.group(1),
- "thread": match.group(2),
- }
-
- def items(self):
- yield Message.Version, 1
- url = self.api_url.format_map(self.metadata)
- posts = self.request(url).json()["posts"]
- self.metadata["title"] = self.get_thread_title(posts[0])
- yield Message.Directory, self.metadata
- for post in posts:
- if "filename" not in post:
- continue
- self.update(post)
- yield Message.Url, self.build_url(post), post
- if "extra_files" in post:
- for file in post["extra_files"]:
- self.update(post, file)
- yield Message.Url, self.build_url(post), post
-
- def update(self, post, data=None):
- """Update keyword dictionary"""
- post.update(data or self.metadata)
- post["extension"] = post["ext"][1:]
-
- def build_url(self, post):
- """Construct an image url out of a post object"""
- return self.file_url.format_map(post)
-
- @staticmethod
- def get_thread_title(post):
- """Return thread title from first post"""
- title = post["sub"] if "sub" in post else text.remove_html(post["com"])
- return text.unescape(title)[:50]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index a90af1c..0d258eb 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -18,7 +18,7 @@ import requests
import threading
import http.cookiejar
from .message import Message
-from .. import config, text, exception, cloudflare
+from .. import config, text, util, exception, cloudflare
class Extractor():
@@ -37,9 +37,9 @@ class Extractor():
self.session = requests.Session()
self.log = logging.getLogger(self.category)
self.url = match.string
- self._init_headers()
- self._init_cookies()
- self._init_proxies()
+
+ self._cookiefile = None
+ self._cookiejar = self.session.cookies
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
@@ -47,6 +47,10 @@ class Extractor():
if self._retries < 0:
self._retries = float("inf")
+ self._init_headers()
+ self._init_cookies()
+ self._init_proxies()
+
@classmethod
def from_url(cls, url):
if isinstance(cls.pattern, str):
@@ -67,7 +71,7 @@ class Extractor():
return config.interpolate(
("extractor", self.category, self.subcategory, key), default)
- def request(self, url, method="GET", *, session=None, retries=None,
+ def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
tries = 1
retries = self._retries if retries is None else retries
@@ -110,7 +114,7 @@ class Extractor():
msg = ""
self.log.warning("Cloudflare CAPTCHA" + msg)
- msg = "{}: {} for url: {}".format(code, response.reason, url)
+ msg = "'{} {}' for '{}'".format(code, response.reason, url)
if code < 500 and code != 429 and code != 430:
break
@@ -141,7 +145,7 @@ class Extractor():
return username, password
def _init_headers(self):
- """Set additional headers for the 'session' object"""
+ """Initialize HTTP headers for the 'session' object"""
headers = self.session.headers
headers.clear()
@@ -174,26 +178,43 @@ class Extractor():
if cookies:
if isinstance(cookies, dict):
self._update_cookies_dict(cookies, self.cookiedomain)
- else:
+ elif isinstance(cookies, str):
+ cookiefile = util.expand_path(cookies)
cookiejar = http.cookiejar.MozillaCookieJar()
try:
- cookiejar.load(cookies)
+ cookiejar.load(cookiefile)
except OSError as exc:
self.log.warning("cookies: %s", exc)
else:
- self.session.cookies.update(cookiejar)
+ self._cookiejar.update(cookiejar)
+ self._cookiefile = cookiefile
+ else:
+ self.log.warning(
+ "expected 'dict' or 'str' value for 'cookies' option, "
+ "got '%s' (%s)", cookies.__class__.__name__, cookies)
cookies = cloudflare.cookies(self.category)
if cookies:
domain, cookies = cookies
self._update_cookies_dict(cookies, domain)
+ def _store_cookies(self):
+ """Store the session's cookiejar in a cookies.txt file"""
+ if self._cookiefile and self.config("cookies-update", False):
+ cookiejar = http.cookiejar.MozillaCookieJar()
+ for cookie in self._cookiejar:
+ cookiejar.set_cookie(cookie)
+ try:
+ cookiejar.save(self._cookiefile)
+ except OSError as exc:
+ self.log.warning("cookies: %s", exc)
+
def _update_cookies(self, cookies, *, domain=""):
"""Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict):
self._update_cookies_dict(cookies, domain or self.cookiedomain)
else:
- setcookie = self.session.cookies.set_cookie
+ setcookie = self._cookiejar.set_cookie
try:
cookies = iter(cookies)
except TypeError:
@@ -204,17 +225,17 @@ class Extractor():
def _update_cookies_dict(self, cookiedict, domain):
"""Update cookiejar with name-value pairs from a dict"""
- setcookie = self.session.cookies.set
+ setcookie = self._cookiejar.set
for name, value in cookiedict.items():
setcookie(name, value, domain=domain)
- def _check_cookies(self, cookienames, *, domain=""):
+ def _check_cookies(self, cookienames, *, domain=None):
"""Check if all 'cookienames' are in the session's cookiejar"""
- if not domain:
+ if domain is None:
domain = self.cookiedomain
try:
for name in cookienames:
- self.session.cookies._find(name, domain)
+ self._cookiejar._find(name, domain)
except KeyError:
return False
return True
@@ -249,24 +270,21 @@ class Extractor():
yield test
-class ChapterExtractor(Extractor):
+class GalleryExtractor(Extractor):
- subcategory = "chapter"
- directory_fmt = (
- "{category}", "{manga}",
- "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
- filename_fmt = (
- "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
- archive_fmt = (
- "{manga}_{chapter}{chapter_minor}_{page}")
+ subcategory = "gallery"
+ filename_fmt = "{category}_{gallery_id}_{num:>03}.{extension}"
+ directory_fmt = ("{category}", "{gallery_id} {title}")
+ archive_fmt = "{gallery_id}_{num}"
+ enum = "num"
def __init__(self, match, url=None):
Extractor.__init__(self, match)
- self.chapter_url = url or self.root + match.group(1)
+ self.gallery_url = self.root + match.group(1) if url is None else url
def items(self):
self.login()
- page = self.request(self.chapter_url).text
+ page = self.request(self.gallery_url).text
data = self.metadata(page)
imgs = self.images(page)
@@ -284,7 +302,7 @@ class ChapterExtractor(Extractor):
yield Message.Version, 1
yield Message.Directory, data
- for data["page"], (url, imgdata) in images:
+ for data[self.enum], (url, imgdata) in images:
if imgdata:
data.update(imgdata)
yield Message.Url, url, text.nameext_from_url(url, data)
@@ -299,6 +317,19 @@ class ChapterExtractor(Extractor):
"""Return a list of all (image-url, metadata)-tuples"""
+class ChapterExtractor(GalleryExtractor):
+
+ subcategory = "chapter"
+ directory_fmt = (
+ "{category}", "{manga}",
+ "{volume:?v/ />02}c{chapter:>03}{chapter_minor:?//}{title:?: //}")
+ filename_fmt = (
+ "{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
+ archive_fmt = (
+ "{manga}_{chapter}{chapter_minor}_{page}")
+ enum = "page"
+
+
class MangaExtractor(Extractor):
subcategory = "manga"
@@ -333,14 +364,6 @@ class MangaExtractor(Extractor):
"""Return a list of all (chapter-url, metadata)-tuples"""
-class GalleryExtractor(ChapterExtractor):
-
- subcategory = "gallery"
- filename_fmt = "{category}_{gallery_id}_{page:>03}.{extension}"
- directory_fmt = ("{category}", "{gallery_id} {title}")
- archive_fmt = "{gallery_id}_{page}"
-
-
class AsynchronousMixin():
"""Run info extraction in a separate thread"""
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index ab32a00..eeee74a 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -58,9 +58,12 @@ class DeviantartExtractor(Extractor):
def items(self):
if self.user:
- self.group = not self.api.user_profile(self.user)
+ profile = self.api.user_profile(self.user)
+ self.group = not profile
if self.group:
self.subcategory = "group-" + self.subcategory
+ else:
+ self.user = profile["user"]["username"]
yield Message.Version, 1
for deviation in self.deviations():
@@ -260,11 +263,53 @@ class DeviantartExtractor(Extractor):
content.update(download)
+class DeviantartUserExtractor(Extractor):
+ """Extractor for an artist's user profile"""
+ category = "deviantart"
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/?$"
+ test = (
+ ("https://www.deviantart.com/shimoda7", {
+ "options": (("include", "gsjf"),),
+ "pattern": r"/shimoda7/(gallery(/scraps)?|posts|favourites)",
+ "count": 4,
+ }),
+ ("https://shimoda7.deviantart.com/"),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1) or match.group(2)
+
+ incl = self.config("include") or "g"
+ if isinstance(incl, list):
+ incl = "".join(item[0] for item in incl if item)
+ self.include = incl.lower()
+
+ def items(self):
+ base = "https://www.deviantart.com/{}/".format(self.user)
+ incl = self.include
+ data = {}
+
+ if "g" in incl:
+ data["_extractor"] = DeviantartGalleryExtractor
+ yield Message.Queue, base + "gallery", data
+ if "s" in incl:
+ data["_extractor"] = DeviantartScrapsExtractor
+ yield Message.Queue, base + "gallery/scraps", data
+ if "j" in incl:
+ data["_extractor"] = DeviantartJournalExtractor
+ yield Message.Queue, base + "posts", data
+ if "f" in incl:
+ data["_extractor"] = DeviantartFavoriteExtractor
+ yield Message.Queue, base + "favourites", data
+
+
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
archive_fmt = "g_{username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$"
+ pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
"pattern": r"https://(www.deviantart.com/download/\d+/"
@@ -315,12 +360,12 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
},
}),
# group
- ("https://www.deviantart.com/yakuzafc", {
+ ("https://www.deviantart.com/yakuzafc/gallery", {
"pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/",
"count": ">= 15",
}),
# 'folders' option (#276)
- ("https://www.deviantart.com/justatest235723", {
+ ("https://www.deviantart.com/justatest235723/gallery", {
"count": 3,
"options": (("metadata", 1), ("folders", 1), ("original", 0)),
"keyword": {
@@ -334,10 +379,12 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
("https://www.deviantart.com/shimoda8/gallery/", {
"exception": exception.NotFoundError,
}),
- # old-style URLs
+
+ ("https://www.deviantart.com/shimoda7/gallery"),
+ ("https://www.deviantart.com/shimoda7/gallery/all"),
("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
("https://shimoda7.deviantart.com/gallery/"),
- ("https://yakuzafc.deviantart.com/"),
+ ("https://shimoda7.deviantart.com/gallery/all/"),
("https://shimoda7.deviantart.com/gallery/?catpath=/"),
)
@@ -794,6 +841,14 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
)
def deviations(self):
+ # copy self.session
+ session = self.session.__class__()
+ for attr in session.__attrs__:
+ setattr(session, attr, getattr(self.session, attr, None))
+
+ # reset cookies in the original session object
+ self.session.cookies = session.cookies.__class__()
+
url = self.root + "/_napi/da-user-profile/api/gallery/contents"
params = {
"username" : self.user,
@@ -806,7 +861,8 @@ class DeviantartScrapsExtractor(DeviantartExtractorV2):
}
while True:
- data = self.request(url, params=params, headers=headers).json()
+ data = self.request(
+ url, session=session, params=params, headers=headers).json()
for obj in data["results"]:
yield obj["deviation"]
@@ -974,11 +1030,12 @@ class DeviantartAPI():
auth = (self.client_id, self.client_secret)
response = self.extractor.request(
- url, method="POST", data=data, auth=auth)
+ url, method="POST", data=data, auth=auth, fatal=False)
data = response.json()
if response.status_code != 200:
- raise exception.AuthenticationError('"{} ({})"'.format(
+ self.log.debug("Server response: %s", data)
+ raise exception.AuthenticationError('"{}" ({})'.format(
data.get("error_description"), data.get("error")))
if refresh_token:
_refresh_token_cache.update(refresh_token, data["refresh_token"])
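DeviantartScrapsExtractor needs its own cookie state, so it clones the session by copying the attributes requests itself enumerates in Session.__attrs__ and then hands the original a fresh jar. The trick in isolation:

    import requests

    original = requests.Session()
    original.headers["User-Agent"] = "example/1.0"

    clone = original.__class__()
    for attr in clone.__attrs__:  # 'headers', 'cookies', 'proxies', 'auth', ...
        setattr(clone, attr, getattr(original, attr, None))

    # the clone now shares everything, including the cookie jar;
    # give the original a brand-new, empty one
    original.cookies = clone.cookies.__class__()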
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 75e19d6..cba9627 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -69,8 +69,7 @@ class ExhentaiExtractor(Extractor):
def login(self):
"""Login and set necessary cookies"""
if self.LIMIT:
- self.log.error("Image limit reached!")
- raise exception.StopExtraction()
+ raise exception.StopExtraction("Image limit reached!")
if self._check_cookies(self.cookienames):
return
username, password = self._get_auth_info()
@@ -235,9 +234,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
url = iurl
data = self._parse_image_info(url)
except IndexError:
- self.log.error("Unable to parse image info for '%s'", url)
self.log.debug("Page content:\n%s", page)
- raise exception.StopExtraction()
+ raise exception.StopExtraction(
+ "Unable to parse image info for '%s'", url)
data["num"] = self.image_num
data["image_token"] = self.key["start"] = extr('var startkey="', '";')
@@ -272,9 +271,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
url = imgurl
data = self._parse_image_info(url)
except IndexError:
- self.log.error("Unable to parse image info for '%s'", url)
self.log.debug("Page content:\n%s", page)
- raise exception.StopExtraction()
+ raise exception.StopExtraction(
+ "Unable to parse image info for '%s'", url)
data["num"] = request["page"]
data["image_token"] = imgkey
@@ -311,12 +310,12 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
self._remaining -= data["cost"]
if self._remaining <= 0:
+ ExhentaiExtractor.LIMIT = True
url = "{}/s/{}/{}-{}".format(
self.root, data["image_token"], self.gallery_id, data["num"])
- self.log.error("Image limit reached! Continue with "
- "'%s' as URL after resetting it.", url)
- ExhentaiExtractor.LIMIT = True
- raise exception.StopExtraction()
+ raise exception.StopExtraction(
+ "Image limit reached! Continue with '%s' "
+ "as URL after resetting it.", url)
def _update_limits(self):
url = "https://e-hentai.org/home.php"
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index 73b8ec4..b71fc4d 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -423,14 +423,15 @@ class FlickrAPI(oauth.OAuth1API):
params["api_key"] = self.api_key
data = self.request(self.API_URL, params=params).json()
if "code" in data:
+ msg = data.get("message")
+ self.log.debug("Server response: %s", data)
if data["code"] == 1:
raise exception.NotFoundError(self.extractor.subcategory)
elif data["code"] == 98:
- raise exception.AuthenticationError(data.get("message"))
+ raise exception.AuthenticationError(msg)
elif data["code"] == 99:
- raise exception.AuthorizationError()
- self.log.error("API call failed: %s", data.get("message"))
- raise exception.StopExtraction()
+ raise exception.AuthorizationError(msg)
+ raise exception.StopExtraction("API request failed: %s", msg)
return data
def _pagination(self, method, params, key="photos"):
diff --git a/gallery_dl/extractor/foolfuuka.py b/gallery_dl/extractor/foolfuuka.py
index 5f4c5b8..645b53a 100644
--- a/gallery_dl/extractor/foolfuuka.py
+++ b/gallery_dl/extractor/foolfuuka.py
@@ -78,7 +78,7 @@ class FoolfuukaThreadExtractor(SharedConfigMixin, Extractor):
EXTRACTORS = {
"4plebs": {
- "name": "fourplebs",
+ "name": "_4plebs",
"root": "https://archive.4plebs.org",
"pattern": r"(?:archive\.)?4plebs\.org",
"test-thread": ("https://archive.4plebs.org/tg/thread/54059290", {
diff --git a/gallery_dl/extractor/foolslide.py b/gallery_dl/extractor/foolslide.py
index 14baa36..fc7dbf9 100644
--- a/gallery_dl/extractor/foolslide.py
+++ b/gallery_dl/extractor/foolslide.py
@@ -44,14 +44,13 @@ class FoolslideBase(SharedConfigMixin):
class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
"""Base class for chapter extractors for FoOlSlide based sites"""
- directory_fmt = (
- "{category}", "{manga}", "{chapter_string}")
+ directory_fmt = ("{category}", "{manga}", "{chapter_string}")
archive_fmt = "{id}"
pattern_fmt = r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
decode = "default"
def items(self):
- page = self.request(self.chapter_url).text
+ page = self.request(self.gallery_url).text
data = self.metadata(page)
imgs = self.images(page)
@@ -77,7 +76,7 @@ class FoolslideChapterExtractor(FoolslideBase, ChapterExtractor):
def metadata(self, page):
extr = text.extract_from(page)
extr('<h1 class="tbtitle dnone">', '')
- return self.parse_chapter_url(self.chapter_url, {
+ return self.parse_chapter_url(self.gallery_url, {
"manga" : text.unescape(extr('title="', '"')).strip(),
"chapter_string": text.unescape(extr('title="', '"')),
})
diff --git a/gallery_dl/extractor/fuskator.py b/gallery_dl/extractor/fuskator.py
index dbcf2f2..eba1c39 100644
--- a/gallery_dl/extractor/fuskator.py
+++ b/gallery_dl/extractor/fuskator.py
@@ -42,7 +42,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
def metadata(self, page):
headers = {
- "Referer" : self.chapter_url,
+ "Referer" : self.gallery_url,
"X-Requested-With": "XMLHttpRequest",
}
auth = self.request(
diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py
index 01793dc..43479c6 100644
--- a/gallery_dl/extractor/hbrowse.py
+++ b/gallery_dl/extractor/hbrowse.py
@@ -29,8 +29,7 @@ class HbrowseBase():
if not data["manga"] and "<b>Warning</b>" in page:
msg = page.rpartition(">")[2].strip()
- self.log.error("Site is not accessible: '%s'", msg)
- raise exception.StopExtraction()
+ raise exception.StopExtraction("Site is not accessible: '%s'", msg)
tags = text.extract(page, 'class="listTable"', '</table>', pos)[0]
diff --git a/gallery_dl/extractor/hentaicafe.py b/gallery_dl/extractor/hentaicafe.py
index 161073b..1ab71d6 100644
--- a/gallery_dl/extractor/hentaicafe.py
+++ b/gallery_dl/extractor/hentaicafe.py
@@ -31,10 +31,10 @@ class HentaicafeChapterExtractor(foolslide.FoolslideChapterExtractor):
info = text.unescape(text.extract(page, '<title>', '</title>')[0])
manga, _, chapter_string = info.partition(" :: ")
- data = self._data(self.chapter_url.split("/")[5])
+ data = self._data(self.gallery_url.split("/")[5])
data["manga"] = manga
data["chapter_string"] = chapter_string.rstrip(" :")
- return self.parse_chapter_url(self.chapter_url, data)
+ return self.parse_chapter_url(self.gallery_url, data)
@memcache(keyarg=1)
def _data(self, manga):
diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py
index cf4871f..7e0b63c 100644
--- a/gallery_dl/extractor/hentaifox.py
+++ b/gallery_dl/extractor/hentaifox.py
@@ -24,7 +24,7 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
test = ("https://hentaifox.com/gallery/56622/", {
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
"count": 24,
- "keyword": "38f8517605feb6854d48833297da6b05c6541b69",
+ "keyword": "903ebe227d85e484460382fc6cbab42be7a244d5",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/hentainexus.py b/gallery_dl/extractor/hentainexus.py
index d875817..9e2ee9f 100644
--- a/gallery_dl/extractor/hentainexus.py
+++ b/gallery_dl/extractor/hentainexus.py
@@ -22,7 +22,7 @@ class HentainexusGalleryExtractor(GalleryExtractor):
test = (
("https://hentainexus.com/view/5688", {
"url": "746d0043e20030f1171aae5ea113176607302517",
- "keyword": "b05986369fbaf29cfa08b118960d92c49e59524b",
+ "keyword": "9512cf5f258130e5f75de9954d7a13217c2405e7",
}),
("https://hentainexus.com/read/5688"),
)
diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index ef08d69..e53b051 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -11,17 +11,20 @@
from .common import GalleryExtractor
from .. import text, util
import string
+import json
class HitomiGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
root = "https://hitomi.la"
- pattern = r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)"
+ pattern = (r"(?:https?://)?hitomi\.la"
+ r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)"
+ r"/(?:[^/?&#]+-)?(\d+)")
test = (
("https://hitomi.la/galleries/867789.html", {
"pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg",
- "keyword": "d097a8db8e810045131b4510c41714004f9eff3a",
+ "keyword": "6701f8f588f119ef84cd29bdf99a399417b0a6a2",
"count": 16,
}),
("https://hitomi.la/galleries/1401410.html", {
@@ -39,6 +42,11 @@ class HitomiGalleryExtractor(GalleryExtractor):
"url": "055c898a36389719799d6bce76889cc4ea4421fc",
"count": 1413,
}),
+ ("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"),
+ ("https://hitomi.la/manga/867789.html"),
+ ("https://hitomi.la/doujinshi/867789.html"),
+ ("https://hitomi.la/cg/867789.html"),
+ ("https://hitomi.la/gamecg/867789.html"),
("https://hitomi.la/reader/867789.html"),
)
@@ -54,6 +62,11 @@ class HitomiGalleryExtractor(GalleryExtractor):
self.fallback = True
url = url.replace("/galleries/", "/reader/")
response = GalleryExtractor.request(self, url, **kwargs)
+ elif b"<title>Redirect</title>" in response.content:
+ url = text.extract(response.text, "href='", "'")[0]
+ if not url.startswith("http"):
+ url = text.urljoin(self.root, url)
+ response = self.request(url, **kwargs)
return response
def metadata(self, page):
@@ -86,25 +99,19 @@ class HitomiGalleryExtractor(GalleryExtractor):
# see https://ltn.hitomi.la/common.js
offset = text.parse_int(self.gallery_id[-1]) % 3
subdomain = chr(97 + offset) + "a"
- base = "https://" + subdomain + ".hitomi.la/galleries/"
+ base = "https://{}.hitomi.la/galleries/{}/".format(
+ subdomain, self.gallery_id)
# set Referer header before image downloads (#239)
- self.session.headers["Referer"] = self.chapter_url
-
- # handle Game CG galleries with scenes (#321)
- scenes = text.extract(page, "var scene_indexes = [", "]")[0]
- if scenes and scenes.strip():
- url = "{}/reader/{}.html".format(self.root, self.gallery_id)
- page = self.request(url).text
- begin, end = ">//g.hitomi.la/galleries/", "</div>"
- elif self.fallback:
- begin, end = ">//g.hitomi.la/galleries/", "</div>"
- else:
- begin, end = "'//tn.hitomi.la/smalltn/", ".jpg',"
+ self.session.headers["Referer"] = self.gallery_url
+
+ # get 'galleryinfo'
+ url = "https://ltn.hitomi.la/galleries/{}.js".format(self.gallery_id)
+ page = self.request(url).text
return [
- (base + urlpart, None)
- for urlpart in text.extract_iter(page, begin, end)
+ (base + image["name"], None)
+ for image in json.loads(page.partition("=")[2])
]
@staticmethod
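Image URLs now come from the per-gallery 'galleryinfo' script on ltn.hitomi.la, a JavaScript file shaped like "var galleryinfo = ..." where everything after the first '=' parses as JSON (a list of image objects, as of this release). An illustrative fetch, with the gallery id taken from the tests above:

    import json
    import requests

    gallery_id = "867789"
    subdomain = chr(97 + int(gallery_id[-1]) % 3) + "a"  # 'aa'/'ba'/'ca', per common.js
    base = "https://{}.hitomi.la/galleries/{}/".format(subdomain, gallery_id)

    page = requests.get(
        "https://ltn.hitomi.la/galleries/{}.js".format(gallery_id)).text
    for image in json.loads(page.partition("=")[2]):
        print(base + image["name"])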
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
index 2a8dcad..fb321d0 100644
--- a/gallery_dl/extractor/imgbb.py
+++ b/gallery_dl/extractor/imgbb.py
@@ -90,7 +90,7 @@ class ImgbbExtractor(Extractor):
return
params["seek"] = data["seekEnd"]
params["page"] += 1
- data = self.request(endpoint, "POST", data=params).json()
+ data = self.request(endpoint, method="POST", data=params).json()
page = data["html"]
diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py
index cb36c30..b1be995 100644
--- a/gallery_dl/extractor/imgur.py
+++ b/gallery_dl/extractor/imgur.py
@@ -10,8 +10,6 @@
from .common import Extractor, Message
from .. import text, exception
-import itertools
-import json
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com"
@@ -21,103 +19,89 @@ class ImgurExtractor(Extractor):
"""Base class for imgur extractors"""
category = "imgur"
root = "https://imgur.com"
- api_root = "https://api.imgur.com"
def __init__(self, match):
Extractor.__init__(self, match)
+ self.api = ImgurAPI(self)
self.key = match.group(1)
self.mp4 = self.config("mp4", True)
- def _extract_data(self, path):
- response = self.request(self.root + path, notfound=self.subcategory)
- data = json.loads(text.extract(
- response.text, "image : ", ",\n")[0])
+ def _prepare(self, image):
try:
- del data["adConfig"]
- del data["isAd"]
+ del image["ad_url"]
+ del image["ad_type"]
+ del image["ad_config"]
except KeyError:
pass
- return data
- def _prepare(self, image):
- image["ext"] = image["ext"].partition("?")[0]
- if image["ext"] == ".gif" and (
- (self.mp4 and image["prefer_video"]) or self.mp4 == "always"):
- image["ext"] = ".mp4"
- url = "https://i.imgur.com/" + image["hash"] + image["ext"]
- image["extension"] = image["ext"][1:]
+ url = image["mp4"] if image["animated"] and self.mp4 else image["link"]
+ image["date"] = text.parse_timestamp(image["datetime"])
+ text.nameext_from_url(url, image)
+
return url
- def _items_apiv3(self, urlfmt):
+ def _items_queue(self, items):
album_ex = ImgurAlbumExtractor
image_ex = ImgurImageExtractor
- params = {
- "IMGURPLATFORM" : "web",
- "album_previews": "0",
- "client_id" : "546c25a59c58ad7",
- }
- headers = {
- "Origin" : self.root,
- "Referer": self.root + "/",
- }
-
yield Message.Version, 1
-
- for num in itertools.count(0):
- url = urlfmt.format(num)
- data = self.request(url, params=params, headers=headers).json()
-
- for item in data["data"]:
- item["_extractor"] = album_ex if item["is_album"] else image_ex
- yield Message.Queue, item["link"], item
-
- if len(data["data"]) < 60:
- return
+ for item in items:
+ item["_extractor"] = album_ex if item["is_album"] else image_ex
+ yield Message.Queue, item["link"], item
class ImgurImageExtractor(ImgurExtractor):
"""Extractor for individual images on imgur.com"""
subcategory = "image"
- filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
- archive_fmt = "{hash}"
+ filename_fmt = "{category}_{id}{title:?_//}.{extension}"
+ archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"
test = (
("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
- "animated": False,
- "datetime": "2016-11-10 14:24:35",
- "description": str,
- "ext": ".png",
- "extension": "png",
- "hash": "21yMxCS",
- "height": "32",
- "is_moderated": False,
- "is_safe": False,
- "is_viral": 0,
- "looping": False,
- "mimetype": "image/png",
- "name": None,
- "prefer_video": False,
- "size": 182,
- "source": "",
- "title": "Test",
- "video_host": None,
- "video_source": None,
- "width": "64",
+ "account_id" : None,
+ "account_url" : None,
+ "animated" : False,
+ "bandwidth" : int,
+ "date" : "type:datetime",
+ "datetime" : 1478787875,
+ "description" : None,
+ "edited" : "0",
+ "extension" : "png",
+ "favorite" : False,
+ "filename" : "21yMxCS",
+ "has_sound" : False,
+ "height" : 32,
+ "id" : "21yMxCS",
+ "in_gallery" : False,
+ "in_most_viral": False,
+ "is_ad" : False,
+ "link" : "https://i.imgur.com/21yMxCS.png",
+ "nsfw" : False,
+ "section" : None,
+ "size" : 182,
+ "tags" : [],
+ "title" : "Test",
+ "type" : "image/png",
+ "views" : int,
+ "vote" : None,
+ "width" : 64,
},
}),
("http://imgur.com/0gybAXR", { # gifv/mp4 video
"url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7",
"content": "a3c080e43f58f55243ab830569ba02309d59abfc",
}),
+ ("https://imgur.com/XFfsmuC", { # missing title in API response (#467)
+ "keyword": {"title": "Tears are a natural response to irritants"},
+ }),
("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1'
- "url": "73f361b50753ab25da64160aa50bc5d139480d45",
+ "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e",
}),
("https://imgur.com/zzzzzzz", { # not found
- "exception": exception.NotFoundError,
+ "exception": exception.HttpError,
}),
("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile
@@ -129,7 +113,11 @@ class ImgurImageExtractor(ImgurExtractor):
)
def items(self):
- image = self._extract_data("/" + self.key)
+ image = self.api.image(self.key)
+ if not image["title"]:
+ page = self.request(self.root + "/" + self.key, fatal=False).text
+ title = text.extract(page, "<title>", "<")[0]
+ image["title"] = (title or "").rpartition(" - ")[0].strip()
url = self._prepare(image)
yield Message.Version, 1
yield Message.Directory, image
@@ -139,42 +127,67 @@ class ImgurImageExtractor(ImgurExtractor):
class ImgurAlbumExtractor(ImgurExtractor):
"""Extractor for imgur albums"""
subcategory = "album"
- directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
- filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
- archive_fmt = "{album[hash]}_{hash}"
+ directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}")
+ filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
+ archive_fmt = "{album[id]}_{id}"
pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})"
test = (
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": {
"album": {
- "album_cover": "693j2Kr",
- "album_description": None,
- "cover": "693j2Kr",
- "datetime": "2015-10-09 10:37:50",
- "description": None,
- "hash": "TcBmP",
- "id": "TcBmP",
- "is_album": True,
- "num_images": "19",
- "title": "138",
- "title_clean": "TcBmP",
- "views": str,
+ "account_id" : None,
+ "account_url" : None,
+ "cover" : "693j2Kr",
+ "cover_edited": None,
+ "cover_height": 1400,
+ "cover_width" : 951,
+ "date" : "type:datetime",
+ "datetime" : 1444387070,
+ "description" : None,
+ "favorite" : False,
+ "id" : "TcBmP",
+ "images_count": 19,
+ "in_gallery" : False,
+ "is_ad" : False,
+ "is_album" : True,
+ "layout" : "blog",
+ "link" : "https://imgur.com/a/TcBmP",
+ "nsfw" : False,
+ "privacy" : "hidden",
+ "section" : None,
+ "title" : "138",
+ "views" : int,
},
- "animated": bool,
- "datetime": str,
- "extension": str,
- "hash": str,
- "height": int,
- "num": int,
- "prefer_video": bool,
- "size": int,
- "title": str,
- "width": int,
+ "account_id" : None,
+ "account_url": None,
+ "animated" : bool,
+ "bandwidth" : int,
+ "date" : "type:datetime",
+ "datetime" : int,
+ "description": None,
+ "edited" : "0",
+ "favorite" : False,
+ "has_sound" : False,
+ "height" : int,
+ "id" : str,
+ "in_gallery" : False,
+ "is_ad" : False,
+ "link" : r"re:https://i\.imgur\.com/\w+\.jpg",
+ "nsfw" : None,
+ "num" : int,
+ "section" : None,
+ "size" : int,
+ "tags" : list,
+ "title" : None,
+ "type" : "image/jpeg",
+ "views" : int,
+ "vote" : None,
+ "width" : int,
},
}),
("https://imgur.com/a/eD9CT", { # large album
- "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937",
+ "url": "de748c181a04d18bef1de9d4f4866ef0a06d632b",
}),
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
"url": "695ef0c950023362a0163ee5041796300db76674",
@@ -183,21 +196,22 @@ class ImgurAlbumExtractor(ImgurExtractor):
"url": "86b4747f8147cec7602f0214e267309af73a8655",
}),
("https://imgur.com/a/TcBmQ", {
- "exception": exception.NotFoundError,
+ "exception": exception.HttpError,
}),
("https://www.imgur.com/a/TcBmP"), # www
("https://m.imgur.com/a/TcBmP"), # mobile
)
def items(self):
- album = self._extract_data("/a/" + self.key + "/all")
- images = album["album_images"]["images"]
- del album["album_images"]
+ album = self.api.album(self.key)
+ album["date"] = text.parse_timestamp(album["datetime"])
+ images = album["images"]
- if int(album["num_images"]) > len(images):
- url = "{}/ajaxalbums/getimages/{}/hit.json".format(
- self.root, self.key)
- images = self.request(url).json()["data"]["images"]
+ try:
+ del album["images"]
+ del album["ad_config"]
+ except KeyError:
+ pass
yield Message.Version, 1
yield Message.Directory, {"album": album, "count": len(images)}
@@ -224,13 +238,11 @@ class ImgurGalleryExtractor(ImgurExtractor):
def items(self):
url = self.root + "/a/" + self.key
with self.request(url, method="HEAD", fatal=False) as response:
- code = response.status_code
-
- if code < 400:
- extr = ImgurAlbumExtractor
- else:
- extr = ImgurImageExtractor
- url = self.root + "/" + self.key
+ if response.status_code < 400:
+ extr = ImgurAlbumExtractor
+ else:
+ extr = ImgurImageExtractor
+ url = self.root + "/" + self.key
yield Message.Version, 1
yield Message.Queue, url, {"_extractor": extr}
@@ -251,9 +263,7 @@ class ImgurUserExtractor(ImgurExtractor):
)
def items(self):
- urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format(
- self.api_root, self.key)
- return self._items_apiv3(urlfmt)
+ return self._items_queue(self.api.account_submissions(self.key))
class ImgurFavoriteExtractor(ImgurExtractor):
@@ -267,6 +277,43 @@ class ImgurFavoriteExtractor(ImgurExtractor):
})
def items(self):
- urlfmt = "{}/3/account/{}/gallery_favorites/{{}}/newest".format(
- self.api_root, self.key)
- return self._items_apiv3(urlfmt)
+ return self._items_queue(self.api.account_favorites(self.key))
+
+
+class ImgurAPI():
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.headers = {
+ "Authorization": "Client-ID " + extractor.config(
+ "client-id", "546c25a59c58ad7"),
+ }
+
+ def account_favorites(self, account):
+ endpoint = "account/{}/gallery_favorites".format(account)
+ return self._pagination(endpoint)
+
+ def account_submissions(self, account):
+ endpoint = "account/{}/submissions".format(account)
+ return self._pagination(endpoint)
+
+ def album(self, album_hash):
+ return self._call("album/" + album_hash)
+
+ def image(self, image_hash):
+ return self._call("image/" + image_hash)
+
+ def _call(self, endpoint):
+ return self.extractor.request(
+ "https://api.imgur.com/3/" + endpoint, headers=self.headers,
+ ).json()["data"]
+
+ def _pagination(self, endpoint):
+ num = 0
+
+ while True:
+ data = self._call("{}/{}".format(endpoint, num))
+ if not data:
+ return
+ yield from data
+ num += 1
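
# A minimal, standalone sketch of the pagination scheme implemented by
# ImgurAPI above: page numbers are appended to the endpoint
# ("endpoint/0", "endpoint/1", ...) and iteration stops at the first
# empty page. Uses the same public demo client-id as the diff; this is
# an illustration, not an official API example.
import requests

def imgur_account_submissions(account, client_id="546c25a59c58ad7"):
    headers = {"Authorization": "Client-ID " + client_id}
    num = 0
    while True:
        url = "https://api.imgur.com/3/account/{}/submissions/{}".format(
            account, num)
        data = requests.get(url, headers=headers).json()["data"]
        if not data:            # an empty page marks the end of the listing
            return
        yield from data
        num += 1
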
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 8eee390..a14225f 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -36,17 +36,13 @@ class InstagramExtractor(Extractor):
data.update(metadata)
yield Message.Directory, data
- if data['typename'] in ('GraphImage', 'GraphStoryImage', 'GraphStoryVideo'):
- yield Message.Url, data['display_url'], \
- text.nameext_from_url(data['display_url'], data)
- elif data['typename'] == 'GraphVideo':
- data["extension"] = None
- yield Message.Url, \
- 'ytdl:{}/p/{}/'.format(self.root, data['shortcode']), data
- elif data['typename'] == 'GraphHighlightReel':
+ if data['typename'] == 'GraphHighlightReel':
url = '{}/stories/highlights/{}/'.format(self.root, data['id'])
data['_extractor'] = InstagramStoriesExtractor
yield Message.Queue, url, data
+ else:
+ url = data['video_url'] or data['display_url']
+ yield Message.Url, url, text.nameext_from_url(url, data)
def login(self):
if self._check_cookies(self.cookienames):
@@ -101,12 +97,20 @@ class InstagramExtractor(Extractor):
def _extract_shared_data(self, url):
page = self.request(url).text
- data = text.extract(page, 'window._sharedData = ', ';</script>')[0]
- return json.loads(data)
+ shared_data, pos = text.extract(
+ page, 'window._sharedData =', ';</script>')
+ additional_data, pos = text.extract(
+ page, 'window.__additionalDataLoaded(', ');</script>', pos)
+
+ data = json.loads(shared_data)
+ if additional_data:
+ next(iter(data['entry_data'].values()))[0] = \
+ json.loads(additional_data.partition(',')[2])
+ return data
def _extract_postpage(self, url):
- shared_data = self._extract_shared_data(url)
- media = shared_data['entry_data']['PostPage'][0]['graphql']['shortcode_media']
+ data = self.request(url + "?__a=1").json()
+ media = data['graphql']['shortcode_media']
common = {
'date': text.parse_timestamp(media['taken_at_timestamp']),
@@ -122,7 +126,6 @@ class InstagramExtractor(Extractor):
medias = []
if media['__typename'] == 'GraphSidecar':
- yi = 0
for n in media['edge_sidecar_to_children']['edges']:
children = n['node']
media_data = {
@@ -130,14 +133,12 @@ class InstagramExtractor(Extractor):
'shortcode': children['shortcode'],
'typename': children['__typename'],
'display_url': children['display_url'],
+ 'video_url': children.get('video_url'),
'height': text.parse_int(children['dimensions']['height']),
'width': text.parse_int(children['dimensions']['width']),
'sidecar_media_id': media['id'],
'sidecar_shortcode': media['shortcode'],
}
- if children['__typename'] == 'GraphVideo':
- media_data['_ytdl_index'] = yi
- yi += 1
media_data.update(common)
medias.append(media_data)
@@ -147,6 +148,7 @@ class InstagramExtractor(Extractor):
'shortcode': media['shortcode'],
'typename': media['__typename'],
'display_url': media['display_url'],
+ 'video_url': media.get('video_url'),
'height': text.parse_int(media['dimensions']['height']),
'width': text.parse_int(media['dimensions']['width']),
}
@@ -318,7 +320,7 @@ class InstagramImageExtractor(InstagramExtractor):
# GraphVideo
("https://www.instagram.com/p/Bqxp0VSBgJg/", {
- "url": "8f38c1cf460c9804842f7306c487410f33f82e7e",
+ "pattern": r"/47129943_191645575115739_8539303288426725376_n\.mp4",
"keyword": {
"date": "type:datetime",
"description": str,
@@ -334,7 +336,7 @@ class InstagramImageExtractor(InstagramExtractor):
# GraphVideo (IGTV)
("https://www.instagram.com/tv/BkQjCfsBIzi/", {
- "url": "64208f408e11cbbca86c2df4488e90262ae9d9ec",
+ "pattern": r"/10000000_1760663964018792_716207142595461120_n\.mp4",
"keyword": {
"date": "type:datetime",
"description": str,
@@ -351,11 +353,10 @@ class InstagramImageExtractor(InstagramExtractor):
# GraphSidecar with 2 embedded GraphVideo objects
("https://www.instagram.com/p/BtOvDOfhvRr/", {
"count": 2,
- "url": "e290d4180a58ae50c910d51d3b04d5f5c4622cd7",
"keyword": {
"sidecar_media_id": "1967717017113261163",
"sidecar_shortcode": "BtOvDOfhvRr",
- "_ytdl_index": int,
+ "video_url": str,
}
})
)
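
# A rough sketch of the new post lookup used above: fetch the post's
# JSON through the undocumented (and therefore fragile) "?__a=1"
# endpoint and prefer 'video_url' over 'display_url', mirroring the
# simplified branch in items().
import requests

def instagram_media_url(shortcode):
    url = "https://www.instagram.com/p/{}/?__a=1".format(shortcode)
    media = requests.get(url).json()["graphql"]["shortcode_media"]
    # videos carry both keys; plain images only have 'display_url'
    return media.get("video_url") or media["display_url"]
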
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
new file mode 100644
index 0000000..12d7487
--- /dev/null
+++ b/gallery_dl/extractor/issuu.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://issuu.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text, util
+import json
+
+
+class IssuuBase():
+ """Base class for issuu extractors"""
+ category = "issuu"
+ root = "https://issuu.com"
+
+
+class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
+ """Extractor for a single publication"""
+ subcategory = "publication"
+ directory_fmt = ("{category}", "{document[userName]}",
+ "{document[originalPublishDate]} {document[title]}")
+ filename_fmt = "{num:>03}.{extension}"
+ archive_fmt = "{document[id]}_{num}"
+ pattern = r"(?:https?://)?issuu\.com(/[^/?&#]+/docs/[^/?&#]+)"
+ test = ("https://issuu.com/issuu/docs/motions-1-2019/", {
+ "pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
+ "count" : 36,
+ "keyword": {
+ "document": {
+ "access" : "public",
+ "contentRating": dict,
+ "date" : "type:datetime",
+ "description" : "re:Motions, the brand new publication by Is",
+ "documentId" : r"re:\d+-d99ec95935f15091b040cb8060f05510",
+ "documentName" : "motions-1-2019",
+ "downloadState": "NOT_AVAILABLE",
+ "id" : r"re:\d+-d99ec95935f15091b040cb8060f05510",
+ "isConverting" : False,
+ "isQuarantined": False,
+ "lang" : "en",
+ "language" : "English",
+ "pageCount" : 36,
+ "publicationId": "d99ec95935f15091b040cb8060f05510",
+ "sections" : list,
+ "title" : "Motions by Issuu - Issue 1",
+ "userName" : "issuu",
+ },
+ "extension": "jpg",
+ "filename" : r"re:page_\d+",
+ "num" : int,
+ },
+ })
+
+ def metadata(self, page):
+ data = json.loads(text.extract(
+ page, 'window.__INITIAL_STATE__ =', ';\n')[0])
+
+ doc = data["document"]
+ doc["lang"] = doc["language"]
+ doc["language"] = util.code_to_language(doc["language"])
+ doc["date"] = text.parse_datetime(
+ doc["originalPublishDate"], "%Y-%m-%d")
+
+ self._cnt = text.parse_int(doc["pageCount"])
+ self._tpl = "https://{}/{}/jpg/page_{{}}.jpg".format(
+ data["config"]["hosts"]["image"], doc["id"])
+
+ return {"document": doc}
+
+ def images(self, page):
+ fmt = self._tpl.format
+ return [(fmt(i), None) for i in range(1, self._cnt + 1)]
+
+
+class IssuuUserExtractor(IssuuBase, Extractor):
+ """Extractor for all publications of a user/publisher"""
+ subcategory = "user"
+ pattern = r"(?:https?://)?issuu\.com/([^/?&#]+)/?$"
+ test = ("https://issuu.com/issuu", {
+ "pattern": IssuuPublicationExtractor.pattern,
+ "count" : "> 25",
+ })
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def items(self):
+ url = "{}/call/profile/v1/documents/{}".format(self.root, self.user)
+ params = {"offset": 0, "limit": "25"}
+
+ yield Message.Version, 1
+ while True:
+ data = self.request(url, params=params).json()
+
+ for publication in data["items"]:
+ publication["url"] = "{}/{}/docs/{}".format(
+ self.root, self.user, publication["uri"])
+ publication["_extractor"] = IssuuPublicationExtractor
+ yield Message.Queue, publication["url"], publication
+
+ if not data["hasMore"]:
+ return
+ params["offset"] += data["limit"]
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
index 6314a94..bb89f93 100644
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -33,10 +33,9 @@ class RedirectMixin():
except (EOFError, OSError):
pass
else:
- self.log.error(
+ raise exception.StopExtraction(
"Redirect to \n%s\nVisit this URL in your browser and "
"solve the CAPTCHA to continue", response.url)
- raise exception.StopExtraction()
class KissmangaBase(RedirectMixin):
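
# The pattern behind this and many hunks below: instead of logging an
# error and then raising a bare StopExtraction, the message (with
# printf-style arguments) now travels inside the exception and is
# logged once in job.py. A minimal re-implementation of the presumed
# exception class -- the real one lives in gallery_dl/exception.py and
# may differ in details such as the exit code:
class StopExtraction(Exception):
    def __init__(self, message=None, *args):
        self.message = message % args if args else message
        self.code = 1  # exit-code bit; the actual value is set upstream
        Exception.__init__(self, self.message)

# usage, as in the hunk above:
# raise StopExtraction("Redirect to\n%s\nVisit this URL ...", response.url)
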
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 965daa0..0aeeb4a 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -6,75 +6,109 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extractors for https://luscious.net/"""
+"""Extractors for https://members.luscious.net/"""
-from .common import GalleryExtractor, Extractor, Message
+from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
-class LusciousBase(Extractor):
+class LusciousExtractor(Extractor):
"""Base class for luscious extractors"""
category = "luscious"
cookiedomain = ".luscious.net"
root = "https://members.luscious.net"
- def login(self):
- """Login and set necessary cookies"""
- username, password = self._get_auth_info()
- if username:
- self._update_cookies(self._login_impl(username, password))
-
- @cache(maxage=14*24*3600, keyarg=1)
- def _login_impl(self, username, password):
- self.log.info("Logging in as %s", username)
- url = "https://members.luscious.net/accounts/login/"
- headers = {"Referer": "https://members.luscious.net/login/"}
+ def _graphql(self, op, variables, query):
data = {
- "login": username,
- "password": password,
- "remember": "on",
- "next": "/",
+ "id" : 1,
+ "operationName": op,
+ "query" : query,
+ "variables" : variables,
}
+ response = self.request(
+ "{}/graphql/nobatch/?operationName={}".format(self.root, op),
+ method="POST", json=data, fatal=False,
+ )
- response = self.request(url, method="POST", headers=headers, data=data)
- if "/accounts/login/" in response.url or not response.history:
- raise exception.AuthenticationError()
- for cookie in response.history[0].cookies:
- if cookie.name.startswith("sessionid_"):
- return {cookie.name: cookie.value}
- raise exception.AuthenticationError()
+ if response.status_code >= 400:
+ self.log.debug("Server response: %s", response.text)
+ raise exception.StopExtraction(
+ "GraphQL query failed ('%s %s')",
+ response.status_code, response.reason)
- @staticmethod
- def _parse_tags(tags):
- return [
- text.unescape(tag.replace(":_", ":"))
- for tag in text.extract_iter(tags or "", "/tags/", "/")
- ]
+ return response.json()["data"]
-class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
+class LusciousAlbumExtractor(LusciousExtractor):
"""Extractor for image albums from luscious.net"""
subcategory = "album"
- archive_fmt = "{gallery_id}_{image_id}"
+ filename_fmt = "{category}_{album[id]}_{num:>03}.{extension}"
+ directory_fmt = ("{category}", "{album[id]} {album[title]}")
+ archive_fmt = "{album[id]}_{id}"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
- r"/(?:albums|pictures/c/[^/?&#]+/album)/([^/?&#]+_(\d+))")
+ r"/(?:albums|pictures/c/[^/?&#]+/album)/[^/?&#]+_(\d+)")
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
- "keyword": "07c0b915f2ab1cc3bbf28b76e7950fccee1213f3",
- "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
+ # "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
+ "keyword": {
+ "album": {
+ "__typename" : "Album",
+ "audiences" : list,
+ "content" : "Hentai",
+ "cover" : "re:https://cdnio.luscious.net/.+/277031/",
+ "created" : 1479625853,
+ "created_by" : "NTRshouldbeillegal",
+ "date" : "type:datetime",
+ "description" : "Enjoy.",
+ "download_url": "/download/824778/277031/",
+ "genres" : list,
+ "id" : 277031,
+ "is_manga" : True,
+ "labels" : list,
+ "language" : "English",
+ "like_status" : "none",
+ "modified" : int,
+ "permissions" : list,
+ "rating" : float,
+ "slug" : "okinami-no-koigokoro",
+ "status" : "not_moderated",
+ "tags" : list,
+ "title" : "Okinami no Koigokoro",
+ "url" : "/albums/okinami-no-koigokoro_277031/",
+ "marked_for_deletion": False,
+ "marked_for_processing": False,
+ "number_of_animated_pictures": 0,
+ "number_of_favorites": int,
+ "number_of_pictures": 18,
+ },
+ "aspect_ratio": r"re:\d+:\d+",
+ "category" : "luscious",
+ "created" : int,
+ "date" : "type:datetime",
+ "height" : int,
+ "id" : int,
+ "is_animated" : False,
+ "like_status" : "none",
+ "position" : int,
+ "resolution" : r"re:\d+x\d+",
+ "status" : "not_moderated",
+ "tags" : list,
+ "thumbnail" : str,
+ "title" : str,
+ "width" : int,
+ "number_of_comments": int,
+ "number_of_favorites": int,
+ },
}),
("https://luscious.net/albums/virgin-killer-sweater_282582/", {
"url": "21cc68a7548f4d71dfd67d8caf96349dde7e791c",
- "keyword": "e1202078b504adeccd521aa932f456a5a85479a0",
}),
("https://luscious.net/albums/not-found_277035/", {
"exception": exception.NotFoundError,
}),
("https://members.luscious.net/albums/login-required_323871/", {
- "options": (("username", None),),
- "exception": exception.HttpError,
+ "count": 78,
}),
("https://www.luscious.net/albums/okinami_277031/"),
("https://members.luscious.net/albums/okinami_277031/"),
@@ -83,126 +117,340 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
)
def __init__(self, match):
- path, self.gallery_id = match.groups()
- url = "{}/albums/{}/".format(self.root, path)
- GalleryExtractor.__init__(self, match, url)
+ LusciousExtractor.__init__(self, match)
+ self.album_id = match.group(1)
- def metadata(self, page):
- title, pos = text.extract(page, '"og:title" content="', '"')
+ def items(self):
+ album = self.metadata()
+ yield Message.Version, 1
+ yield Message.Directory, {"album": album}
+ for num, image in enumerate(self.images(), 1):
+ image["num"] = num
+ image["album"] = album
+
+ image["thumbnail"] = image.pop("thumbnails")[0]["url"]
+ image["tags"] = [item["text"] for item in image["tags"]]
+ image["date"] = text.parse_timestamp(image["created"])
+ image["id"] = text.parse_int(image["id"])
+
+ url = image["url_to_video"] or image["url_to_original"]
+ yield Message.Url, url, text.nameext_from_url(url, image)
+
+ def metadata(self):
+ variables = {
+ "id": self.album_id,
+ }
- if title is None:
- msg = text.extract(page, '<div class="content">', '</div>', pos)[0]
- if msg:
- raise exception.AuthorizationError(msg)
+ query = """
+query AlbumGet($id: ID!) {
+ album {
+ get(id: $id) {
+ ... on Album {
+ ...AlbumStandard
+ }
+ ... on MutationError {
+ errors {
+ code
+ message
+ }
+ }
+ }
+ }
+}
+
+fragment AlbumStandard on Album {
+ __typename
+ id
+ title
+ labels
+ description
+ created
+ modified
+ like_status
+ number_of_favorites
+ rating
+ status
+ marked_for_deletion
+ marked_for_processing
+ number_of_pictures
+ number_of_animated_pictures
+ slug
+ is_manga
+ url
+ download_url
+ permissions
+ cover {
+ width
+ height
+ size
+ url
+ }
+ created_by {
+ id
+ name
+ display_name
+ user_title
+ avatar {
+ url
+ size
+ }
+ url
+ }
+ content {
+ id
+ title
+ url
+ }
+ language {
+ id
+ title
+ url
+ }
+ tags {
+ id
+ category
+ text
+ url
+ count
+ }
+ genres {
+ id
+ title
+ slug
+ url
+ }
+ audiences {
+ id
+ title
+ url
+ }
+ last_viewed_picture {
+ id
+ position
+ url
+ }
+}
+"""
+ album = self._graphql("AlbumGet", variables, query)["album"]["get"]
+ if "errors" in album:
raise exception.NotFoundError("album")
- info , pos = text.extract(page, '<li class="user_info">', "", pos)
- if info is None:
- count, pos = text.extract(page, '>Pages:', '<', pos)
- else:
- count, pos = text.extract(page, '<p>', ' ', pos)
- genre, pos = text.extract(page, '<p>Genre:', '</p>', pos)
- adnce, pos = text.extract(page, '<p>Audience:', '</p>', pos)
- tags , pos = text.extract(page, '"tag_list static">', '</ol>', pos)
-
- return {
- "gallery_id": text.parse_int(self.gallery_id),
- "title" : text.unescape(title or ""),
- "count" : text.parse_int(count),
- "genre" : text.remove_html(genre),
- "audience" : text.remove_html(adnce),
- "tags" : self._parse_tags(tags),
+ album["audiences"] = [item["title"] for item in album["audiences"]]
+ album["genres"] = [item["title"] for item in album["genres"]]
+ album["tags"] = [item["text"] for item in album["tags"]]
+
+ album["cover"] = album["cover"]["url"]
+ album["content"] = album["content"]["title"]
+ album["language"] = album["language"]["title"].partition(" ")[0]
+ album["created_by"] = album["created_by"]["display_name"]
+
+ album["id"] = text.parse_int(album["id"])
+ album["date"] = text.parse_timestamp(album["created"])
+
+ return album
+
+ def images(self):
+ variables = {
+ "input": {
+ "filters": [{
+ "name" : "album_id",
+ "value": self.album_id,
+ }],
+ "display": "position",
+ "page" : 1,
+ },
}
- def images(self, page):
- extr = text.extract
-
- url = "{}/pictures/album/x_{}/sorted/old/page/1/".format(
- self.root, self.gallery_id)
- page = self.request(url).text
- pos = page.find('<div id="picture_page_')
- url = extr(page, '<a href="', '"', pos)[0]
- iurl = None
-
- while url and not url.endswith("/more_like_this/"):
- page = self.request(self.root + url).text
-
-            if not iurl:  # first loop iteration
- current = extr(page, '"pj_current_page" value="', '"')[0]
- if current and current != "1":
- url = "{}/albums/{}/jump_to_page/1/".format(
- self.root, self.gallery_id)
- page = self.request(url, method="POST").text
-
- iid , pos = extr(url , '/id/', '/')
- url , pos = extr(page, '<link rel="next" href="', '"')
- name, pos = extr(page, '<h1 id="picture_title">', '</h1>', pos)
- _ , pos = extr(page, '<ul class="image_option_icons">', '', pos)
- iurl, pos = extr(page, '<li><a href="', '"', pos+100)
-
- if iurl[0] == "/":
- iurl = text.urljoin(self.root, iurl)
-
- yield iurl, {
- "name": name,
- "image_id": text.parse_int(iid),
+ query = """
+query AlbumListOwnPictures($input: PictureListInput!) {
+ picture {
+ list(input: $input) {
+ info {
+ ...FacetCollectionInfo
+ }
+ items {
+ ...PictureStandardWithoutAlbum
}
+ }
+ }
+}
+
+fragment FacetCollectionInfo on FacetCollectionInfo {
+ page
+ has_next_page
+ has_previous_page
+ total_items
+ total_pages
+ items_per_page
+ url_complete
+ url_filters_only
+}
+
+fragment PictureStandardWithoutAlbum on Picture {
+ __typename
+ id
+ title
+ created
+ like_status
+ number_of_comments
+ number_of_favorites
+ status
+ width
+ height
+ resolution
+ aspect_ratio
+ url_to_original
+ url_to_video
+ is_animated
+ position
+ tags {
+ id
+ category
+ text
+ url
+ }
+ permissions
+ url
+ thumbnails {
+ width
+ height
+ size
+ url
+ }
+}
+"""
+ while True:
+ data = self._graphql("AlbumListOwnPictures", variables, query)
+ yield from data["picture"]["list"]["items"]
+
+ if not data["picture"]["list"]["info"]["has_next_page"]:
+ return
+ variables["input"]["page"] += 1
-class LusciousSearchExtractor(LusciousBase, Extractor):
+class LusciousSearchExtractor(LusciousExtractor):
"""Extractor for album searches on luscious.net"""
subcategory = "search"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
- r"/(albums(?:/(?![^/?&#]+_\d+)[^/?&#]+)+|manga|pictures)/?$")
+ r"/albums/list/?(?:\?([^#]+))?")
test = (
- ("https://luscious.net/manga/"),
- ("https://members.luscious.net/albums/sorted/updated/album_type/manga"
- "/content_id/2/tagged/+full_color/page/1/", {
+ ("https://members.luscious.net/albums/list/"),
+ ("https://members.luscious.net/albums/list/"
+ "?display=date_newest&language_ids=%2B1&tagged=+full_color&page=1", {
"pattern": LusciousAlbumExtractor.pattern,
- "range": "20-40",
- "count": 21,
+ "range": "41-60",
+ "count": 20,
}),
)
def __init__(self, match):
- Extractor.__init__(self, match)
- self.path = match.group(1).partition("/page/")[0]
- if not self.path.startswith("albums/"):
- self.path = "albums/sorted/updated/album_type/" + self.path
+ LusciousExtractor.__init__(self, match)
+ self.query = match.group(1)
def items(self):
- self.login()
- yield Message.Version, 1
- for album in self.albums():
- url, data = self.parse_album(album)
- yield Message.Queue, url, data
+ query = text.parse_query(self.query)
+ display = query.pop("display", "date_newest")
+ page = query.pop("page", None)
+
+ variables = {
+ "input": {
+ "display": display,
+ "filters": [{"name": n, "value": v} for n, v in query.items()],
+ "page": text.parse_int(page, 1),
+ },
+ }
- def albums(self, pnum=1):
+ query = """
+query AlbumListWithPeek($input: AlbumListInput!) {
+ album {
+ list(input: $input) {
+ info {
+ ...FacetCollectionInfo
+ }
+ items {
+ ...AlbumMinimal
+ peek_thumbnails {
+ width
+ height
+ size
+ url
+ }
+ }
+ }
+ }
+}
+
+fragment FacetCollectionInfo on FacetCollectionInfo {
+ page
+ has_next_page
+ has_previous_page
+ total_items
+ total_pages
+ items_per_page
+ url_complete
+ url_filters_only
+}
+
+fragment AlbumMinimal on Album {
+ __typename
+ id
+ title
+ labels
+ description
+ created
+ modified
+ number_of_favorites
+ number_of_pictures
+ slug
+ is_manga
+ url
+ download_url
+ cover {
+ width
+ height
+ size
+ url
+ }
+ content {
+ id
+ title
+ url
+ }
+ language {
+ id
+ title
+ url
+ }
+ tags {
+ id
+ category
+ text
+ url
+ count
+ }
+ genres {
+ id
+ title
+ slug
+ url
+ }
+ audiences {
+ id
+ title
+ url
+ }
+}
+"""
+ yield Message.Version, 1
while True:
- url = "{}/{}/page/{}/.json/".format(self.root, self.path, pnum)
- data = self.request(url).json()
+ data = self._graphql("AlbumListWithPeek", variables, query)
- yield from text.extract_iter(
- data["html"], "<figcaption>", "</figcaption>")
+ for album in data["album"]["list"]["items"]:
+ album["url"] = self.root + album["url"]
+ album["_extractor"] = LusciousAlbumExtractor
+ yield Message.Queue, album["url"], album
- if data["paginator_complete"]:
+ if not data["album"]["list"]["info"]["has_next_page"]:
return
- pnum += 1
-
- def parse_album(self, album):
- url , pos = text.extract(album, 'href="', '"')
- title, pos = text.extract(album, ">", "<", pos)
- count, pos = text.extract(album, "# of pictures:", "<", pos)
- date , pos = text.extract(album, "Updated:&nbsp;", "<", pos)
- desc , pos = text.extract(album, "class='desc'>", "<", pos)
- tags , pos = text.extract(album, "<ol ", "</ol>", pos)
-
- return text.urljoin(self.root, url), {
- "title": text.unescape(title or ""),
- "description": text.unescape(desc or ""),
- "gallery_id": text.parse_int(url.rpartition("_")[2].rstrip("/")),
- "count": text.parse_int(count),
- "date": date,
- "tags": self._parse_tags(tags),
- "_extractor": LusciousAlbumExtractor,
- }
+ variables["input"]["page"] += 1
diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
new file mode 100644
index 0000000..c980a38
--- /dev/null
+++ b/gallery_dl/extractor/naver.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://blog.naver.com/"""
+
+from .common import GalleryExtractor, Extractor, Message
+from .. import text
+
+
+class NaverBase():
+ """Base class for naver extractors"""
+ category = "naver"
+ root = "https://blog.naver.com"
+
+
+class NaverPostExtractor(NaverBase, GalleryExtractor):
+ """Extractor for blog posts on blog.naver.com"""
+ subcategory = "post"
+ filename_fmt = "{num:>03}.{extension}"
+ directory_fmt = ("{category}", "{blog[user]} {blog[id]}",
+ "{post[date]:%Y-%m-%d} {post[title]}")
+ archive_fmt = "{blog[id]}_{post[num]}_{num}"
+ pattern = (r"(?:https?://)?blog\.naver\.com/"
+ r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)")
+ test = (
+ ("https://blog.naver.com/rlfqjxm0/221430673006", {
+ "url": "6c694f3aced075ed5e9511f1e796d14cb26619cc",
+ "keyword": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
+ }),
+ (("https://blog.naver.com/PostView.nhn"
+ "?blogId=rlfqjxm0&logNo=221430673006"), {
+ "url": "6c694f3aced075ed5e9511f1e796d14cb26619cc",
+ "keyword": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
+ }),
+ )
+
+ def __init__(self, match):
+ blog_id = match.group(1)
+ if blog_id:
+ self.blog_id = blog_id
+ self.post_id = match.group(2)
+ else:
+ self.blog_id = match.group(3)
+ self.post_id = match.group(4)
+
+ url = "{}/PostView.nhn?blogId={}&logNo={}".format(
+ self.root, self.blog_id, self.post_id)
+ GalleryExtractor.__init__(self, match, url)
+
+ def metadata(self, page):
+ extr = text.extract_from(page)
+ data = {
+ "post": {
+ "title" : extr('"og:title" content="', '"'),
+ "description": extr('"og:description" content="', '"'),
+ "num" : text.parse_int(self.post_id),
+ },
+ "blog": {
+ "id" : self.blog_id,
+ "num" : text.parse_int(extr("var blogNo = '", "'")),
+ "user" : extr("var nickName = '", "'"),
+ },
+ }
+ data["post"]["date"] = text.parse_datetime(
+ extr('se_publishDate pcol2">', '<') or
+ extr('_postAddDate">', '<'), "%Y. %m. %d. %H:%M")
+ return data
+
+ def images(self, page):
+ return [
+ (url.replace("://post", "://blog", 1).partition("?")[0], None)
+ for url in text.extract_iter(page, 'data-lazy-src="', '"')
+ ]
+
+
+class NaverBlogExtractor(NaverBase, Extractor):
+ """Extractor for a user's blog on blog.naver.com"""
+ subcategory = "blog"
+ pattern = (r"(?:https?://)?blog\.naver\.com/"
+ r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
+ test = (
+ ("https://blog.naver.com/gukjung", {
+ "pattern": NaverPostExtractor.pattern,
+ "count": 12,
+ "range": "1-12",
+ }),
+ ("https://blog.naver.com/PostList.nhn?blogId=gukjung", {
+ "pattern": NaverPostExtractor.pattern,
+ "count": 12,
+ "range": "1-12",
+ }),
+ )
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.blog_id = match.group(1) or match.group(2)
+
+ def items(self):
+ yield Message.Version, 1
+
+ # fetch first post number
+ url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id)
+ post_num = text.extract(
+ self.request(url).text, 'gnFirstLogNo = "', '"',
+ )[0]
+
+        # set up params for API calls
+ url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root)
+ params = {
+ "blogId" : self.blog_id,
+ "logNo" : post_num or "0",
+ "viewDate" : "",
+ "categoryNo" : "",
+ "parentCategoryNo" : "",
+ "showNextPage" : "true",
+ "showPreviousPage" : "false",
+ "sortDateInMilli" : "",
+ "isThumbnailViewType": "false",
+ "countPerPage" : "",
+ }
+
+ # loop over all posts
+ while True:
+ data = self.request(url, params=params).json()
+
+ for post in data["postList"]:
+ post["url"] = "{}/PostView.nhn?blogId={}&logNo={}".format(
+ self.root, self.blog_id, post["logNo"])
+ post["_extractor"] = NaverPostExtractor
+ yield Message.Queue, post["url"], post
+
+ if not data["hasNextPage"]:
+ return
+ params["logNo"] = data["nextIndexLogNo"]
+ params["sortDateInMilli"] = data["nextIndexSortDate"]
diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py
index fdfad87..0bd858f 100644
--- a/gallery_dl/extractor/nijie.py
+++ b/gallery_dl/extractor/nijie.py
@@ -20,7 +20,7 @@ class NijieExtractor(AsynchronousMixin, Extractor):
"""Base class for nijie extractors"""
category = "nijie"
directory_fmt = ("{category}", "{user_id}")
- filename_fmt = "{category}_{artist_id}_{image_id}_p{num:>02}.{extension}"
+ filename_fmt = "{image_id}_p{num}.{extension}"
archive_fmt = "{image_id}_{num}"
cookiedomain = "nijie.info"
cookienames = ("nemail", "nlogin")
diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py
new file mode 100644
index 0000000..97be789
--- /dev/null
+++ b/gallery_dl/extractor/nozomi.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://nozomi.la/"""
+
+from .common import Extractor, Message
+from .. import text
+
+
+class NozomiExtractor(Extractor):
+ """Base class for nozomi extractors"""
+ category = "nozomi"
+ root = "https://nozomi.la"
+ filename_fmt = "{postid}.{extension}"
+ archive_fmt = "{postid}"
+
+ def items(self):
+ yield Message.Version, 1
+
+ data = self.metadata()
+ self.session.headers["Origin"] = self.root
+ self.session.headers["Referer"] = self.root + "/"
+
+ for post_id in map(str, self.posts()):
+ url = "https://j.nozomi.la/post/{}/{}/{}.json".format(
+ post_id[-1], post_id[-3:-1], post_id)
+ response = self.request(url, fatal=False)
+
+ if response.status_code >= 400:
+ self.log.warning(
+ "Skipping post %s ('%s %s')",
+ post_id, response.status_code, response.reason)
+ continue
+
+ image = response.json()
+ image["tags"] = self._list(image.get("general"))
+ image["artist"] = self._list(image.get("artist"))
+ image["copyright"] = self._list(image.get("copyright"))
+ image["character"] = self._list(image.get("character"))
+ image["is_video"] = bool(image.get("is_video"))
+ image["date"] = text.parse_datetime(
+ image["date"] + ":00", "%Y-%m-%d %H:%M:%S%z")
+ image["url"] = text.urljoin(self.root, image["imageurl"])
+ text.nameext_from_url(image["url"], image)
+ image.update(data)
+
+ for key in ("general", "imageurl", "imageurls"):
+ if key in image:
+ del image[key]
+
+ yield Message.Directory, image
+ yield Message.Url, image["url"], image
+
+ def metadata(self):
+ return {}
+
+ def posts(self):
+ return ()
+
+ @staticmethod
+ def _list(src):
+ if not src:
+ return []
+ return [x["tagname_display"] for x in src]
+
+ @staticmethod
+ def _unpack(b):
+ for i in range(0, len(b), 4):
+ yield (b[i] << 24) + (b[i+1] << 16) + (b[i+2] << 8) + b[i+3]
+
+
+class NozomiPostExtractor(NozomiExtractor):
+ """Extractor for individual posts on nozomi.la"""
+ subcategory = "post"
+ pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
+ test = ("https://nozomi.la/post/3649262.html", {
+ "url": "f4522adfc8159355fd0476de28761b5be0f02068",
+ "content": "cd20d2c5149871a0b80a1b0ce356526278964999",
+ "keyword": {
+ "artist" : ["hammer (sunset beach)"],
+ "character": ["patchouli knowledge"],
+ "copyright": ["touhou"],
+ "dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5cf5a",
+ "date" : "type:datetime",
+ "extension": "jpg",
+ "favorites": int,
+ "filename" : str,
+ "height" : 768,
+ "is_video" : False,
+ "postid" : 3649262,
+ "source" : "danbooru",
+ "sourceid" : 2434215,
+ "tags" : list,
+ "type" : "jpg",
+ "url" : str,
+ "width" : 1024,
+ },
+ })
+
+ def __init__(self, match):
+ NozomiExtractor.__init__(self, match)
+ self.post_id = match.group(1)
+
+ def posts(self):
+ return (self.post_id,)
+
+
+class NozomiTagExtractor(NozomiExtractor):
+ """Extractor for posts from tag searches on nozomi.la"""
+ subcategory = "tag"
+ directory_fmt = ("{category}", "{search_tags}")
+ archive_fmt = "t_{search_tags}_{postid}"
+ pattern = r"(?:https?://)?nozomi\.la/tag/([^/?&#]+)-\d+\."
+ test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
+ "pattern": r"^https://i.nozomi.la/\w/\w\w/\w+\.\w+$",
+ "count": ">= 75",
+ "range": "1-75",
+ })
+
+ def __init__(self, match):
+ NozomiExtractor.__init__(self, match)
+ self.tags = text.unquote(match.group(1)).lower()
+
+ def metadata(self):
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ url = "https://n.nozomi.la/nozomi/{}.nozomi".format(self.tags)
+ i = 0
+
+ while True:
+ headers = {"Range": "bytes={}-{}".format(i, i+255)}
+ response = self.request(url, headers=headers)
+ yield from self._unpack(response.content)
+
+ i += 256
+ cr = response.headers.get("Content-Range", "").rpartition("/")[2]
+ if text.parse_int(cr, i) <= i:
+ return
+
+
+class NozomiSearchExtractor(NozomiExtractor):
+ """Extractor for search results on nozomi.la"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "{search_tags:J }")
+ archive_fmt = "t_{search_tags}_{postid}"
+ pattern = r"(?:https?://)?nozomi\.la/search\.html\?q=([^&#]+)"
+ test = ("https://nozomi.la/search.html?q=hibiscus%203:4_ratio#1", {
+ "count": ">= 5",
+ })
+
+ def __init__(self, match):
+ NozomiExtractor.__init__(self, match)
+ self.tags = text.unquote(match.group(1)).lower().split()
+
+ def metadata(self):
+ return {"search_tags": self.tags}
+
+ def posts(self):
+ index = None
+ result = set()
+
+ def nozomi(path):
+ url = "https://j.nozomi.la/" + path + ".nozomi"
+ return self._unpack(self.request(url).content)
+
+ for tag in self.tags:
+ if tag[0] == "-":
+ if not index:
+ index = set(nozomi("index"))
+ items = index.difference(nozomi("nozomi/" + tag[1:]))
+ else:
+ items = nozomi("nozomi/" + tag)
+
+ if result:
+ result.intersection_update(items)
+ else:
+ result.update(items)
+
+ return result
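
# The .nozomi index files are plain arrays of big-endian 32-bit post
# ids; the manual bit-shifting in _unpack() above is equivalent to a
# single struct call:
import struct

def unpack_ids(buf):
    return struct.unpack(">{}I".format(len(buf) // 4), buf)

assert list(unpack_ids(bytes([0, 0, 14, 57]))) == [3641]
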
diff --git a/gallery_dl/extractor/nsfwalbum.py b/gallery_dl/extractor/nsfwalbum.py
index 5005fb4..8f1f3f2 100644
--- a/gallery_dl/extractor/nsfwalbum.py
+++ b/gallery_dl/extractor/nsfwalbum.py
@@ -17,14 +17,14 @@ class NsfwalbumAlbumExtractor(GalleryExtractor):
category = "nsfwalbum"
subcategory = "album"
root = "https://nsfwalbum.com"
- filename_fmt = "{album_id}_{page:>03}_{id}.{extension}"
+ filename_fmt = "{album_id}_{num:>03}_{id}.{extension}"
directory_fmt = ("{category}", "{album_id} {title}")
archive_fmt = "{id}"
pattern = r"(?:https?://)?(?:www\.)?nsfwalbum\.com(/album/(\d+))"
test = ("https://nsfwalbum.com/album/401611", {
"range": "1-5",
"url": "b0481fc7fad5982da397b6359fbed8421b8ba284",
- "keyword": "fc1ad4ebcd6d4cf32da15203120112b8bcf12eec",
+ "keyword": "e98f9b0d473c00000831618d0235863b1dd78294",
})
def __init__(self, match):
diff --git a/gallery_dl/extractor/oauth.py b/gallery_dl/extractor/oauth.py
index 6c6dd0a..912447b 100644
--- a/gallery_dl/extractor/oauth.py
+++ b/gallery_dl/extractor/oauth.py
@@ -296,8 +296,8 @@ class OAuthMastodon(OAuthBase):
data = self.session.post(url, data=data).json()
if "client_id" not in data or "client_secret" not in data:
- self.log.error("Failed to register new application: '%s'", data)
- raise exception.StopExtraction()
+ raise exception.StopExtraction(
+ "Failed to register new application: '%s'", data)
data["client-id"] = data.pop("client_id")
data["client-secret"] = data.pop("client_secret")
diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py
index ab5932d..9b13391 100644
--- a/gallery_dl/extractor/patreon.py
+++ b/gallery_dl/extractor/patreon.py
@@ -76,6 +76,8 @@ class PatreonExtractor(Extractor):
headers = {"Referer": self.root}
while url:
+ if not url.startswith("http"):
+ url = "https://" + url.lstrip("/:")
posts = self.request(url, headers=headers).json()
if "included" in posts:
diff --git a/gallery_dl/extractor/photobucket.py b/gallery_dl/extractor/photobucket.py
index 83f75a3..8456f97 100644
--- a/gallery_dl/extractor/photobucket.py
+++ b/gallery_dl/extractor/photobucket.py
@@ -22,11 +22,11 @@ class PhotobucketAlbumExtractor(Extractor):
filename_fmt = "{offset:>03}{pictureId:?_//}_{titleOrFilename}.{extension}"
archive_fmt = "{id}"
pattern = (r"(?:https?://)?((?:[^.]+\.)?photobucket\.com)"
- r"/user/[^/?&#]+/library/[^?&#]*")
+ r"/user/[^/?&#]+/library(?:/[^?&#]*)?")
test = (
- ("https://s258.photobucket.com/user/focolandia/library/", {
- "pattern": r"https?://[oi]+\d+.photobucket.com/albums/hh280/",
- "count": ">= 39"
+ ("https://s369.photobucket.com/user/CrpyLrkr/library", {
+ "pattern": r"https?://[oi]+\d+.photobucket.com/albums/oo139/",
+ "count": ">= 50"
}),
# subalbums of main "directory"
("https://s271.photobucket.com/user/lakerfanryan/library/", {
@@ -149,10 +149,9 @@ class PhotobucketImageExtractor(Extractor):
if "message" not in image:
break # success
tries += 1
- self.log.debug("'%s'", image["message"])
+ self.log.debug(image["message"])
else:
- self.log.error("%s", image["message"])
- raise exception.StopExtraction()
+ raise exception.StopExtraction(image["message"])
# adjust metadata entries to be at least somewhat similar
# to what the 'album' extractor provides
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index f5b8869..e36a82b 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -241,9 +241,8 @@ class PinterestAPI():
if response.status_code == 404 or response.history:
resource = self.extractor.subcategory.rpartition("-")[2]
raise exception.NotFoundError(resource)
- self.extractor.log.error("API request failed")
self.extractor.log.debug("%s", response.text)
- raise exception.StopExtraction()
+ raise exception.StopExtraction("API request failed")
def _pagination(self, resource, options):
while True:
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index d313daa..d32f245 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -335,11 +335,9 @@ class PixivSearchExtractor(PixivExtractor):
def get_metadata(self, user=None):
query = text.parse_query(self.query)
- if "word" in query:
- self.word = text.unescape(query["word"])
- else:
- self.log.error("missing search term")
- raise exception.StopExtraction()
+ if "word" not in query:
+ raise exception.StopExtraction("Missing search term")
+ self.word = query["word"]
sort = query.get("order", "date_d")
sort_map = {
@@ -504,8 +502,7 @@ class PixivAppAPI():
return response.json()
if response.status_code == 404:
raise exception.NotFoundError()
- self.log.error("API request failed: %s", response.text)
- raise exception.StopExtraction()
+ raise exception.StopExtraction("API request failed: %s", response.text)
def _pagination(self, endpoint, params):
while True:
diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py
index 325c6a0..2bb66ac 100644
--- a/gallery_dl/extractor/plurk.py
+++ b/gallery_dl/extractor/plurk.py
@@ -49,7 +49,7 @@ class PlurkExtractor(Extractor):
data = {"plurk_id": plurk["id"], "count": "200"}
while True:
- info = self.request(url, "POST", data=data).json()
+ info = self.request(url, method="POST", data=data).json()
yield from info["responses"]
if not info["has_newer"]:
return
@@ -91,7 +91,8 @@ class PlurkTimelineExtractor(PlurkExtractor):
offset = datetime.datetime.strptime(
plurks[-1]["posted"], "%a, %d %b %Y %H:%M:%S %Z")
data["offset"] = offset.strftime("%Y-%m-%dT%H:%M:%S.000Z")
- response = self.request(url, "POST", headers=headers, data=data)
+ response = self.request(
+ url, method="POST", headers=headers, data=data)
plurks = response.json()["plurks"]
diff --git a/gallery_dl/extractor/reddit.py b/gallery_dl/extractor/reddit.py
index 9c283de..ecce003 100644
--- a/gallery_dl/extractor/reddit.py
+++ b/gallery_dl/extractor/reddit.py
@@ -259,12 +259,17 @@ class RedditAPI():
data = {"grant_type": ("https://oauth.reddit.com/"
"grants/installed_client"),
"device_id": "DO_NOT_TRACK_THIS_DEVICE"}
+
+ auth = (self.client_id, "")
response = self.extractor.request(
- url, method="POST", data=data, auth=(self.client_id, ""))
+ url, method="POST", data=data, auth=auth, fatal=False)
+ data = response.json()
+
if response.status_code != 200:
- raise exception.AuthenticationError('"{} ({})"'.format(
- response.json().get("message"), response.status_code))
- return "Bearer " + response.json()["access_token"]
+ self.log.debug("Server response: %s", data)
+ raise exception.AuthenticationError('"{}: {}"'.format(
+ data.get("error"), data.get("message")))
+ return "Bearer " + data["access_token"]
def _call(self, endpoint, params):
url = "https://oauth.reddit.com" + endpoint
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index bb8a2ae..b07d024 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -201,9 +201,8 @@ class SankakuTagExtractor(SankakuExtractor):
tags = self.tags.split()
if not self.logged_in and len(tags) > 4:
- self.log.error("Unauthenticated users cannot use "
- "more than 4 tags at once.")
- raise exception.StopExtraction()
+ raise exception.StopExtraction(
+ "Unauthenticated users cannot use more than 4 tags at once.")
return {"search_tags": " ".join(tags)}
def get_posts(self):
diff --git a/gallery_dl/extractor/sexcom.py b/gallery_dl/extractor/sexcom.py
index 38b7813..c4597af 100644
--- a/gallery_dl/extractor/sexcom.py
+++ b/gallery_dl/extractor/sexcom.py
@@ -21,6 +21,7 @@ class SexcomExtractor(Extractor):
root = "https://www.sex.com"
def items(self):
+ self.session.headers["Referer"] = self.root
yield Message.Version, 1
yield Message.Directory, self.metadata()
for pin in map(self._parse_pin, self.pins()):
@@ -52,7 +53,7 @@ class SexcomExtractor(Extractor):
def _parse_pin(self, url):
response = self.request(url, fatal=False)
if response.status_code >= 400:
- self.log.warning('Unable to fetch %s ("%s: %s")',
+ self.log.warning('Unable to fetch %s ("%s %s")',
url, response.status_code, response.reason)
return None
extr = text.extract_from(response.text)
@@ -102,6 +103,7 @@ class SexcomPinExtractor(SexcomExtractor):
# picture
("https://www.sex.com/pin/56714360/", {
"url": "599190d6e3d79f9f49dda194a0a58cb0ffa3ab86",
+ "content": "963ed681cf53904173c7581b713c7f9471f04db0",
"keyword": {
"comments": int,
"date": "2018-10-02T21:18:17-04:00",
@@ -150,7 +152,7 @@ class SexcomRelatedPinExtractor(SexcomPinExtractor):
directory_fmt = ("{category}", "related {original_pin[pin_id]}")
pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
test = ("https://www.sex.com/pin/56714360/#related", {
- "count": 24,
+ "count": ">= 22",
})
def metadata(self):
diff --git a/gallery_dl/extractor/simplyhentai.py b/gallery_dl/extractor/simplyhentai.py
index ba0fcf4..82a61da 100644
--- a/gallery_dl/extractor/simplyhentai.py
+++ b/gallery_dl/extractor/simplyhentai.py
@@ -23,7 +23,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
(("https://original-work.simply-hentai.com"
"/amazon-no-hiyaku-amazon-elixir"), {
"url": "21613585ae5ec2f69ea579e9713f536fceab5bd5",
- "keyword": "bf75f9ff0fb60756b1b9b92403526a72d9178d23",
+ "keyword": "9e87a0973553b2922ddee37958b8f5d87910af72",
}),
("https://www.simply-hentai.com/notfound", {
"exception": exception.GalleryDLException,
@@ -43,7 +43,7 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
extr = text.extract_from(page)
split = text.split_html
- self.chapter_url = extr('<link rel="canonical" href="', '"')
+ self.gallery_url = extr('<link rel="canonical" href="', '"')
title = extr('<meta property="og:title" content="', '"')
if not title:
raise exception.NotFoundError("gallery")
@@ -63,11 +63,14 @@ class SimplyhentaiGalleryExtractor(GalleryExtractor):
return data
def images(self, _):
- url = self.chapter_url + "/all-pages"
+ url = self.gallery_url + "/all-pages"
headers = {"Accept": "application/json"}
images = self.request(url, headers=headers).json()
return [
- (urls["full"], {"image_id": text.parse_int(image_id)})
+ (
+ urls["full"].replace("/giant_thumb_", "/"),
+ {"image_id": text.parse_int(image_id)},
+ )
for image_id, urls in sorted(images.items())
]
@@ -84,12 +87,12 @@ class SimplyhentaiImageExtractor(Extractor):
test = (
(("https://www.simply-hentai.com/image"
"/pheromomania-vol-1-kanzenban-isao-3949d8b3-400c-4b6"), {
- "url": "0338eb137830ab6f81e5f410d3936ef785d063d9",
+ "url": "3d8eb55240a960134891bd77fe1df7988fcdc455",
"keyword": "e10e5588481cab68329ef6ec1e5325206b2079a2",
}),
("https://www.simply-hentai.com/gif/8915dfcf-0b6a-47c", {
- "url": "11c060d7ec4dfd0bd105300b6e1fd454674a5af1",
- "keyword": "dd97a4bb449c397d6fec9f43a1303c0fb168ae65",
+ "url": "f73916527211b4a40f26568ee26cd8999f5f4f30",
+ "keyword": "f94d775177fed918759c8a78a50976f867425b48",
}),
)
diff --git a/gallery_dl/extractor/smugmug.py b/gallery_dl/extractor/smugmug.py
index 2e6508c..be29dcf 100644
--- a/gallery_dl/extractor/smugmug.py
+++ b/gallery_dl/extractor/smugmug.py
@@ -259,11 +259,9 @@ class SmugmugAPI(oauth.OAuth1API):
if data["Code"] == 404:
raise exception.NotFoundError()
if data["Code"] == 429:
- self.log.error("Rate limit reached")
- else:
- self.log.error("API request failed")
- self.log.debug(data)
- raise exception.StopExtraction()
+ raise exception.StopExtraction("Rate limit reached")
+ self.log.debug(data)
+ raise exception.StopExtraction("API request failed")
def _expansion(self, endpoint, expands, params=None):
endpoint = self._extend(endpoint, expands)
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index cc0dc90..298b7e0 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -109,14 +109,13 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
def images(self, page):
url = "{}/Read/Index/{}?page=1".format(self.root, self.gallery_id)
- headers = {"Referer": self.chapter_url}
+ headers = {"Referer": self.gallery_url}
response = self.request(url, headers=headers, fatal=False)
if "/Auth/" in response.url:
- self.log.error(
+ raise exception.StopExtraction(
"Failed to get gallery JSON data. Visit '%s' in a browser "
"and solve the CAPTCHA to continue.", response.url)
- raise exception.StopExtraction()
page = response.text
tpl, pos = text.extract(page, 'data-cdn="', '"')
@@ -195,8 +194,8 @@ class TsuminoSearchExtractor(TsuminoBase, Extractor):
return self._parse_simple(query)
return self._parse_jsurl(query)
except Exception as exc:
- self.log.error("Invalid search query: '%s' (%s)", query, exc)
- raise exception.StopExtraction()
+ raise exception.StopExtraction(
+ "Invalid search query '%s' (%s)", query, exc)
@staticmethod
def _parse_simple(query):
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 8abbaf7..998eed4 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -407,26 +407,22 @@ class TumblrAPI(oauth.OAuth1API):
# daily rate limit
if response.headers.get("x-ratelimit-perday-remaining") == "0":
reset = response.headers.get("x-ratelimit-perday-reset")
- self.log.error(
+ raise exception.StopExtraction(
"Daily API rate limit exceeded: aborting; "
- "rate limit will reset at %s",
- self._to_time(reset),
+ "rate limit will reset at %s", self._to_time(reset),
)
- raise exception.StopExtraction()
# hourly rate limit
reset = response.headers.get("x-ratelimit-perhour-reset")
if reset:
self.log.info(
- "Hourly API rate limit exceeded; "
- "waiting until %s for rate limit reset",
- self._to_time(reset),
+ "Hourly API rate limit exceeded; waiting until "
+ "%s for rate limit reset", self._to_time(reset),
)
time.sleep(int(reset) + 1)
return self._call(blog, endpoint, params)
- self.log.error(data)
- raise exception.StopExtraction()
+ raise exception.StopExtraction(data)
@staticmethod
def _to_time(reset):
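
# The rate-limit handling above as one small helper: a zero daily
# remainder aborts, while an hourly reset header means "sleep, then
# retry". Header names and semantics as in the hunk (reset values are
# seconds until the limit resets).
import time

def handle_ratelimit(headers):
    if headers.get("x-ratelimit-perday-remaining") == "0":
        raise RuntimeError("daily API rate limit exceeded")
    reset = headers.get("x-ratelimit-perhour-reset")
    if reset:
        time.sleep(int(reset) + 1)
        return True   # caller should repeat the request
    return False
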
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index 8105ede..dfafc1f 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -10,7 +10,7 @@
from .common import Extractor, Message
from .. import text, exception
-from ..cache import cache
+from ..cache import cache, memcache
import re
@@ -26,6 +26,7 @@ class TwitterExtractor(Extractor):
def __init__(self, match):
Extractor.__init__(self, match)
self.user = match.group(1)
+ self.logged_in = False
self.retweets = self.config("retweets", True)
self.content = self.config("content", False)
self.videos = self.config("videos", False)
@@ -53,10 +54,20 @@ class TwitterExtractor(Extractor):
yield Message.Urllist, urls, data
if self.videos and "-videoContainer" in tweet:
+ if self.videos == "ytdl":
+ data["extension"] = None
+ url = "ytdl:{}/{}/status/{}".format(
+ self.root, data["user"], data["tweet_id"])
+ else:
+ url = self._video_from_tweet(data["tweet_id"])
+ ext = text.ext_from_url(url)
+ if ext == "m3u8":
+ url = "ytdl:" + url
+ data["extension"] = "mp4"
+ data["_ytdl_extra"] = {"protocol": "m3u8_native"}
+ else:
+ data["extension"] = ext
data["num"] = 1
- data["extension"] = None
- url = "ytdl:{}/{}/status/{}".format(
- self.root, data["user"], data["tweet_id"])
yield Message.Url, url, data
def metadata(self):
@@ -70,6 +81,7 @@ class TwitterExtractor(Extractor):
username, password = self._get_auth_info()
if username:
self._update_cookies(self._login_impl(username, password))
+ self.logged_in = True
@cache(maxage=360*24*3600, keyarg=1)
def _login_impl(self, username, password):
@@ -115,17 +127,48 @@ class TwitterExtractor(Extractor):
data["content"] = cl if cl and len(cr) < 16 else content
return data
- def _tweets_from_api(self, url):
+ def _video_from_tweet(self, tweet_id):
+ url = "https://api.twitter.com/1.1/videos/tweet/config/{}.json".format(
+ tweet_id)
+ cookies = None
+ headers = {
+ "Origin" : self.root,
+ "Referer" : "{}/i/web/status/{}".format(self.root, tweet_id),
+ "x-csrf-token" : self.session.cookies.get("ct0"),
+ "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekM"
+ "xqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28N"
+ "HfOPqkca3qaAxGfsyKCs0wRbw",
+ }
+
+ if self.logged_in:
+ headers["x-twitter-auth-type"] = "OAuth2Session"
+ else:
+ token = self._guest_token(headers)
+ cookies = {"gt": token}
+ headers["x-guest-token"] = token
+
+ data = self.request(url, cookies=cookies, headers=headers).json()
+ return data["track"]["playbackUrl"]
+
+ @memcache()
+ def _guest_token(self, headers):
+ return self.request(
+ "https://api.twitter.com/1.1/guest/activate.json",
+ method="POST", headers=headers,
+ ).json().get("guest_token")
+
+ def _tweets_from_api(self, url, max_position=None):
params = {
"include_available_features": "1",
"include_entities": "1",
+ "max_position": max_position,
"reset_error_state": "false",
"lang": "en",
}
headers = {
"X-Requested-With": "XMLHttpRequest",
"X-Twitter-Active-User": "yes",
- "Referer": "{}/{}".format(self.root, self.user)
+ "Referer": self.root + "/",
}
while True:
@@ -140,18 +183,23 @@ class TwitterExtractor(Extractor):
if not data["has_more_items"]:
return
- position = text.parse_int(text.extract(
- tweet, 'data-tweet-id="', '"')[0])
- if "max_position" in params and position >= params["max_position"]:
- return
- params["max_position"] = position
+ if "min_position" in data:
+ position = data["min_position"]
+ if position == max_position:
+ return
+ else:
+ position = text.parse_int(text.extract(
+ tweet, 'data-tweet-id="', '"')[0])
+ if max_position and position >= max_position:
+ return
+ params["max_position"] = max_position = position
class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for all images from a user's timeline"""
subcategory = "timeline"
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/([^/?&#]+)/?(?:$|[?#])")
+ r"/(?!search)([^/?&#]+)/?(?:$|[?#])")
test = (
("https://twitter.com/supernaturepics", {
"range": "1-40",
@@ -171,7 +219,7 @@ class TwitterMediaExtractor(TwitterExtractor):
"""Extractor for all images from a user's Media Tweets"""
subcategory = "media"
pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
- r"/([^/?&#]+)/media(?!\w)")
+ r"/(?!search)([^/?&#]+)/media(?!\w)")
test = (
("https://twitter.com/supernaturepics/media", {
"range": "1-40",
@@ -186,6 +234,26 @@ class TwitterMediaExtractor(TwitterExtractor):
return self._tweets_from_api(url)
+class TwitterSearchExtractor(TwitterExtractor):
+ """Extractor for all images from a search timeline"""
+ subcategory = "search"
+ directory_fmt = ("{category}", "Search", "{search}")
+ pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
+ r"/search/?\?(?:[^&#]+&)*q=([^&#]+)")
+ test = ("https://twitter.com/search?q=nature", {
+ "range": "1-40",
+ "count": 40,
+ })
+
+ def metadata(self):
+ return {"search": self.user}
+
+ def tweets(self):
+ url = "{}/i/search/timeline?f=tweets&q={}".format(
+ self.root, self.user)
+ return self._tweets_from_api(url, "-1")
+
+
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
subcategory = "tweet"
@@ -205,17 +273,17 @@ class TwitterTweetExtractor(TwitterExtractor):
# video
("https://twitter.com/perrypumas/status/1065692031626829824", {
"options": (("videos", True),),
- "pattern": r"ytdl:https://twitter.com/perrypumas/status/\d+",
+ "pattern": r"ytdl:https://video.twimg.com/ext_tw_video/.*.m3u8",
}),
# content with emoji, newlines, hashtags (#338)
("https://twitter.com/yumi_san0112/status/1151144618936823808", {
"options": (("content", True),),
- "keyword": "b13b6c4cd0b0c15b2ea7685479e7fedde3c47b9e",
+ "keyword": "b133464b73aec33871521ab021a3166204194285",
}),
# Reply to another tweet (#403)
("https://twitter.com/tyson_hesse/status/1103767554424598528", {
- "options": (("videos", True),),
- "pattern": r"ytdl:https://twitter.com/.*/1103767554424598528$",
+ "options": (("videos", "ytdl"),),
+ "pattern": r"ytdl:https://twitter.com/.+/1103767554424598528",
}),
# /i/web/ URL
("https://twitter.com/i/web/status/1155074198240292865", {
@@ -231,9 +299,19 @@ class TwitterTweetExtractor(TwitterExtractor):
return {"user": self.user, "tweet_id": self.tweet_id}
def tweets(self):
- self.session.cookies.clear()
url = "{}/i/web/status/{}".format(self.root, self.tweet_id)
- page = self.request(url).text
+ cookies = {"app_shell_visited": "1"}
+ headers = {
+ "Referer" : url,
+ "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; "
+ "Trident/7.0; rv:11.0) like Gecko",
+ }
+
+ response = self.request(url, cookies=cookies, headers=headers)
+ if response.history and response.url == self.root + "/":
+ raise exception.AuthorizationError()
+ page = response.text
+
end = page.index('class="js-tweet-stats-container')
beg = page.rindex('<div class="tweet ', 0, end)
return (page[beg:end],)
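
# The gist of the new video handling: the playback URL from the 1.1
# video-config endpoint is either a direct .mp4 or an HLS manifest;
# manifests get the "ytdl:" prefix plus the m3u8_native protocol so
# youtube-dl remuxes them to .mp4. Pure dispatch logic, no network:
def classify_video_url(url):
    ext = url.rpartition(".")[2].partition("?")[0]
    if ext == "m3u8":
        return "ytdl:" + url, "mp4"   # hand over to youtube-dl
    return url, ext

assert classify_video_url("https://video.twimg.com/v/1.m3u8?tag=10") == \
    ("ytdl:https://video.twimg.com/v/1.m3u8?tag=10", "mp4")
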
diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py
index 4326582..09a166c 100644
--- a/gallery_dl/extractor/wallhaven.py
+++ b/gallery_dl/extractor/wallhaven.py
@@ -33,8 +33,8 @@ class WallhavenSearchExtractor(WallhavenExtractor):
("https://wallhaven.cc/search?q=touhou"),
(("https://wallhaven.cc/search?q=id%3A87"
"&categories=111&purity=100&sorting=date_added&order=asc&page=3"), {
- "count": 4,
- "url": "d024bc11895d758b76ffdb0fa85a627e53f072cf",
+ "count": 5,
+ "url": "d477b68a534c3416d506ae1f159b25debab64678",
}),
)
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 8b61024..9c76336 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -29,6 +29,7 @@ class Job():
extr.log.job = self
extr.log.debug("Using %s for '%s'", extr.__class__.__name__, extr.url)
+ self.status = 0
self.pred_url = self._prepare_predicates("image", True)
self.pred_queue = self._prepare_predicates("chapter", False)
@@ -46,34 +47,18 @@ class Job():
log = self.extractor.log
for msg in self.extractor:
self.dispatch(msg)
- except exception.AuthenticationError as exc:
- msg = str(exc) or "Please provide a valid username/password pair."
- log.error("Authentication failed: %s", msg)
- except exception.AuthorizationError:
- log.error("You do not have permission to access the resource "
- "at '%s'", self.extractor.url)
- except exception.NotFoundError as exc:
- res = str(exc) or "resource (gallery/image/user)"
- log.error("The %s at '%s' does not exist", res, self.extractor.url)
- except exception.HttpError as exc:
- err = exc.args[0]
- if isinstance(err, Exception):
- err = "{}: {}".format(err.__class__.__name__, err)
- log.error("HTTP request failed: %s", err)
- except exception.FormatError as exc:
- err, obj = exc.args
- log.error("Applying %s format string failed: %s: %s",
- obj, err.__class__.__name__, err)
- except exception.FilterError as exc:
- err = exc.args[0]
- log.error("Evaluating filter expression failed: %s: %s",
- err.__class__.__name__, err)
- except exception.StopExtraction:
- pass
+ except exception.StopExtraction as exc:
+ if exc.message:
+ log.error(exc.message)
+ self.status |= exc.code
+ except exception.GalleryDLException as exc:
+ log.error("%s: %s", exc.__class__.__name__, exc)
+ self.status |= exc.code
except OSError as exc:
log.error("Unable to download data: %s: %s",
exc.__class__.__name__, exc)
log.debug("", exc_info=True)
+ self.status |= 128
except Exception as exc:
log.error(("An unexpected error occurred: %s - %s. "
"Please run gallery-dl again with the --verbose flag, "
@@ -81,8 +66,13 @@ class Job():
"https://github.com/mikf/gallery-dl/issues ."),
exc.__class__.__name__, exc)
log.debug("", exc_info=True)
+ self.status |= 1
+ except BaseException:
+ self.status |= 1
+ raise
finally:
self.handle_finalize()
+ return self.status
def dispatch(self, msg):
"""Call the appropriate message handler"""
@@ -114,17 +104,17 @@ class Job():
)
# TODO: support for multiple message versions
- def handle_url(self, url, keywords):
+ def handle_url(self, url, kwdict):
"""Handle Message.Url"""
- def handle_urllist(self, urls, keywords):
+ def handle_urllist(self, urls, kwdict):
"""Handle Message.Urllist"""
- self.handle_url(urls[0], keywords)
+ self.handle_url(urls[0], kwdict)
- def handle_directory(self, keywords):
+ def handle_directory(self, kwdict):
"""Handle Message.Directory"""
- def handle_queue(self, url, keywords):
+ def handle_queue(self, url, kwdict):
"""Handle Message.Queue"""
def handle_finalize(self):
@@ -132,8 +122,9 @@ class Job():
def update_kwdict(self, kwdict):
"""Update 'kwdict' with additional metadata"""
- kwdict["category"] = self.extractor.category
- kwdict["subcategory"] = self.extractor.subcategory
+ extr = self.extractor
+ kwdict["category"] = extr.category
+ kwdict["subcategory"] = extr.subcategory
if self.userkwds:
kwdict.update(self.userkwds)
@@ -189,14 +180,14 @@ class DownloadJob(Job):
self.postprocessors = None
self.out = output.select()
- def handle_url(self, url, keywords, fallback=None):
+ def handle_url(self, url, kwdict, fallback=None):
"""Download the resource specified in 'url'"""
postprocessors = self.postprocessors
pathfmt = self.pathfmt
archive = self.archive
# prepare download
- pathfmt.set_filename(keywords)
+ pathfmt.set_filename(kwdict)
if postprocessors:
for pp in postprocessors:
@@ -219,6 +210,7 @@ class DownloadJob(Job):
break
else:
# download failed
+ self.status |= 4
self.log.error("Failed to download %s",
pathfmt.filename or url)
return
@@ -236,41 +228,45 @@ class DownloadJob(Job):
pathfmt.finalize()
self.out.success(pathfmt.path, 0)
if archive:
- archive.add(keywords)
+ archive.add(kwdict)
if postprocessors:
for pp in postprocessors:
pp.run_after(pathfmt)
self._skipcnt = 0
- def handle_urllist(self, urls, keywords):
+ def handle_urllist(self, urls, kwdict):
"""Download the resource specified in 'url'"""
fallback = iter(urls)
url = next(fallback)
- self.handle_url(url, keywords, fallback)
+ self.handle_url(url, kwdict, fallback)
- def handle_directory(self, keywords):
+ def handle_directory(self, kwdict):
"""Set and create the target directory for downloads"""
if not self.pathfmt:
- self.initialize(keywords)
+ self.initialize(kwdict)
else:
- self.pathfmt.set_directory(keywords)
+ self.pathfmt.set_directory(kwdict)
- def handle_queue(self, url, keywords):
- if "_extractor" in keywords:
- extr = keywords["_extractor"].from_url(url)
+ def handle_queue(self, url, kwdict):
+ if "_extractor" in kwdict:
+ extr = kwdict["_extractor"].from_url(url)
else:
extr = extractor.find(url)
if extr:
- self.__class__(extr, self).run()
+ self.status |= self.__class__(extr, self).run()
else:
self._write_unsupported(url)
def handle_finalize(self):
- if self.postprocessors:
- for pp in self.postprocessors:
- pp.finalize()
+ pathfmt = self.pathfmt
if self.archive:
self.archive.close()
+ if pathfmt:
+ self.extractor._store_cookies()
+ if self.postprocessors:
+ status = self.status
+ for pp in self.postprocessors:
+ pp.run_final(pathfmt, status)
def handle_skip(self):
self.out.skip(self.pathfmt.path)
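
handle_finalize now also persists session cookies once a PathFormat exists and hands each postprocessor the accumulated job status through the new run_final hook, replacing the argument-less finalize. A custom postprocessor written against the new interface might look like this (a sketch; NotifyPP is hypothetical, the base class is the one from postprocessor/common.py below):

    from gallery_dl.postprocessor.common import PostProcessor

    class NotifyPP(PostProcessor):
        """Hypothetical postprocessor reporting the overall job result"""

        def run_final(self, pathfmt, status):
            if status == 0:
                print("job finished cleanly:", pathfmt.realdirectory)
            else:
                print("job finished with status", status)
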
@@ -308,11 +304,11 @@ class DownloadJob(Job):
self.downloaders[scheme] = instance
return instance
- def initialize(self, keywords=None):
+ def initialize(self, kwdict=None):
"""Delayed initialization of PathFormat, etc."""
self.pathfmt = util.PathFormat(self.extractor)
- if keywords:
- self.pathfmt.set_directory(keywords)
+ if kwdict:
+ self.pathfmt.set_directory(kwdict)
self.sleep = self.extractor.config("sleep")
if not self.extractor.config("download", True):
@@ -379,15 +375,15 @@ class DownloadJob(Job):
class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""
- def handle_url(self, url, keywords, fallback=None):
- self.pathfmt.set_filename(keywords)
+ def handle_url(self, url, kwdict, fallback=None):
+ self.pathfmt.set_filename(kwdict)
self.out.skip(self.pathfmt.path)
if self.sleep:
time.sleep(self.sleep)
if self.archive:
- self.archive.add(keywords)
+ self.archive.add(kwdict)
- def handle_directory(self, keywords):
+ def handle_directory(self, kwdict):
if not self.pathfmt:
self.initialize()
@@ -395,19 +391,19 @@ class SimulationJob(DownloadJob):
class KeywordJob(Job):
"""Print available keywords"""
- def handle_url(self, url, keywords):
+ def handle_url(self, url, kwdict):
print("\nKeywords for filenames and --filter:")
print("------------------------------------")
- self.print_keywords(keywords)
+ self.print_kwdict(kwdict)
raise exception.StopExtraction()
- def handle_directory(self, keywords):
+ def handle_directory(self, kwdict):
print("Keywords for directory names:")
print("-----------------------------")
- self.print_keywords(keywords)
+ self.print_kwdict(kwdict)
- def handle_queue(self, url, keywords):
- if not keywords:
+ def handle_queue(self, url, kwdict):
+ if not kwdict:
self.extractor.log.info(
"This extractor delegates work to other extractors "
"and does not provide any keywords on its own. Try "
@@ -415,27 +411,27 @@ class KeywordJob(Job):
else:
print("Keywords for --chapter-filter:")
print("------------------------------")
- self.print_keywords(keywords)
+ self.print_kwdict(kwdict)
if self.extractor.categorytransfer:
print()
KeywordJob(url, self).run()
raise exception.StopExtraction()
@staticmethod
- def print_keywords(keywords, prefix=""):
- """Print key-value pairs with formatting"""
+ def print_kwdict(kwdict, prefix=""):
+ """Print key-value pairs in 'kwdict' with formatting"""
suffix = "]" if prefix else ""
- for key, value in sorted(keywords.items()):
+ for key, value in sorted(kwdict.items()):
if key[0] == "_":
continue
key = prefix + key + suffix
if isinstance(value, dict):
- KeywordJob.print_keywords(value, key + "[")
+ KeywordJob.print_kwdict(value, key + "[")
elif isinstance(value, list):
if value and isinstance(value[0], dict):
- KeywordJob.print_keywords(value[0], key + "[][")
+ KeywordJob.print_kwdict(value[0], key + "[][")
else:
print(key, "[]", sep="")
for val in value:
@@ -502,6 +498,7 @@ class DataJob(Job):
# dump to 'file'
util.dump_json(self.data, self.file, self.ascii, 2)
+ return 0
def handle_url(self, url, kwdict):
self.data.append((Message.Url, url, self._filter(kwdict)))
diff --git a/gallery_dl/oauth.py b/gallery_dl/oauth.py
index 69ab4f6..3093a72 100644
--- a/gallery_dl/oauth.py
+++ b/gallery_dl/oauth.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018 Mike Fährmann
+# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -126,7 +126,7 @@ class OAuth1API():
self.session = extractor.session
self.api_key = api_key
- def request(self, url, method="GET", **kwargs):
+ def request(self, url, **kwargs):
kwargs["fatal"] = None
kwargs["session"] = self.session
- return self.extractor.request(url, method, **kwargs)
+ return self.extractor.request(url, **kwargs)
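
With the explicit method parameter gone, OAuth1API.request is a plain pass-through: whatever the caller supplies travels to Extractor.request via **kwargs, which presumably grew a matching keyword parameter in this release. Non-GET calls now name the verb explicitly (assumed call shape):

    # before: api.request(url, "POST", data=payload)
    api.request(url, method="POST", data=payload)
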
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index d3119b7..3118b83 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -321,13 +321,26 @@ def build_parser():
)
postprocessor.add_argument(
"--ugoira-conv",
- dest="postprocessors",
- action="append_const", const={"name": "ugoira", "ffmpeg-args": (
- "-c:v", "libvpx", "-crf", "4", "-b:v", "5000k", "-an"),
- "whitelist": ("pixiv", "danbooru")},
+ dest="postprocessors", action="append_const", const={
+ "name" : "ugoira",
+ "ffmpeg-args" : ("-c:v", "libvpx", "-crf", "4", "-b:v", "5000k"),
+ "ffmpeg-twopass": True,
+ "whitelist" : ("pixiv", "danbooru"),
+ },
help="Convert Pixiv Ugoira to WebM (requires FFmpeg)",
)
postprocessor.add_argument(
+ "--ugoira-conv-lossless",
+ dest="postprocessors", action="append_const", const={
+ "name" : "ugoira",
+ "ffmpeg-args" : ("-c:v", "libvpx-vp9", "-lossless", "1",
+ "-pix_fmt", "yuv420p"),
+ "ffmpeg-twopass": False,
+ "whitelist" : ("pixiv", "danbooru"),
+ },
+ help="Convert Pixiv Ugoira to WebM in VP9 lossless mode",
+ )
+ postprocessor.add_argument(
"--write-metadata",
dest="postprocessors",
action="append_const", const={"name": "metadata"},
diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py
index 71ef932..83b42eb 100644
--- a/gallery_dl/postprocessor/common.py
+++ b/gallery_dl/postprocessor/common.py
@@ -31,8 +31,8 @@ class PostProcessor():
"""Execute postprocessor after moving a file to its target location"""
@staticmethod
- def finalize():
- """Cleanup"""
+ def run_final(pathfmt, status):
+ """Postprocessor finalization after all files have been downloaded"""
def __repr__(self):
return self.__class__.__name__
diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py
index 19a9b87..0a56281 100644
--- a/gallery_dl/postprocessor/exec.py
+++ b/gallery_dl/postprocessor/exec.py
@@ -26,17 +26,26 @@ class ExecPP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
args = options["command"]
+ final = options.get("final", False)
if isinstance(args, str):
+ if final:
+ self._format = self._format_args_directory
+ else:
+ self._format = self._format_args_path
if "{}" not in args:
args += " {}"
self.args = args
self.shell = True
- self._format = self._format_args_string
else:
+ self._format = self._format_args_list
self.args = [util.Formatter(arg) for arg in args]
self.shell = False
- self._format = self._format_args_list
+
+ if final:
+ self.run_after = PostProcessor.run_after
+ else:
+ self.run_final = PostProcessor.run_final
if options.get("async", False):
self._exec = self._exec_async
@@ -44,9 +53,16 @@ class ExecPP(PostProcessor):
def run_after(self, pathfmt):
self._exec(self._format(pathfmt))
- def _format_args_string(self, pathfmt):
+ def run_final(self, pathfmt, status):
+ if status == 0:
+ self._exec(self._format(pathfmt))
+
+ def _format_args_path(self, pathfmt):
return self.args.replace("{}", quote(pathfmt.realpath))
+ def _format_args_directory(self, pathfmt):
+ return self.args.replace("{}", quote(pathfmt.realdirectory))
+
def _format_args_list(self, pathfmt):
kwdict = pathfmt.kwdict
kwdict["_directory"] = pathfmt.realdirectory
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index 6659a8d..42f7608 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -59,7 +59,7 @@ class ZipPP(PostProcessor):
with zipfile.ZipFile(*self.args) as zfile:
self._write(pathfmt, zfile)
- def finalize(self):
+ def run_final(self, pathfmt, status):
if self.zfile:
self.zfile.close()
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index d87184d..fb51edf 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -52,10 +52,10 @@ def advance(iterable, num):
return iterator
-def raises(obj):
- """Returns a function that raises 'obj' as exception"""
- def wrap():
- raise obj
+def raises(cls):
+ """Returns a function that raises 'cls' as exception"""
+ def wrap(*args):
+ raise cls(*args)
return wrap
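
raises() now closes over the exception class rather than a ready-made instance, so the returned callable can forward positional arguments to the constructor. This is what lets abort() in filter expressions optionally carry a message (assuming the reworked StopExtraction accepts one):

    abort = raises(exception.StopExtraction)
    abort()                # raise StopExtraction()
    abort("no new files")  # raise StopExtraction("no new files")
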
@@ -287,21 +287,21 @@ class UniquePredicate():
class FilterPredicate():
"""Predicate; True if evaluating the given expression returns True"""
- globalsdict = {
- "parse_int": text.parse_int,
- "urlsplit": urllib.parse.urlsplit,
- "datetime": datetime.datetime,
- "abort": raises(exception.StopExtraction()),
- "re": re,
- }
def __init__(self, filterexpr, target="image"):
name = "<{} filter>".format(target)
self.codeobj = compile(filterexpr, name, "eval")
+ self.globals = {
+ "parse_int": text.parse_int,
+ "urlsplit" : urllib.parse.urlsplit,
+ "datetime" : datetime.datetime,
+ "abort" : raises(exception.StopExtraction),
+ "re" : re,
+ }
def __call__(self, url, kwds):
try:
- return eval(self.codeobj, self.globalsdict, kwds)
+ return eval(self.codeobj, self.globals, kwds)
except exception.GalleryDLException:
raise
except Exception as exc:
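
Two things change here: abort is bound to the StopExtraction class instead of a single pre-built instance, and the eval() globals move from a class attribute to a per-instance dict. The latter matters because eval() injects a __builtins__ entry into any globals mapping that lacks one, so the class-level dict was silently shared and mutated by every predicate. Usage stays the same, but filters can now abort with a reason (sketch; the message argument assumes the new StopExtraction signature):

    pred = FilterPredicate("width >= 1000 or abort('image too small')")
    pred(url, kwdict)  # raises StopExtraction('image too small') when small
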
@@ -528,7 +528,7 @@ class PathFormat():
self.filename_formatter = Formatter(
filename_fmt, kwdefault).format_map
except Exception as exc:
- raise exception.FormatError(exc, "filename")
+ raise exception.FilenameFormatError(exc)
try:
self.directory_formatters = [
@@ -536,7 +536,7 @@ class PathFormat():
for dirfmt in directory_fmt
]
except Exception as exc:
- raise exception.FormatError(exc, "directory")
+ raise exception.DirectoryFormatError(exc)
self.directory = self.realdirectory = ""
self.filename = ""
@@ -616,7 +616,7 @@ class PathFormat():
if segment:
append(self.clean_segment(segment))
except Exception as exc:
- raise exception.FormatError(exc, "directory")
+ raise exception.DirectoryFormatError(exc)
# Join path segments
sep = os.sep
@@ -673,7 +673,7 @@ class PathFormat():
self.filename = filename = self.clean_path(self.clean_segment(
self.filename_formatter(self.kwdict)))
except Exception as exc:
- raise exception.FormatError(exc, "filename")
+ raise exception.FilenameFormatError(exc)
# Combine directory and filename to full paths
self.path = self.directory + filename
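
The generic FormatError with a "filename"/"directory" tag gives way to dedicated FilenameFormatError and DirectoryFormatError classes, so the failure site is encoded in the exception type itself (and, presumably, in its exit-code bit) instead of a string in exc.args. Callers can now catch the specific class directly (sketch, assuming both derive from a common FormatError in the reworked exception.py):

    try:
        pathfmt.set_filename(kwdict)
    except exception.FilenameFormatError as exc:
        log.error("invalid filename format string: %s", exc)
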
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index e83bed6..4d73139 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.10.6"
+__version__ = "1.11.1"