author    Unit 193 <unit193@ubuntu.com>  2019-08-04 17:52:59 -0400
committer Unit 193 <unit193@ubuntu.com>  2019-08-04 17:52:59 -0400
commit    64ad8e7bd15df71ab1116eede414558631bcad32 (patch)
tree      7416e191aedce591087903a943198aed13fa0b26
parent    2a63a9c9b7032a76894c48ac4d9cea732fcaee49 (diff)

New upstream version 1.10.1  (tag: upstream/1.10.1)
-rw-r--r--  CHANGELOG.md                              30
-rw-r--r--  README.rst                                15
-rw-r--r--  docs/configuration.rst                    55
-rw-r--r--  docs/gallery-dl.conf                       2
-rw-r--r--  docs/supportedsites.rst                    8
-rw-r--r--  gallery_dl/cache.py                       19
-rw-r--r--  gallery_dl/downloader/ytdl.py              7
-rw-r--r--  gallery_dl/extractor/__init__.py           3
-rw-r--r--  gallery_dl/extractor/adultempire.py       58
-rw-r--r--  gallery_dl/extractor/behance.py           22
-rw-r--r--  gallery_dl/extractor/dynastyscans.py       2
-rw-r--r--  gallery_dl/extractor/exhentai.py         102
-rw-r--r--  gallery_dl/extractor/gelbooru.py           1
-rw-r--r--  gallery_dl/extractor/imgbb.py            179
-rw-r--r--  gallery_dl/extractor/luscious.py           2
-rw-r--r--  gallery_dl/extractor/ngomik.py             2
-rw-r--r--  gallery_dl/extractor/sankaku.py            4
-rw-r--r--  gallery_dl/extractor/sankakucomplex.py     4
-rw-r--r--  gallery_dl/extractor/tsumino.py            2
-rw-r--r--  gallery_dl/extractor/vsco.py             176
-rw-r--r--  gallery_dl/job.py                          3
-rw-r--r--  gallery_dl/postprocessor/zip.py           38
-rw-r--r--  gallery_dl/text.py                        22
-rw-r--r--  gallery_dl/util.py                        25
-rw-r--r--  gallery_dl/version.py                      2
-rwxr-xr-x  scripts/supportedsites.py                  8
-rw-r--r--  test/test_results.py                       7
-rw-r--r--  test/test_text.py                         20

28 files changed, 688 insertions, 130 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 625018a..58e295c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,34 @@
# Changelog
+## 1.10.1 - 2019-08-02
+### Fixes
+- Restore functionality of both domains for `exhentai` extractors
+
+## 1.10.0 - 2019-08-01
+### Warning
+- Prior to version 1.10.0, all cache files were created world-readable (mode `644`),
+  leading to possible disclosure of sensitive information on multi-user systems
+- It is recommended to restrict access permissions of already existing files
+ (`/tmp/.gallery-dl.cache`) with `chmod 600`
+- Windows users should not be affected
+### Additions
+- Support for
+ - `vsco` - https://vsco.co/ ([#331](https://github.com/mikf/gallery-dl/issues/331))
+ - `imgbb` - https://imgbb.com/ ([#361](https://github.com/mikf/gallery-dl/issues/361))
+ - `adultempire` - https://www.adultempire.com/ ([#340](https://github.com/mikf/gallery-dl/issues/340))
+- `restrict-filenames` option to create Windows-compatible filenames on any platform ([#348](https://github.com/mikf/gallery-dl/issues/348))
+- `forward-cookies` option to control cookie forwarding to youtube-dl ([#352](https://github.com/mikf/gallery-dl/issues/352))
+### Changes
+- The default cache file location on non-Windows systems is now
+ - `$XDG_CACHE_HOME/gallery-dl/cache.sqlite3` or
+ - `~/.cache/gallery-dl/cache.sqlite3`
+- New cache files are created with mode `600`
+- `exhentai` extractors will always use `e-hentai.org` as domain
+### Fixes
+- Better handling of `exhentai` image limits and errors ([#356](https://github.com/mikf/gallery-dl/issues/356), [#360](https://github.com/mikf/gallery-dl/issues/360))
+- Try to prevent ZIP file corruption ([#355](https://github.com/mikf/gallery-dl/issues/355))
+- Miscellaneous fixes for `behance`, `ngomik`
+
## 1.9.0 - 2019-07-19
### Additions
- Support for
@@ -25,7 +54,6 @@
- Forward cookies to `youtube-dl` to allow downloading private videos
- Miscellaneous fixes for `35photo`, `500px`, `newgrounds`, `simplyhentai`
-
## 1.8.7 - 2019-06-28
### Additions
- Support for
diff --git a/README.rst b/README.rst
index 72e5cad..3bca007 100644
--- a/README.rst
+++ b/README.rst
@@ -78,8 +78,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.9.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.9.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.1/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -168,10 +168,11 @@ Username & Password
-------------------
Some extractors require you to provide valid login-credentials in the form of
-a username & password pair.
-This is necessary for ``pixiv``, ``nijie`` and ``seiga``
-and optional (but strongly recommended) for ``exhentai``, ``luscious``,
-``sankaku``, ``idolcomplex``, ``tsumino`` and ``wallhaven``.
+a username & password pair. This is necessary for
+``pixiv``, ``nijie``, and ``seiga``
+and optional (but strongly recommended) for
+``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``,
+``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
You can set the necessary information in your configuration file
(cf. gallery-dl.conf_)
@@ -223,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.9.0.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.1.zip
.. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
.. _Python: https://www.python.org/downloads/
diff --git a/docs/configuration.rst b/docs/configuration.rst
index 32a529a..c6f757d 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -108,6 +108,24 @@ Description Directory path used as the base for all download destinations.
=========== =====
+extractor.*.restrict-filenames
+------------------------------
+=========== =====
+Type ``string``
+Default ``"auto"``
+Example ``"/!? ()[]{}"``
+Description Characters to replace with underscores (``_``) when generating
+ directory and file names.
+
+ Special values:
+
+ * ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
+ depending on the local operating system
+ * ``"unix"``: ``"/"``
+ * ``"windows"``: ``"<>:\"\\|/?*"``
+=========== =====
+
+
extractor.*.skip
----------------
=========== =====
@@ -146,10 +164,11 @@ Default ``null``
Description The username and password to use when attempting to log in to
another site.
- Specifying username and password is
- required for the ``pixiv``, ``nijie`` and ``seiga`` modules and
- optional (but strongly recommended) for ``danbooru``, ``exhentai``,
- ``sankaku`` and ``idolcomplex``.
+ Specifying username and password is required for the
+ ``pixiv``, ``nijie``, and ``seiga``
+ modules and optional (but strongly recommended) for
+ ``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``,
+ ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
These values can also be set via the ``-u/--username`` and
``-p/--password`` command-line options or by using a |.netrc|_ file.
@@ -1090,6 +1109,15 @@ Description Video `format selection
=========== =====
+downloader.ytdl.forward-cookies
+-------------------------------
+=========== =====
+Type ``bool``
+Default ``true``
+Description Forward cookies to youtube-dl.
+=========== =====
+
+
downloader.ytdl.logging
-----------------------
=========== =====
@@ -1438,6 +1466,22 @@ Default ``false``
Description Keep the actual files after writing them to a ZIP archive.
=========== =====
+zip.mode
+--------
+=========== =====
+Type ``string``
+Default ``"default"``
+Description * ``"default"``: Write the central directory file header
+ once after everything is done or an exception is raised.
+
+ * ``"safe"``: Update the central directory file header
+ each time a file is stored in a ZIP archive.
+
+ This greatly reduces the chance that a ZIP archive gets corrupted
+ if the Python interpreter is shut down unexpectedly
+ (power outage, SIGKILL), but it is also a lot slower.
+=========== =====
+
Miscellaneous Options
@@ -1448,7 +1492,8 @@ cache.file
----------
=========== =====
Type |Path|_
-Default |tempfile.gettempdir()|_ + ``".gallery-dl.cache"``
+Default * |tempfile.gettempdir()|_ + ``".gallery-dl.cache"`` on Windows
+ * (``$XDG_CACHE_HOME`` or ``"~/.cache"``) + ``"/gallery-dl/cache.sqlite3"`` on all other platforms
Description Path of the SQLite3 database used to cache login sessions,
cookies and API tokens across `gallery-dl` invocations.
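
A minimal standalone sketch of how this default is resolved, assuming the same XDG fallback behaviour as the cache.py change further down (the function name is illustrative):

    import os
    import tempfile

    def default_cache_path():
        # Windows keeps the previous location in the temp directory
        if os.name == "nt":
            return os.path.join(tempfile.gettempdir(), ".gallery-dl.cache")
        # elsewhere: $XDG_CACHE_HOME/gallery-dl/ or ~/.cache/gallery-dl/
        cachedir = os.path.expanduser(os.path.join(
            os.environ.get("XDG_CACHE_HOME", "~/.cache"), "gallery-dl"))
        os.makedirs(cachedir, exist_ok=True)
        return os.path.join(cachedir, "cache.sqlite3")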
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 835ed17..a4a9ee0 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -9,6 +9,7 @@
"skip": true,
"sleep": 0,
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
+ "restrict-filenames": "auto",
"artstation":
{
@@ -163,6 +164,7 @@
"ytdl":
{
"format": null,
+ "forward-cookies": true,
"mtime": true,
"rate": null,
"retries": 4,
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 2a1a1ed..d2fb4ea 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -13,6 +13,7 @@ Site URL Capabilities
8chan https://8ch.net/ Threads
8muses https://www.8muses.com/ Albums
Adobe Portfolio https://www.myportfolio.com/ Galleries
+Adult Empire https://www.adultempire.com/ Galleries
arch.b4k.co https://arch.b4k.co/ Threads
Archive of Sins https://archiveofsins.com/ Threads
Archived.Moe https://archived.moe/ Threads
@@ -24,9 +25,9 @@ Desuarchive https://desuarchive.org/ Threads
DeviantArt https://www.deviantart.com/ |deviantart-C| Optional (OAuth)
Doki Reader https://kobato.hologfx.com/reader/ Chapters, Manga
Dynasty Reader https://dynasty-scans.com/ Chapters, individual Images, Search Results
+E-Hentai https://e-hentai.org/ Favorites, Galleries, Search Results Optional
e621 https://e621.net/ Pools, Popular Images, Posts, Tag-Searches
EroLord.com http://erolord.com/ Galleries
-ExHentai https://exhentai.org/ Favorites, Galleries, Search Results Optional
Fallen Angels Scans https://www.fascans.com/ Chapters, Manga
Fashion Nova https://www.fashionnova.com/ Collections, Products
Fireden https://boards.fireden.net/ Threads
@@ -46,6 +47,7 @@ Hypnohub https://hypnohub.net/ Pools, Popular Images,
Idol Complex https://idol.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional
ImageBam http://www.imagebam.com/ Galleries, individual Images
ImageFap https://imagefap.com/ Images from Users, Galleries, individual Images
+ImgBB https://imgbb.com/ Images from Users, Albums Optional
imgbox https://imgbox.com/ Galleries, individual Images
imgth https://imgth.com/ Galleries
imgur https://imgur.com/ Albums, individual Images
@@ -106,7 +108,8 @@ The /b/ Archive https://thebarchive.com/ Threads
Tsumino https://www.tsumino.com/ Galleries, Search Results Optional
Tumblr https://www.tumblr.com/ Images from Users, Likes, Posts, Tag-Searches Optional (OAuth)
Twitter https://twitter.com/ Media Timelines, Timelines, Tweets Optional
-Wallhaven https://wallhaven.cc/ individual Images, Search Results
+VSCO https://vsco.co/ Images from Users, Collections, individual Images
+Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A|
Warosu https://warosu.org/ Threads
Weibo https://www.weibo.com/ Images from Users, Images from Statuses
WikiArt.org https://www.wikiart.org/ Artists, Artworks
@@ -137,4 +140,5 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
.. |pixnet-C| replace:: Images from Users, Folders, individual Images, Sets
.. |pornreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches
.. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
+.. |wallhaven-A| replace:: Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__)
.. |yuki-S| replace:: yuki.la 4chan archive
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index e6ba61a..3ceef75 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -11,6 +11,7 @@
import sqlite3
import pickle
import time
+import os
import functools
from . import config, util
@@ -188,17 +189,25 @@ def clear():
def _path():
path = config.get(("cache", "file"), -1)
+ if path != -1:
+ return util.expand_path(path)
- if path == -1:
+ if os.name == "nt":
import tempfile
- import os.path
return os.path.join(tempfile.gettempdir(), ".gallery-dl.cache")
- return util.expand_path(path)
+ cachedir = util.expand_path(os.path.join(
+ os.environ.get("XDG_CACHE_HOME", "~/.cache"), "gallery-dl"))
+ os.makedirs(cachedir, exist_ok=True)
+ return os.path.join(cachedir, "cache.sqlite3")
try:
+ dbfile = _path()
+ if os.name != "nt":
+ # restrict access permissions for new db files
+ os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
DatabaseCacheDecorator.db = sqlite3.connect(
- _path(), timeout=30, check_same_thread=False)
-except (TypeError, sqlite3.OperationalError):
+ dbfile, timeout=30, check_same_thread=False)
+except (OSError, TypeError, sqlite3.OperationalError):
cache = memcache # noqa: F811
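
The permission fix works because os.open() with O_CREAT applies the requested mode only when it actually creates the file; pre-existing cache files keep their permissions, hence the chmod 600 advice in the changelog. A standalone sketch, assuming a writable path:

    import os
    import sqlite3

    dbfile = "/tmp/example-cache.sqlite3"  # illustrative path
    if os.name != "nt":
        # creates the file with mode 600 if it does not exist yet;
        # a file that already exists keeps its current permissions
        os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
    con = sqlite3.connect(dbfile, timeout=30, check_same_thread=False)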
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index da57935..a233487 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -34,12 +34,15 @@ class YoutubeDLDownloader(DownloaderBase):
if self.config("logging", True):
options["logger"] = self.log
+ self.forward_cookies = self.config("forward-cookies", True)
self.ytdl = YoutubeDL(options)
def download(self, url, pathfmt):
- for cookie in self.session.cookies:
- self.ytdl.cookiejar.set_cookie(cookie)
+ if self.forward_cookies:
+ set_cookie = self.ytdl.cookiejar.set_cookie
+ for cookie in self.session.cookies:
+ set_cookie(cookie)
try:
info_dict = self.ytdl.extract_info(url[5:], download=False)
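
Cookie forwarding itself is just a copy from the extractor's requests session into youtube-dl's cookiejar, now gated by the new option. A simplified standalone sketch (the session and option wiring are illustrative):

    import requests
    from youtube_dl import YoutubeDL

    session = requests.Session()
    ytdl = YoutubeDL()

    forward_cookies = True  # value of the 'forward-cookies' option
    if forward_cookies:
        set_cookie = ytdl.cookiejar.set_cookie
        for cookie in session.cookies:  # http.cookiejar.Cookie objects
            set_cookie(cookie)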
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 189c163..0b24111 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -17,6 +17,7 @@ modules = [
"500px",
"8chan",
"8muses",
+ "adultempire",
"artstation",
"behance",
"bobx",
@@ -42,6 +43,7 @@ modules = [
"idolcomplex",
"imagebam",
"imagefap",
+ "imgbb",
"imgbox",
"imgth",
"imgur",
@@ -95,6 +97,7 @@ modules = [
"tumblr",
"twitter",
"vanillarock",
+ "vsco",
"wallhaven",
"warosu",
"weibo",
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
new file mode 100644
index 0000000..5ea835f
--- /dev/null
+++ b/gallery_dl/extractor/adultempire.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.adultempire.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class AdultempireGalleryExtractor(GalleryExtractor):
+ """Extractor for image galleries from www.adultempire.com"""
+ category = "adultempire"
+ root = "https://www.adultempire.com"
+ pattern = (r"(?:https?://)?(?:www\.)?adult(?:dvd)?empire\.com"
+ r"(/(\d+)/gallery\.html)")
+ test = (
+ ("https://www.adultempire.com/5998/gallery.html", {
+ "range": "1",
+ "keyword": "0533ef1184892be8ac02b17286797c95f389ba63",
+ "content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
+ }),
+ ("https://www.adultdvdempire.com/5683/gallery.html", {
+ "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
+ "keyword": "59fe5d95929efc5040a819a5f77aba7a022bb85a",
+ }),
+ )
+
+ def __init__(self, match):
+ GalleryExtractor.__init__(self, match)
+ self.gallery_id = match.group(2)
+
+ def metadata(self, page):
+ extr = text.extract_from(page, page.index('<div id="content">'))
+ return {
+ "gallery_id": text.parse_int(self.gallery_id),
+ "title" : text.unescape(extr('title="', '"')),
+ "studio" : extr(">studio</small>", "<").strip(),
+ "date" : text.parse_datetime(extr(
+ ">released</small>", "<").strip(), "%m/%d/%Y"),
+ "actors" : text.split_html(extr(
+ '<ul class="item-details item-cast-list ', '</ul>'))[1:],
+ }
+
+ def images(self, page):
+ params = {"page": 1}
+ while True:
+ urls = list(text.extract_iter(page, 'rel="L"><img src="', '"'))
+ for url in urls:
+ yield url.replace("_200.", "_9600."), None
+ if len(urls) < 24:
+ return
+ params["page"] += 1
+ page = self.request(self.chapter_url, params=params).text
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 111d560..467a935 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -30,7 +30,8 @@ class BehanceExtractor(Extractor):
@staticmethod
def _update(data):
# compress data to simple lists
- data["fields"] = [field["name"] for field in data["fields"]]
+ if data["fields"] and isinstance(data["fields"][0], dict):
+ data["fields"] = [field["name"] for field in data["fields"]]
data["owners"] = [owner["display_name"] for owner in data["owners"]]
if "tags" in data:
data["tags"] = [tag["title"] for tag in data["tags"]]
@@ -140,11 +141,11 @@ class BehanceUserExtractor(BehanceExtractor):
def galleries(self):
url = "{}/{}/projects".format(self.root, self.user)
- headers = {"X-Requested-With": "XMLHttpRequest"}
params = {"offset": 0}
+ headers = {"X-Requested-With": "XMLHttpRequest"}
while True:
- data = self.request(url, headers=headers, params=params).json()
+ data = self.request(url, params=params, headers=headers).json()
work = data["profile"]["activeSection"]["work"]
yield from work["projects"]
if not work["hasMore"]:
@@ -157,8 +158,8 @@ class BehanceCollectionExtractor(BehanceExtractor):
subcategory = "collection"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
- test = ("https://www.behance.net/collection/170615607/Sky", {
- "count": ">= 13",
+ test = ("https://www.behance.net/collection/71340149/inspiration", {
+ "count": ">= 145",
"pattern": BehanceGalleryExtractor.pattern,
})
@@ -168,12 +169,13 @@ class BehanceCollectionExtractor(BehanceExtractor):
def galleries(self):
url = "{}/collection/{}/a".format(self.root, self.collection_id)
+ params = {"offset": 0}
headers = {"X-Requested-With": "XMLHttpRequest"}
- params = {}
while True:
- data = self.request(url, headers=headers, params=params).json()
- yield from data["output"]
- if not data.get("offset"):
+ data = self.request(url, params=params, headers=headers).json()
+ for item in data["items"]:
+ yield item["project"]
+ if len(data["items"]) < 40:
return
- params["offset"] = data["offset"]
+ params["offset"] += len(data["items"])
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index b10bd35..9cc6738 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -100,7 +100,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
test = (
("https://dynasty-scans.com/images?with[]=4930&with[]=5211", {
"url": "6b570eedd8a741c2cd34fb98b22a49d772f84191",
- "keyword": "a1e2d05c1406a08b02f347389616a6babb1b50bf",
+ "keyword": "fa7ff94f82cdf942f7734741d758f160a6b0905a",
}),
("https://dynasty-scans.com/images", {
"range": "1",
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 20e0746..1833b1a 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from galleries at https://exhentai.org/"""
+"""Extractors for https://e-hentai.org/ and https://exhentai.org/"""
from .common import Extractor, Message
from .. import text, util, exception
@@ -23,16 +23,19 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
class ExhentaiExtractor(Extractor):
"""Base class for exhentai extractors"""
category = "exhentai"
- directory_fmt = ("{category}", "{gallery_id}")
+ directory_fmt = ("{category}", "{gallery_id} {title}")
filename_fmt = (
"{gallery_id}_{num:>04}_{image_token}_{filename}.{extension}")
archive_fmt = "{gallery_id}_{num}"
- cookiedomain = ".exhentai.org"
cookienames = ("ipb_member_id", "ipb_pass_hash")
+ cookiedomain = ".exhentai.org"
root = "https://exhentai.org"
+ LIMIT = False
+
def __init__(self, match):
- if match.group(1) != "ex":
+ version = match.group(1)
+ if version != "ex":
self.root = "https://e-hentai.org"
self.cookiedomain = ".e-hentai.org"
Extractor.__init__(self, match)
@@ -45,6 +48,8 @@ class ExhentaiExtractor(Extractor):
if self.wait_max < self.wait_min:
self.wait_max = self.wait_min
self.session.headers["Referer"] = self.root + "/"
+ if version != "ex":
+ self.session.cookies.set("nw", "1", domain=self.cookiedomain)
def request(self, *args, **kwargs):
response = Extractor.request(self, *args, **kwargs)
@@ -63,6 +68,9 @@ class ExhentaiExtractor(Extractor):
def login(self):
"""Login and set necessary cookies"""
+ if self.LIMIT:
+ self.log.error("Image limit reached!")
+ raise exception.StopExtraction()
if self._check_cookies(self.cookienames):
return
username, password = self._get_auth_info()
@@ -92,7 +100,7 @@ class ExhentaiExtractor(Extractor):
}
response = self.request(url, method="POST", headers=headers, data=data)
- if "You are now logged in as:" not in response.text:
+ if b"You are now logged in as:" not in response.content:
raise exception.AuthenticationError()
return {c: response.cookies[c] for c in self.cookienames}
@@ -112,9 +120,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
r"(?:/g/(\d+)/([\da-f]{10})"
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
- ("https://exhentai.org/g/960460/4f0e369d82/", {
- "keyword": "1532ca4d0e4e0738dc994ca725a228af04a4e480",
- "content": "493d759de534355c9f55f8e365565b62411de146",
+ ("https://exhentai.org/g/1200119/d55c44d3d0/", {
+ "keyword": "1b353fad00dff0665b1746cdd151ab5cc326df23",
+ "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
"exception": exception.NotFoundError,
@@ -122,13 +130,13 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
("http://exhentai.org/g/962698/7f02358e00/", {
"exception": exception.AuthorizationError,
}),
- ("https://exhentai.org/s/3957343c3b/960460-5", {
+ ("https://exhentai.org/s/f68367b4c8/1200119-3", {
"count": 2,
}),
- ("https://e-hentai.org/s/3957343c3b/960460-5", {
+ ("https://e-hentai.org/s/f68367b4c8/1200119-3", {
"count": 2,
}),
- ("https://g.e-hentai.org/g/960460/4f0e369d82/"),
+ ("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
)
def __init__(self, match):
@@ -143,14 +151,25 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
def items(self):
self.login()
+ if self.limits:
+ self._init_limits()
+
if self.gallery_token:
gpage = self._gallery_page()
self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0]
+ if not self.image_token:
+ self.log.error("Failed to extract initial image token")
+ self.log.debug("Page content:\n%s", gpage)
+ return
self.wait()
ipage = self._image_page()
else:
ipage = self._image_page()
part = text.extract(ipage, 'hentai.org/g/', '"')[0]
+ if not part:
+ self.log.error("Failed to extract gallery token")
+ self.log.debug("Page content:\n%s", ipage)
+ return
self.gallery_token = part.split("/")[1]
self.wait()
gpage = self._gallery_page()
@@ -211,12 +230,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
iurl = extr('<img id="img" src="', '"')
orig = extr('hentai.org/fullimg.php', '"')
- if self.original and orig:
- url = self.root + "/fullimg.php" + text.unescape(orig)
- data = self._parse_original_info(extr('ownload original', '<'))
- else:
- url = iurl
- data = self._parse_image_info(url)
+ try:
+ if self.original and orig:
+ url = self.root + "/fullimg.php" + text.unescape(orig)
+ data = self._parse_original_info(extr('ownload original', '<'))
+ else:
+ url = iurl
+ data = self._parse_image_info(url)
+ except IndexError:
+ self.log.error("Unable to parse image info for '%s'", url)
+ self.log.debug("Page content:\n%s", page)
+ raise exception.StopExtraction()
data["num"] = self.image_num
data["image_token"] = self.key["start"] = extr('var startkey="', '";')
@@ -242,13 +266,18 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos)
origurl, pos = text.extract(page["i7"], '<a href="', '"')
- if self.original and origurl:
- url = text.unescape(origurl)
- data = self._parse_original_info(
- text.extract(page["i7"], "ownload original", "<", pos)[0])
- else:
- url = imgurl
- data = self._parse_image_info(url)
+ try:
+ if self.original and origurl:
+ url = text.unescape(origurl)
+ data = self._parse_original_info(text.extract(
+ page["i7"], "ownload original", "<", pos)[0])
+ else:
+ url = imgurl
+ data = self._parse_image_info(url)
+ except IndexError:
+ self.log.error("Unable to parse image info for '%s'", url)
+ self.log.debug("Page content:\n%s", page)
+ raise exception.StopExtraction()
data["num"] = request["page"]
data["image_token"] = imgkey
@@ -266,6 +295,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.AuthorizationError()
if page.startswith(("Key missing", "Gallery not found")):
raise exception.NotFoundError("gallery")
+ if "hentai.org/mpv/" in page:
+ self.log.warning("Enabled Multi-Page Viewer is not supported")
return page
def _image_page(self):
@@ -277,17 +308,24 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.NotFoundError("image page")
return page
+ def _init_limits(self):
+ self._update_limits()
+ if self._remaining <= 0:
+ self.log.error("Image limit reached!")
+ ExhentaiExtractor.LIMIT = True
+ raise exception.StopExtraction()
+
def _check_limits(self, data):
- if not self._remaining or data["num"] % 20 == 0:
+ if data["num"] % 20 == 0:
self._update_limits()
self._remaining -= data["cost"]
if self._remaining <= 0:
url = "{}/s/{}/{}-{}".format(
self.root, data["image_token"], self.gallery_id, data["num"])
- self.log.error(
- "Image limit reached! Reset it and continue with "
- "'%s' as URL.", url)
+ self.log.error("Image limit reached! Continue with "
+ "'%s' as URL after resetting it.", url)
+ ExhentaiExtractor.LIMIT = True
raise exception.StopExtraction()
def _update_limits(self):
@@ -301,6 +339,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
page = self.request(url, cookies=cookies).text
current, pos = text.extract(page, "<strong>", "</strong>")
maximum, pos = text.extract(page, "<strong>", "</strong>", pos)
+ self.log.debug("Image Limits: %s/%s", current, maximum)
self._remaining = text.parse_int(maximum) - text.parse_int(current)
@staticmethod
@@ -330,7 +369,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/?\?(.*)$"
test = (
- ("https://exhentai.org/?f_search=touhou"),
+ ("https://e-hentai.org/?f_search=touhou"),
(("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
"&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
"&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
@@ -372,7 +411,10 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"
test = (
- ("https://exhentai.org/favorites.php"),
+ ("https://e-hentai.org/favorites.php", {
+ "count": 1,
+ "pattern": r"https?://e-hentai\.org/g/1200119/d55c44d3d0"
+ }),
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
"&f_apply=Search+Favorites"),
)
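
Because LIMIT is a class attribute, flipping it in one extractor instance stops every later exhentai/e-hentai job in the same run from logging in and spending more of the account's image quota. A minimal sketch of the pattern (class and method names are illustrative):

    class ExhentaiLike:
        LIMIT = False  # class attribute, shared by all instances

        def login(self):
            if self.LIMIT:  # instance reads fall through to the class
                raise RuntimeError("Image limit reached!")

        def on_limit_reached(self):
            ExhentaiLike.LIMIT = True  # assign on the class, not 'self'
            raise RuntimeError("Image limit reached!")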
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 15bd0a8..ce2e83b 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -30,6 +30,7 @@ class GelbooruExtractor(booru.XmlParserMixin,
self.params.update({"page": "dapi", "s": "post", "q": "index"})
else:
self.items = self.items_noapi
+ self.session.cookies["fringeBenefits"] = "yup"
def items_noapi(self):
data = self.get_metadata()
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
new file mode 100644
index 0000000..442634b
--- /dev/null
+++ b/gallery_dl/extractor/imgbb.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://imgbb.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+import json
+
+
+class ImgbbExtractor(Extractor):
+ """Base class for imgbb extractors"""
+ category = "imgbb"
+ filename_fmt = "{title} {id}.{extension}"
+ archive_fmt = "{id}"
+ root = "https://imgbb.com"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.page_url = self.sort = None
+
+ def items(self):
+ self.login()
+ page = self.request(self.page_url, params={"sort": self.sort}).text
+ data = self.metadata(page)
+ first = True
+
+ yield Message.Version, 1
+ for img in self.images(page):
+ image = {
+ "id" : img["url_viewer"].rpartition("/")[2],
+ "user" : img["user"]["username"],
+ "title" : text.unescape(img["title"]),
+ "url" : img["image"]["url"],
+ "extension": img["image"]["extension"],
+ "size" : text.parse_int(img["image"]["size"]),
+ "width" : text.parse_int(img["width"]),
+ "height" : text.parse_int(img["height"]),
+ }
+ image.update(data)
+ if first:
+ first = False
+ yield Message.Directory, data
+ yield Message.Url, image["url"], image
+
+ def login(self):
+ username, password = self._get_auth_info()
+ if username:
+ self._update_cookies(self._login_impl(username, password))
+
+ @cache(maxage=360*24*3600, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/login"
+ page = self.request(url).text
+ token = text.extract(page, 'PF.obj.config.auth_token="', '"')[0]
+
+ headers = {"Referer": url}
+ data = {
+ "auth_token" : token,
+ "login-subject": username,
+ "password" : password,
+ }
+ response = self.request(url, method="POST", headers=headers, data=data)
+
+ if not response.history:
+ raise exception.AuthenticationError()
+ return self.session.cookies
+
+ def _pagination(self, page, endpoint, params):
+ params["page"] = 2
+ data = None
+
+ while True:
+ for img in text.extract_iter(page, "data-object='", "'"):
+ yield json.loads(text.unquote(img))
+ if data:
+ if params["seek"] == data["seekEnd"]:
+ return
+ params["seek"] = data["seekEnd"]
+ params["page"] += 1
+ data = self.request(endpoint, "POST", data=params).json()
+ page = data["html"]
+
+
+class ImgbbAlbumExtractor(ImgbbExtractor):
+ """Extractor for albums on imgbb.com"""
+ subcategory = "album"
+ directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
+ pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?"
+ test = (
+ ("https://ibb.co/album/c6p5Yv", {
+ "range": "1-80",
+ "url": "8adaf0f7dfc19ff8bc4712c97f534af8b1e06412",
+ "keyword": "155b665a53e83d359e914cab7c69d5b829444d64",
+ }),
+ ("https://ibb.co/album/c6p5Yv?sort=title_asc", {
+ "range": "1-80",
+ "url": "d6c45041d5c8323c435b183a976f3fde2af7c547",
+ "keyword": "30c3262214e2044bbcf6bf2dee8e3ca7ebd62b71",
+ }),
+ )
+
+ def __init__(self, match):
+ ImgbbExtractor.__init__(self, match)
+ self.album_name = None
+ self.album_id = match.group(1)
+ self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
+ self.page_url = "https://ibb.co/album/" + self.album_id
+
+ def metadata(self, page):
+ album, pos = text.extract(page, '"og:title" content="', '"')
+ user , pos = text.extract(page, 'rel="author">', '<', pos)
+ return {
+ "album_id" : self.album_id,
+ "album_name": text.unescape(album),
+ "user" : user.lower(),
+ }
+
+ def images(self, page):
+ seek, pos = text.extract(page, 'data-seek="', '"')
+ tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
+
+ return self._pagination(page, "https://ibb.co/json", {
+ "action" : "list",
+ "list" : "images",
+ "from" : "album",
+ "sort" : self.sort,
+ "albumid" : self.album_id,
+ "seek" : seek,
+ "auth_token": tokn,
+ "params_hidden[list]" : "images",
+ "params_hidden[from]" : "album",
+ "params_hidden[albumid]": self.album_id,
+ })
+
+
+class ImgbbUserExtractor(ImgbbExtractor):
+ """Extractor for user profiles in imgbb.com"""
+ subcategory = "user"
+ directory_fmt = ("{category}", "{user}")
+ pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$"
+ test = ("https://folkie.imgbb.com", {
+ "range": "1-80",
+ "pattern": r"https?://i\.ibb\.co/\w+/[^/?&#]+",
+ })
+
+ def __init__(self, match):
+ ImgbbExtractor.__init__(self, match)
+ self.user = match.group(1)
+ self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
+ self.page_url = "https://{}.imgbb.com/".format(self.user)
+
+ def metadata(self, page):
+ return {"user": self.user}
+
+ def images(self, page):
+ seek, pos = text.extract(page, 'data-seek="', '"')
+ tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
+ user, pos = text.extract(page, '.obj.resource={"id":"', '"', pos)
+
+ return self._pagination(page, self.page_url + "json", {
+ "action" : "list",
+ "list" : "images",
+ "from" : "user",
+ "sort" : self.sort,
+ "seek" : seek,
+ "userid" : user,
+ "auth_token": tokn,
+ "params_hidden[userid]": user,
+ "params_hidden[from]" : "user",
+ })
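
Unlike an offset, ImgBB's listing endpoint pages with an opaque seek token, and the loop ends once that token stops advancing. The same flow as _pagination above, sketched with plain requests (field names follow the extractor; the regex stands in for text.extract_iter):

    import json
    import re
    import requests

    def paginate_seek(endpoint, params, first_page):
        """Yield image objects from a seek-token paginated endpoint."""
        page, data = first_page, None
        params["page"] = 2  # page 1 was already fetched as HTML
        while True:
            for obj in re.findall(r"data-object='([^']*)'", page):
                yield json.loads(requests.utils.unquote(obj))
            if data:
                if params["seek"] == data["seekEnd"]:
                    return  # seek token stalled -> last page reached
                params["seek"] = data["seekEnd"]
            params["page"] += 1
            data = requests.post(endpoint, data=params).json()
            page = data["html"]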
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 65ae843..879d38b 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -62,7 +62,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"url": "7e4984a271a1072ac6483e4228a045895aff86f3",
- "keyword": "c597c132834f4990f90bf5dee5de2a9d4ba263a4",
+ "keyword": "ab4e5b71583fd439b4c8012a642aa8b58d8d0758",
"content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
}),
("https://luscious.net/albums/virgin-killer-sweater_282582/", {
diff --git a/gallery_dl/extractor/ngomik.py b/gallery_dl/extractor/ngomik.py
index 8135a8a..f3608b2 100644
--- a/gallery_dl/extractor/ngomik.py
+++ b/gallery_dl/extractor/ngomik.py
@@ -44,7 +44,7 @@ class NgomikChapterExtractor(ChapterExtractor):
@staticmethod
def images(page):
- readerarea = text.extract(page, 'id=readerarea', 'class=chnav')[0]
+ readerarea = text.extract(page, 'id="readerarea"', 'class="chnav"')[0]
return [
(text.unescape(url), None)
for url in re.findall(r"\ssrc=[\"']?([^\"' >]+)", readerarea)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 012cb8b..da9735e 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -283,9 +283,9 @@ class SankakuPostExtractor(SankakuExtractor):
"options": (("tags", True),),
"keyword": {
"tags_artist": "bonocho",
- "tags_copyright": "batman_(series) the_dark_knight",
- "tags_medium": "sketch copyright_name",
"tags_studio": "dc_comics",
+ "tags_medium": "sketch copyright_name",
+ "tags_copyright": str,
"tags_character": str,
"tags_general": str,
},
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 55eda9f..0189fc9 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -34,11 +34,11 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
test = (
("https://www.sankakucomplex.com/2019/05/11/twitter-cosplayers", {
"url": "4a9ecc5ae917fbce469280da5b6a482510cae84d",
- "keyword": "4b3b5766b277a5d0acbec90fa8f2343262b07efd",
+ "keyword": "bfe08310e7d9a572f568f6900e0ed0eb295aa2b3",
}),
("https://www.sankakucomplex.com/2009/12/01/sexy-goddesses-of-2ch", {
"url": "a1e249173fd6c899a8134fcfbd9c925588a63f7c",
- "keyword": "f47a416d680717855bbc3e4f0cd44479f61d9aa4",
+ "keyword": "e78fcc23c2711befc0969a45ea5082a29efccf68",
}),
)
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index 03ee144..66ad431 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -65,7 +65,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
"uploader" : "sehki",
"lang" : "en",
"language" : "English",
- "thumbnail" : "http://www.tsumino.com/Image/Thumb/40996",
+ "thumbnail" : "re:https?://www.tsumino.com/Image/Thumb/40996",
},
}),
("https://www.tsumino.com/Read/View/45834"),
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
new file mode 100644
index 0000000..639ec82
--- /dev/null
+++ b/gallery_dl/extractor/vsco.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://vsco.co/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)"
+
+
+class VscoExtractor(Extractor):
+ """Base class for vsco extractors"""
+ category = "vsco"
+ root = "https://vsco.co"
+ directory_fmt = ("{category}", "{user}")
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{id}"
+
+ def __init__(self, match):
+ Extractor.__init__(self, match)
+ self.user = match.group(1).lower()
+
+ def items(self):
+ yield Message.Version, 1
+ yield Message.Directory, {"user": self.user}
+ for img in self.images():
+ url = "https://" + (img.get("video_url") or img["responsive_url"])
+ data = text.nameext_from_url(url, {
+ "id" : img["_id"],
+ "user" : self.user,
+ "grid" : img["grid_name"],
+ "meta" : img.get("image_meta") or {},
+ "tags" : [tag["text"] for tag in img.get("tags") or ()],
+ "date" : text.parse_timestamp(img["upload_date"] // 1000),
+ "video" : img["is_video"],
+ "width" : img["width"],
+ "height": img["height"],
+ "description": img["description"],
+ })
+ yield Message.Url, url, data
+
+ def images(self):
+ """Return an iterable with all relevant image objects"""
+
+ def _extract_preload_state(self, url):
+ page = self.request(url, notfound=self.subcategory).text
+ return json.loads(text.extract(page, "__PRELOADED_STATE__ = ", "<")[0])
+
+ def _pagination(self, url, params, token, key, extra):
+ headers = {
+ "Referer" : "{}/{}".format(self.root, self.user),
+ "Authorization" : "Bearer " + token,
+ "X-Client-Platform": "web",
+ "X-Client-Build" : "1",
+ }
+
+ yield from map(self._transform_media, extra)
+
+ while True:
+ data = self.request(url, params=params, headers=headers).json()
+ if not data.get(key):
+ return
+ yield from data[key]
+ params["page"] += 1
+
+ @staticmethod
+ def _transform_media(media):
+ media["_id"] = media["id"]
+ media["is_video"] = media["isVideo"]
+ media["grid_name"] = media["gridName"]
+ media["upload_date"] = media["uploadDate"]
+ media["responsive_url"] = media["responsiveUrl"]
+ media["video_url"] = media.get("videoUrl")
+ media["image_meta"] = media.get("imageMeta")
+ return media
+
+
+class VscoUserExtractor(VscoExtractor):
+ """Extractor for images from a user on vsco.co"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/images/"
+ test = ("https://vsco.co/missuri/images/1", {
+ "range": "1-80",
+ "count": 80,
+ "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
+ })
+
+ def images(self):
+ url = "{}/{}/images/1".format(self.root, self.user)
+ data = self._extract_preload_state(url)
+
+ tkn = data["users"]["currentUser"]["tkn"]
+ sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"])
+
+ url = "{}/api/2.0/medias".format(self.root)
+ params = {"page": 2, "size": "30", "site_id": sid}
+ return self._pagination(url, params, tkn, "media", (
+ data["medias"]["byId"][mid]["media"]
+ for mid in data["medias"]["bySiteId"][sid]["medias"]["1"]
+ ))
+
+
+class VscoCollectionExtractor(VscoExtractor):
+ """Extractor for images from a collection on vsco.co"""
+ subcategory = "collection"
+ directory_fmt = ("{category}", "{user}", "collection")
+ archive_fmt = "c_{user}_{id}"
+ pattern = BASE_PATTERN + r"/collection/"
+ test = ("https://vsco.co/vsco/collection/1", {
+ "range": "1-80",
+ "count": 80,
+ "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
+ })
+
+ def images(self):
+ url = "{}/{}/collection/1".format(self.root, self.user)
+ data = self._extract_preload_state(url)
+
+ tkn = data["users"]["currentUser"]["tkn"]
+ cid = (data["sites"]["siteByUsername"][self.user]
+ ["site"]["siteCollectionId"])
+
+ url = "{}/api/2.0/collections/{}/medias".format(self.root, cid)
+ params = {"page": 2, "size": "20"}
+ return self._pagination(url, params, tkn, "medias", (
+ data["medias"]["byId"][mid]["media"]
+ for mid in data
+ ["collections"]["byCollectionId"][cid]["collection"]["1"]
+ ))
+
+
+class VscoImageExtractor(VscoExtractor):
+ """Extractor for individual images on vsco.co"""
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)"
+ test = (
+ ("https://vsco.co/erenyildiz/media/5d34b93ef632433030707ce2", {
+ "url": "faa214d10f859f374ad91da3f7547d2439f5af08",
+ "content": "1394d070828d82078035f19a92f404557b56b83f",
+ "keyword": {
+ "id" : "5d34b93ef632433030707ce2",
+ "user" : "erenyildiz",
+ "grid" : "erenyildiz",
+ "meta" : dict,
+ "tags" : list,
+ "date" : "type:datetime",
+ "video" : False,
+ "width" : 1537,
+ "height": 1537,
+ "description": "re:Ni seviyorum. #vsco #vscox #vscochallenges",
+ },
+ }),
+ ("https://vsco.co/jimenalazof/media/5b4feec558f6c45c18c040fd", {
+ "url": "08e7eef3301756ce81206c0b47c1e9373756a74a",
+ "content": "e739f058d726ee42c51c180a505747972a7dfa47",
+ "keyword": {"video" : True},
+ }),
+ )
+
+ def __init__(self, match):
+ VscoExtractor.__init__(self, match)
+ self.media_id = match.group(2)
+
+ def images(self):
+ url = "{}/{}/media/{}".format(self.root, self.user, self.media_id)
+ data = self._extract_preload_state(url)
+ media = data["medias"]["byId"].popitem()[1]["media"]
+ return (self._transform_media(media),)
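
All three VSCO extractors rely on the same trick: the page embeds its data as a JavaScript assignment, so the JSON literal can be sliced out between the "__PRELOADED_STATE__ = " marker and the next tag and parsed directly. A standalone sketch with plain requests:

    import json
    import requests

    def extract_preload_state(url):
        page = requests.get(url).text
        marker = "__PRELOADED_STATE__ = "
        start = page.index(marker) + len(marker)
        end = page.index("<", start)  # state ends before the next tag
        return json.loads(page[start:end])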
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 20823a6..637561a 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -81,7 +81,8 @@ class Job():
"https://github.com/mikf/gallery-dl/issues ."),
exc.__class__.__name__, exc)
log.debug("", exc_info=True)
- self.handle_finalize()
+ finally:
+ self.handle_finalize()
def dispatch(self, msg):
"""Call the appropriate message handler"""
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index 3a0c323..1075c70 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright 2018 Mike Fährmann
+# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -25,7 +25,7 @@ class ZipPP(PostProcessor):
def __init__(self, pathfmt, options):
PostProcessor.__init__(self)
self.delete = not options.get("keep-files", False)
- self.ext = "." + options.get("extension", "zip")
+ ext = "." + options.get("extension", "zip")
algorithm = options.get("compression", "store")
if algorithm not in self.COMPRESSION_ALGORITHMS:
self.log.warning(
@@ -34,29 +34,45 @@ class ZipPP(PostProcessor):
algorithm = "store"
self.path = pathfmt.realdirectory
- self.zfile = zipfile.ZipFile(
- self.path + self.ext, "a",
- self.COMPRESSION_ALGORITHMS[algorithm], True)
+ args = (self.path + ext, "a",
+ self.COMPRESSION_ALGORITHMS[algorithm], True)
- def run(self, pathfmt):
+ if options.get("mode") == "safe":
+ self.run = self._write_safe
+ self.zfile = None
+ self.args = args
+ else:
+ self.run = self._write
+ self.zfile = zipfile.ZipFile(*args)
+
+ def _write(self, pathfmt, zfile=None):
# 'NameToInfo' is not officially documented, but it's available
# for all supported Python versions and using it directly is a lot
- # better than calling getinfo()
- if pathfmt.filename not in self.zfile.NameToInfo:
- self.zfile.write(pathfmt.temppath, pathfmt.filename)
+ # faster than calling getinfo()
+ if zfile is None:
+ zfile = self.zfile
+ if pathfmt.filename not in zfile.NameToInfo:
+ zfile.write(pathfmt.temppath, pathfmt.filename)
pathfmt.delete = self.delete
+ def _write_safe(self, pathfmt):
+ with zipfile.ZipFile(*self.args) as zfile:
+ self._write(pathfmt, zfile)
+
def finalize(self):
- self.zfile.close()
+ if self.zfile:
+ self.zfile.close()
if self.delete:
try:
+ # remove target directory
os.rmdir(self.path)
except OSError:
pass
- if not self.zfile.NameToInfo:
+ if self.zfile and not self.zfile.NameToInfo:
try:
+ # delete empty zip archive
os.unlink(self.zfile.filename)
except OSError:
pass
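
In "safe" mode the archive is reopened and closed around every stored file; since ZipFile.close() is what writes the central directory, the on-disk archive is complete and readable after each file, at the cost of rewriting that directory every time. A standalone sketch of one such write (paths are illustrative):

    import zipfile

    def store_safe(archive_path, member_name, source_path):
        # append mode; closing rewrites the central directory
        with zipfile.ZipFile(archive_path, "a",
                             zipfile.ZIP_STORED, True) as zfile:
            if member_name not in zfile.NameToInfo:  # skip duplicates
                zfile.write(source_path, member_name)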
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 81e87b5..72dad5b 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -83,22 +83,6 @@ def nameext_from_url(url, data=None):
return data
-def clean_path_windows(path):
- """Remove illegal characters from a path-segment (Windows)"""
- try:
- return re.sub(r'[<>:"\\/|?*]', "_", path)
- except TypeError:
- return ""
-
-
-def clean_path_posix(path):
- """Remove illegal characters from a path-segment (Posix)"""
- try:
- return path.replace("/", "_")
- except AttributeError:
- return ""
-
-
def extract(txt, begin, end, pos=0):
"""Extract the text between 'begin' and 'end' from 'txt'
@@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
return date_string
-if os.name == "nt":
- clean_path = clean_path_windows
-else:
- clean_path = clean_path_posix
-
-
urljoin = urllib.parse.urljoin
quote = urllib.parse.quote
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 14ae3d2..02d998d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -535,6 +535,27 @@ class PathFormat():
if os.altsep and os.altsep in self.basedirectory:
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
+ restrict = extractor.config("restrict-filenames", "auto")
+ if restrict == "auto":
+ restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
+ elif restrict == "unix":
+ restrict = "/"
+ elif restrict == "windows":
+ restrict = "<>:\"\\/|?*"
+ self.clean_path = self._build_cleanfunc(restrict)
+
+ @staticmethod
+ def _build_cleanfunc(repl):
+ if not repl:
+ return lambda x: x
+ elif len(repl) == 1:
+ def func(x, r=repl):
+ return x.replace(r, "_")
+ else:
+ def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
+ return sub("_", x)
+ return func
+
def open(self, mode="wb"):
"""Open file and return a corresponding file object"""
return open(self.temppath, mode)
@@ -551,7 +572,7 @@ class PathFormat():
"""Build directory path and create it if necessary"""
try:
segments = [
- text.clean_path(
+ self.clean_path(
Formatter(segment, self.kwdefault)
.format_map(keywords).strip())
for segment in self.directory_fmt
@@ -597,7 +618,7 @@ class PathFormat():
def build_path(self):
"""Use filename-keywords and directory to build a full path"""
try:
- self.filename = text.clean_path(
+ self.filename = self.clean_path(
self.formatter.format_map(self.keywords))
except Exception as exc:
raise exception.FormatError(exc, "filename")
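
The per-instance clean_path built above replaces the removed module-level text.clean_path helpers; a single-character set uses a plain str.replace while longer sets compile a character class once. Exercising the same logic standalone (expected output taken from the removed test in test_text.py below):

    import re

    def build_cleanfunc(repl):
        if not repl:
            return lambda x: x
        if len(repl) == 1:
            return lambda x, r=repl: x.replace(r, "_")
        return lambda x, sub=re.compile(
            "[" + re.escape(repl) + "]").sub: sub("_", x)

    clean = build_cleanfunc("<>:\"\\/|?*")  # the "windows" set
    print(clean("foo<>:\"\\/|?*bar"))       # -> foo_________bar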
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d970ed6..d9cc3d6 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.9.0"
+__version__ = "1.10.1"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 3d86110..498e3fc 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -13,6 +13,7 @@ from gallery_dl import extractor
CATEGORY_MAP = {
"2chan" : "Futaba Channel",
"35photo" : "35PHOTO",
+ "adultempire" : "Adult Empire",
"archivedmoe" : "Archived.Moe",
"archiveofsins" : "Archive of Sins",
"artstation" : "ArtStation",
@@ -23,7 +24,7 @@ CATEGORY_MAP = {
"dynastyscans" : "Dynasty Reader",
"e621" : "e621",
"erolord" : "EroLord.com",
- "exhentai" : "ExHentai",
+ "exhentai" : "E-Hentai",
"fallenangels" : "Fallen Angels Scans",
"fashionnova" : "Fashion Nova",
"hbrowse" : "HBrowse",
@@ -36,6 +37,7 @@ CATEGORY_MAP = {
"idolcomplex" : "Idol Complex",
"imagebam" : "ImageBam",
"imagefap" : "ImageFap",
+ "imgbb" : "ImgBB",
"imgbox" : "imgbox",
"imgth" : "imgth",
"imgur" : "imgur",
@@ -71,6 +73,7 @@ CATEGORY_MAP = {
"smugmug" : "SmugMug",
"thebarchive" : "The /b/ Archive",
"vanillarock" : "もえぴりあ",
+ "vsco" : "VSCO",
"wikiart" : "WikiArt.org",
"worldthree" : "World Three",
"xhamster" : "xHamster",
@@ -109,6 +112,7 @@ AUTH_MAP = {
"exhentai" : "Optional",
"flickr" : "Optional (OAuth)",
"idolcomplex": "Optional",
+ "imgbb" : "Optional",
"instagram" : "Optional",
"luscious" : "Optional",
"mangoxo" : "Optional",
@@ -121,6 +125,8 @@ AUTH_MAP = {
"tsumino" : "Optional",
"tumblr" : "Optional (OAuth)",
"twitter" : "Optional",
+ "wallhaven" : ("Optional (`API Key "
+ "<configuration.rst#extractorwallhavenapi-key>`__)"),
}
IGNORE_LIST = (
diff --git a/test/test_results.py b/test/test_results.py
index 41390a8..839a75c 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -26,9 +26,12 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
- "komikcast",
- "konachan",
+ "adultempire",
+ "flickr",
+ "imgth",
+ "mangafox",
"mangapark",
+ "pixnet",
}
diff --git a/test/test_text.py b/test/test_text.py
index 405acd3..6a6d83a 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -139,26 +139,6 @@ class TestText(unittest.TestCase):
for value in INVALID:
self.assertEqual(f(value), empty)
- def test_clean_path_windows(self, f=text.clean_path_windows):
- self.assertEqual(f(""), "")
- self.assertEqual(f("foo"), "foo")
- self.assertEqual(f("foo/bar"), "foo_bar")
- self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo_________bar")
-
- # invalid arguments
- for value in INVALID:
- self.assertEqual(f(value), "")
-
- def test_clean_path_posix(self, f=text.clean_path_posix):
- self.assertEqual(f(""), "")
- self.assertEqual(f("foo"), "foo")
- self.assertEqual(f("foo/bar"), "foo_bar")
- self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo<>:\"\\_|?*bar")
-
- # invalid arguments
- for value in INVALID:
- self.assertEqual(f(value), "")
-
def test_extract(self, f=text.extract):
txt = "<a><b>"
self.assertEqual(f(txt, "<", ">"), ("a" , 3))