| author    | 2019-08-04 17:53:04 -0400 |
|-----------|---------------------------|
| committer | 2019-08-04 17:53:04 -0400 |
| commit    | 09e19cd4b63183a3cc38cea7bc5c5b8d308d22fa (patch) |
| tree      | 2ef7e5afcc539bf5ca7fc2a0c525709b41e309d7 |
| parent    | 1d18be9fc5a9d6577eb1bbb5f9a135bfa0ce0495 (diff) |
| parent    | 64ad8e7bd15df71ab1116eede414558631bcad32 (diff) |
Update upstream source from tag 'upstream/1.10.1'
Update to upstream version '1.10.1'
with Debian dir 81401e5e3e324250ded90d4caf7bf60cd0b9affb
28 files changed, 688 insertions, 130 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 625018a..58e295c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,34 @@
 # Changelog
 
+## 1.10.1 - 2019-08-02
+### Fixes
+- Restore functionality of both domains for `exhentai` extractors
+
+## 1.10.0 - 2019-08-01
+### Warning
+- Prior to version 1.10.0 all cache files were created world readable (mode `644`)
+  leading to possible sensitive information disclosure on multi-user systems
+- It is recommended to restrict access permissions of already existing files
+  (`/tmp/.gallery-dl.cache`) with `chmod 600`
+- Windows users should not be affected
+### Additions
+- Support for
+  - `vsco` - https://vsco.co/ ([#331](https://github.com/mikf/gallery-dl/issues/331))
+  - `imgbb` - https://imgbb.com/ ([#361](https://github.com/mikf/gallery-dl/issues/361))
+  - `adultempire` - https://www.adultempire.com/ ([#340](https://github.com/mikf/gallery-dl/issues/340))
+- `restrict-filenames` option to create Windows-compatible filenames on any platform ([#348](https://github.com/mikf/gallery-dl/issues/348))
+- `forward-cookies` option to control cookie forwarding to youtube-dl ([#352](https://github.com/mikf/gallery-dl/issues/352))
+### Changes
+- The default cache file location on non-Windows systems is now
+  - `$XDG_CACHE_HOME/gallery-dl/cache.sqlite3` or
+  - `~/.cache/gallery-dl/cache.sqlite3`
+- New cache files are created with mode `600`
+- `exhentai` extractors will always use `e-hentai.org` as domain
+### Fixes
+- Better handling of `exhentai` image limits and errors ([#356](https://github.com/mikf/gallery-dl/issues/356), [#360](https://github.com/mikf/gallery-dl/issues/360))
+- Try to prevent ZIP file corruption ([#355](https://github.com/mikf/gallery-dl/issues/355))
+- Miscellaneous fixes for `behance`, `ngomik`
+
 ## 1.9.0 - 2019-07-19
 ### Additions
 - Support for
@@ -25,7 +54,6 @@
 - Forward cookies to `youtube-dl` to allow downloading private videos
 - Miscellaneous fixes for `35photo`, `500px`, `newgrounds`, `simplyhentai`
 
-
 ## 1.8.7 - 2019-06-28
 ### Additions
 - Support for
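For anyone upgrading across this boundary, the 1.10.0 warning above amounts to a one-off cleanup of the old cache file. A minimal sketch, assuming the pre-1.10.0 default location on a non-Windows system (the path is the one named in the warning; adjust it if `cache.file` was customized):

```python
import os
import stat

# Old pre-1.10.0 default cache location (see the warning above).
old_cache = "/tmp/.gallery-dl.cache"

if os.path.exists(old_cache):
    # chmod 600: owner may read/write, group/others get nothing
    os.chmod(old_cache, stat.S_IRUSR | stat.S_IWUSR)
```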
diff --git a/README.rst b/README.rst
@@ -78,8 +78,8 @@ Download a standalone executable file,
 put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
 and run it inside a command prompt (like ``cmd.exe``).
 
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.9.0/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.9.0/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.1/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.1/gallery-dl.bin>`__
 
 These executables include a Python 3.7 interpreter
 and all required Python packages.
@@ -168,10 +168,11 @@ Username & Password
 -------------------
 
 Some extractors require you to provide valid login-credentials in the form of
-a username & password pair.
-This is necessary for ``pixiv``, ``nijie`` and ``seiga``
-and optional (but strongly recommended) for ``exhentai``, ``luscious``,
-``sankaku``, ``idolcomplex``, ``tsumino`` and ``wallhaven``.
+a username & password pair. This is necessary for
+``pixiv``, ``nijie``, and ``seiga``
+and optional (but strongly recommended) for
+``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``,
+``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
 
 You can set the necessary information in your configuration file
 (cf. gallery-dl.conf_)
@@ -223,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
 .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
 .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
 .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.9.0.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.1.zip
 .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
 
 .. _Python: https://www.python.org/downloads/
diff --git a/docs/configuration.rst b/docs/configuration.rst
index 32a529a..c6f757d 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -108,6 +108,24 @@ Description Directory path used as
             the base for all download destinations.
 =========== =====
 
+extractor.*.restrict-filenames
+------------------------------
+=========== =====
+Type        ``string``
+Default     ``"auto"``
+Example     ``"/!? ()[]{}"``
+Description Characters to replace with underscores (``_``) when generating
+            directory and file names.
+
+            Special values:
+
+            * ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
+              depending on the local operating system
+            * ``"unix"``: ``"/"``
+            * ``"windows"``: ``"<>:\"\\|/?*"``
+=========== =====
+
+
 extractor.*.skip
 ----------------
 =========== =====
@@ -146,10 +164,11 @@ Default     ``null``
 Description The username and password to use when attempting to log in to
             another site.
 
-            Specifying username and password is
-            required for the ``pixiv``, ``nijie`` and ``seiga`` modules and
-            optional (but strongly recommended) for ``danbooru``, ``exhentai``,
-            ``sankaku`` and ``idolcomplex``.
+            Specifying username and password is required for the
+            ``pixiv``, ``nijie``, and ``seiga``
+            modules and optional (but strongly recommended) for
+            ``danbooru``, ``exhentai``, ``idolcomplex``, ``instagram``,
+            ``luscious``, ``sankaku``, ``tsumino``, and ``twitter``.
 
             These values can also be set via the ``-u/--username`` and
             ``-p/--password`` command-line options or by using a |.netrc|_ file.
@@ -1090,6 +1109,15 @@ Description Video `format selection
 =========== =====
 
+downloader.ytdl.forward-cookies
+-------------------------------
+=========== =====
+Type        ``bool``
+Default     ``true``
+Description Forward cookies to youtube-dl.
+=========== =====
+
+
 downloader.ytdl.logging
 -----------------------
 =========== =====
@@ -1438,6 +1466,22 @@ Default     ``false``
 Description Keep the actual files after writing them to a ZIP archive.
 =========== =====
 
+zip.mode
+--------
+=========== =====
+Type        ``string``
+Default     ``"default"``
+Description * ``"default"``: Write the central directory file header
+              once after everything is done or an exception is raised.
+
+            * ``"safe"``: Update the central directory file header
+              each time a file is stored in a ZIP archive.
+
+              This greatly reduces the chance a ZIP archive gets corrupted in
+              case the Python interpreter gets shut down unexpectedly
+              (power outage, SIGKILL) but is also a lot slower.
+=========== =====
+
 
 Miscellaneous Options
@@ -1448,7 +1492,8 @@ cache.file
 ----------
 =========== =====
 Type        |Path|_
-Default     |tempfile.gettempdir()|_ + ``".gallery-dl.cache"``
+Default     * |tempfile.gettempdir()|_ + ``".gallery-dl.cache"`` on Windows
+            * (``$XDG_CACHE_HOME`` or ``"~/.cache"``) + ``"/gallery-dl/cache.sqlite3"`` on all other platforms
 Description Path of the SQLite3 database used to cache login sessions,
             cookies and API tokens across `gallery-dl` invocations.
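Taken together, the new options slot into an ordinary gallery-dl configuration file. A minimal sketch with example values only; the keys follow the configuration.rst sections above, the `postprocessors` placement assumes the documented `extractor.*.postprocessors` list, and writing the file from Python is purely for illustration:

```python
import json
import os

# Example values only -- every key here is documented in configuration.rst.
config = {
    "extractor": {
        "restrict-filenames": "windows",   # Windows-safe names on any platform
        "postprocessors": [
            {"name": "zip", "mode": "safe"},  # rewrite the central directory per file
        ],
    },
    "downloader": {
        "ytdl": {"forward-cookies": True},
    },
    "cache": {
        "file": "~/.cache/gallery-dl/cache.sqlite3",  # the new non-Windows default
    },
}

path = os.path.expanduser("~/.config/gallery-dl/config.json")
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "w") as f:
    json.dump(config, f, indent=4)
```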
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 835ed17..a4a9ee0 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -9,6 +9,7 @@
         "skip": true,
         "sleep": 0,
         "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
+        "restrict-filenames": "auto",
 
         "artstation":
         {
@@ -163,6 +164,7 @@
         "ytdl":
         {
             "format": null,
+            "forward-cookies": true,
             "mtime": true,
             "rate": null,
             "retries": 4,
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 2a1a1ed..d2fb4ea 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -13,6 +13,7 @@ Site URL Capabilities
 8chan               https://8ch.net/                   Threads
 8muses              https://www.8muses.com/            Albums
 Adobe Portfolio     https://www.myportfolio.com/       Galleries
+Adult Empire        https://www.adultempire.com/       Galleries
 arch.b4k.co         https://arch.b4k.co/               Threads
 Archive of Sins     https://archiveofsins.com/         Threads
 Archived.Moe        https://archived.moe/              Threads
@@ -24,9 +25,9 @@ Desuarchive https://desuarchive.org/ Threads
 DeviantArt          https://www.deviantart.com/        |deviantart-C|                               Optional (OAuth)
 Doki Reader         https://kobato.hologfx.com/reader/ Chapters, Manga
 Dynasty Reader      https://dynasty-scans.com/         Chapters, individual Images, Search Results
+E-Hentai            https://e-hentai.org/              Favorites, Galleries, Search Results         Optional
 e621                https://e621.net/                  Pools, Popular Images, Posts, Tag-Searches
 EroLord.com         http://erolord.com/                Galleries
-ExHentai            https://exhentai.org/              Favorites, Galleries, Search Results         Optional
 Fallen Angels Scans https://www.fascans.com/           Chapters, Manga
 Fashion Nova        https://www.fashionnova.com/       Collections, Products
 Fireden             https://boards.fireden.net/        Threads
@@ -46,6 +47,7 @@ Hypnohub https://hypnohub.net/ Pools, Popular Images,
 Idol Complex        https://idol.sankakucomplex.com/   Pools, Posts, Tag-Searches                   Optional
 ImageBam            http://www.imagebam.com/           Galleries, individual Images
 ImageFap            https://imagefap.com/              Images from Users, Galleries, individual Images
+ImgBB               https://imgbb.com/                 Images from Users, Albums                    Optional
 imgbox              https://imgbox.com/                Galleries, individual Images
 imgth               https://imgth.com/                 Galleries
 imgur               https://imgur.com/                 Albums, individual Images
@@ -106,7 +108,8 @@ The /b/ Archive https://thebarchive.com/ Threads
 Tsumino             https://www.tsumino.com/           Galleries, Search Results                    Optional
 Tumblr              https://www.tumblr.com/            Images from Users, Likes, Posts, Tag-Searches  Optional (OAuth)
 Twitter             https://twitter.com/               Media Timelines, Timelines, Tweets           Optional
-Wallhaven           https://wallhaven.cc/              individual Images, Search Results
+VSCO                https://vsco.co/                   Images from Users, Collections, individual Images
+Wallhaven           https://wallhaven.cc/              individual Images, Search Results            |wallhaven-A|
 Warosu              https://warosu.org/                Threads
 Weibo               https://www.weibo.com/             Images from Users, Images from Statuses
 WikiArt.org         https://www.wikiart.org/           Artists, Artworks
@@ -137,4 +140,5 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
 .. |pixnet-C| replace:: Images from Users, Folders, individual Images, Sets
 .. |pornreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches
 .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders
+.. |wallhaven-A| replace:: Optional (`API Key <configuration.rst#extractorwallhavenapi-key>`__)
 .. |yuki-S| replace:: yuki.la 4chan archive
diff --git a/gallery_dl/cache.py b/gallery_dl/cache.py
index e6ba61a..3ceef75 100644
--- a/gallery_dl/cache.py
+++ b/gallery_dl/cache.py
@@ -11,6 +11,7 @@
 import sqlite3
 import pickle
 import time
+import os
 import functools
 from . import config, util
 
@@ -188,17 +189,25 @@ def clear():
 
 def _path():
     path = config.get(("cache", "file"), -1)
+    if path != -1:
+        return util.expand_path(path)
 
-    if path == -1:
+    if os.name == "nt":
         import tempfile
-        import os.path
         return os.path.join(tempfile.gettempdir(), ".gallery-dl.cache")
 
-    return util.expand_path(path)
+    cachedir = util.expand_path(os.path.join(
+        os.environ.get("XDG_CACHE_HOME", "~/.cache"), "gallery-dl"))
+    os.makedirs(cachedir, exist_ok=True)
+    return os.path.join(cachedir, "cache.sqlite3")
 
 
 try:
+    dbfile = _path()
+    if os.name != "nt":
+        # restrict access permissions for new db files
+        os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))
     DatabaseCacheDecorator.db = sqlite3.connect(
-        _path(), timeout=30, check_same_thread=False)
-except (TypeError, sqlite3.OperationalError):
+        dbfile, timeout=30, check_same_thread=False)
+except (OSError, TypeError, sqlite3.OperationalError):
     cache = memcache  # noqa: F811
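The key trick in the cache.py hunk above is creating the database file with owner-only permissions *before* SQLite ever touches it, so there is no window where a world-readable file exists. A standalone sketch of the same pattern (example path only):

```python
import os
import sqlite3
import tempfile

# Example path; gallery-dl derives its own from XDG_CACHE_HOME.
dbfile = os.path.join(tempfile.gettempdir(), "example-cache.sqlite3")

if os.name != "nt":
    # O_CREAT with mode 0o600 only takes effect when the file is newly
    # created; an existing file keeps whatever permissions it already has.
    os.close(os.open(dbfile, os.O_CREAT | os.O_RDONLY, 0o600))

db = sqlite3.connect(dbfile, timeout=30, check_same_thread=False)
```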
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index da57935..a233487 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -34,12 +34,15 @@ class YoutubeDLDownloader(DownloaderBase):
         if self.config("logging", True):
             options["logger"] = self.log
 
+        self.forward_cookies = self.config("forward-cookies", True)
         self.ytdl = YoutubeDL(options)
 
     def download(self, url, pathfmt):
-        for cookie in self.session.cookies:
-            self.ytdl.cookiejar.set_cookie(cookie)
+        if self.forward_cookies:
+            set_cookie = self.ytdl.cookiejar.set_cookie
+            for cookie in self.session.cookies:
+                set_cookie(cookie)
 
         try:
             info_dict = self.ytdl.extract_info(url[5:], download=False)
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 189c163..0b24111 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -17,6 +17,7 @@ modules = [
     "500px",
     "8chan",
     "8muses",
+    "adultempire",
     "artstation",
     "behance",
     "bobx",
@@ -42,6 +43,7 @@ modules = [
     "idolcomplex",
     "imagebam",
     "imagefap",
+    "imgbb",
     "imgbox",
     "imgth",
     "imgur",
@@ -95,6 +97,7 @@ modules = [
     "tumblr",
     "twitter",
     "vanillarock",
+    "vsco",
     "wallhaven",
     "warosu",
     "weibo",
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
new file mode 100644
index 0000000..5ea835f
--- /dev/null
+++ b/gallery_dl/extractor/adultempire.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.adultempire.com/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class AdultempireGalleryExtractor(GalleryExtractor):
+    """Extractor for image galleries from www.adultempire.com"""
+    category = "adultempire"
+    root = "https://www.adultempire.com"
+    pattern = (r"(?:https?://)?(?:www\.)?adult(?:dvd)?empire\.com"
+               r"(/(\d+)/gallery\.html)")
+    test = (
+        ("https://www.adultempire.com/5998/gallery.html", {
+            "range": "1",
+            "keyword": "0533ef1184892be8ac02b17286797c95f389ba63",
+            "content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
+        }),
+        ("https://www.adultdvdempire.com/5683/gallery.html", {
+            "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
+            "keyword": "59fe5d95929efc5040a819a5f77aba7a022bb85a",
+        }),
+    )
+
+    def __init__(self, match):
+        GalleryExtractor.__init__(self, match)
+        self.gallery_id = match.group(2)
+
+    def metadata(self, page):
+        extr = text.extract_from(page, page.index('<div id="content">'))
+        return {
+            "gallery_id": text.parse_int(self.gallery_id),
+            "title"     : text.unescape(extr('title="', '"')),
+            "studio"    : extr(">studio</small>", "<").strip(),
+            "date"      : text.parse_datetime(extr(
+                ">released</small>", "<").strip(), "%m/%d/%Y"),
+            "actors"    : text.split_html(extr(
+                '<ul class="item-details item-cast-list ', '</ul>'))[1:],
+        }
+
+    def images(self, page):
+        params = {"page": 1}
+        while True:
+            urls = list(text.extract_iter(page, 'rel="L"><img src="', '"'))
+            for url in urls:
+                yield url.replace("_200.", "_9600."), None
+            if len(urls) < 24:
+                return
+            params["page"] += 1
+            page = self.request(self.chapter_url, params=params).text
diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py
index 111d560..467a935 100644
--- a/gallery_dl/extractor/behance.py
+++ b/gallery_dl/extractor/behance.py
@@ -30,7 +30,8 @@ class BehanceExtractor(Extractor):
     @staticmethod
     def _update(data):
         # compress data to simple lists
-        data["fields"] = [field["name"] for field in data["fields"]]
+        if data["fields"] and isinstance(data["fields"][0], dict):
+            data["fields"] = [field["name"] for field in data["fields"]]
 
         data["owners"] = [owner["display_name"] for owner in data["owners"]]
         if "tags" in data:
             data["tags"] = [tag["title"] for tag in data["tags"]]
@@ -140,11 +141,11 @@ class BehanceUserExtractor(BehanceExtractor):
 
     def galleries(self):
         url = "{}/{}/projects".format(self.root, self.user)
-        headers = {"X-Requested-With": "XMLHttpRequest"}
         params = {"offset": 0}
+        headers = {"X-Requested-With": "XMLHttpRequest"}
 
         while True:
-            data = self.request(url, headers=headers, params=params).json()
+            data = self.request(url, params=params, headers=headers).json()
             work = data["profile"]["activeSection"]["work"]
             yield from work["projects"]
             if not work["hasMore"]:
@@ -157,8 +158,8 @@ class BehanceCollectionExtractor(BehanceExtractor):
     subcategory = "collection"
     categorytransfer = True
     pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
-    test = ("https://www.behance.net/collection/170615607/Sky", {
-        "count": ">= 13",
+    test = ("https://www.behance.net/collection/71340149/inspiration", {
+        "count": ">= 145",
         "pattern": BehanceGalleryExtractor.pattern,
     })
 
@@ -168,12 +169,13 @@ class BehanceCollectionExtractor(BehanceExtractor):
 
     def galleries(self):
        url = "{}/collection/{}/a".format(self.root, self.collection_id)
+        params = {"offset": 0}
         headers = {"X-Requested-With": "XMLHttpRequest"}
-        params = {}
 
         while True:
-            data = self.request(url, headers=headers, params=params).json()
-            yield from data["output"]
-            if not data.get("offset"):
+            data = self.request(url, params=params, headers=headers).json()
+            for item in data["items"]:
+                yield item["project"]
+            if len(data["items"]) < 40:
                 return
-            params["offset"] = data["offset"]
+            params["offset"] += len(data["items"])
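The `_update` change above is a guard against double-processing: gallery data can arrive with `fields` already flattened to plain strings. A minimal standalone illustration of the fixed, now idempotent behaviour (the function name is mine, not the extractor's):

```python
# Hypothetical standalone version of the guarded normalisation.
def compress_fields(data):
    # only flatten if the entries are still dicts from the API
    if data["fields"] and isinstance(data["fields"][0], dict):
        data["fields"] = [field["name"] for field in data["fields"]]
    return data

assert compress_fields({"fields": [{"name": "Illustration"}]}) == {"fields": ["Illustration"]}
assert compress_fields({"fields": ["Illustration"]}) == {"fields": ["Illustration"]}  # no-op on second pass
```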
diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py
index b10bd35..9cc6738 100644
--- a/gallery_dl/extractor/dynastyscans.py
+++ b/gallery_dl/extractor/dynastyscans.py
@@ -100,7 +100,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
     test = (
         ("https://dynasty-scans.com/images?with[]=4930&with[]=5211", {
             "url": "6b570eedd8a741c2cd34fb98b22a49d772f84191",
-            "keyword": "a1e2d05c1406a08b02f347389616a6babb1b50bf",
+            "keyword": "fa7ff94f82cdf942f7734741d758f160a6b0905a",
         }),
         ("https://dynasty-scans.com/images", {
             "range": "1",
diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py
index 20e0746..1833b1a 100644
--- a/gallery_dl/extractor/exhentai.py
+++ b/gallery_dl/extractor/exhentai.py
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from galleries at https://exhentai.org/"""
+"""Extractors for https://e-hentai.org/ and https://exhentai.org/"""
 
 from .common import Extractor, Message
 from .. import text, util, exception
@@ -23,16 +23,19 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org"
 class ExhentaiExtractor(Extractor):
     """Base class for exhentai extractors"""
     category = "exhentai"
-    directory_fmt = ("{category}", "{gallery_id}")
+    directory_fmt = ("{category}", "{gallery_id} {title}")
     filename_fmt = (
         "{gallery_id}_{num:>04}_{image_token}_{filename}.{extension}")
     archive_fmt = "{gallery_id}_{num}"
-    cookiedomain = ".exhentai.org"
     cookienames = ("ipb_member_id", "ipb_pass_hash")
+    cookiedomain = ".exhentai.org"
     root = "https://exhentai.org"
 
+    LIMIT = False
+
     def __init__(self, match):
-        if match.group(1) != "ex":
+        version = match.group(1)
+        if version != "ex":
             self.root = "https://e-hentai.org"
             self.cookiedomain = ".e-hentai.org"
         Extractor.__init__(self, match)
@@ -45,6 +48,8 @@ class ExhentaiExtractor(Extractor):
         if self.wait_max < self.wait_min:
             self.wait_max = self.wait_min
         self.session.headers["Referer"] = self.root + "/"
+        if version != "ex":
+            self.session.cookies.set("nw", "1", domain=self.cookiedomain)
 
     def request(self, *args, **kwargs):
         response = Extractor.request(self, *args, **kwargs)
@@ -63,6 +68,9 @@ class ExhentaiExtractor(Extractor):
 
     def login(self):
         """Login and set necessary cookies"""
+        if self.LIMIT:
+            self.log.error("Image limit reached!")
+            raise exception.StopExtraction()
         if self._check_cookies(self.cookienames):
             return
         username, password = self._get_auth_info()
@@ -92,7 +100,7 @@ class ExhentaiExtractor(Extractor):
         }
 
         response = self.request(url, method="POST", headers=headers, data=data)
-        if "You are now logged in as:" not in response.text:
+        if b"You are now logged in as:" not in response.content:
             raise exception.AuthenticationError()
         return {c: response.cookies[c] for c in self.cookienames}
 
@@ -112,9 +120,9 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
               r"(?:/g/(\d+)/([\da-f]{10})"
               r"|/s/([\da-f]{10})/(\d+)-(\d+))")
     test = (
-        ("https://exhentai.org/g/960460/4f0e369d82/", {
-            "keyword": "1532ca4d0e4e0738dc994ca725a228af04a4e480",
-            "content": "493d759de534355c9f55f8e365565b62411de146",
+        ("https://exhentai.org/g/1200119/d55c44d3d0/", {
+            "keyword": "1b353fad00dff0665b1746cdd151ab5cc326df23",
+            "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff",
         }),
         ("https://exhentai.org/g/960461/4f0e369d82/", {
             "exception": exception.NotFoundError,
@@ -122,13 +130,13 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         ("http://exhentai.org/g/962698/7f02358e00/", {
             "exception": exception.AuthorizationError,
         }),
-        ("https://exhentai.org/s/3957343c3b/960460-5", {
+        ("https://exhentai.org/s/f68367b4c8/1200119-3", {
             "count": 2,
         }),
-        ("https://e-hentai.org/s/3957343c3b/960460-5", {
+        ("https://e-hentai.org/s/f68367b4c8/1200119-3", {
             "count": 2,
         }),
-        ("https://g.e-hentai.org/g/960460/4f0e369d82/"),
+        ("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
     )
 
     def __init__(self, match):
@@ -143,14 +151,25 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
 
     def items(self):
         self.login()
+        if self.limits:
+            self._init_limits()
+
         if self.gallery_token:
             gpage = self._gallery_page()
             self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0]
+            if not self.image_token:
+                self.log.error("Failed to extract initial image token")
+                self.log.debug("Page content:\n%s", gpage)
+                return
             self.wait()
             ipage = self._image_page()
         else:
             ipage = self._image_page()
             part = text.extract(ipage, 'hentai.org/g/', '"')[0]
+            if not part:
+                self.log.error("Failed to extract gallery token")
+                self.log.debug("Page content:\n%s", ipage)
+                return
             self.gallery_token = part.split("/")[1]
             self.wait()
             gpage = self._gallery_page()
@@ -211,12 +230,17 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         iurl = extr('<img id="img" src="', '"')
         orig = extr('hentai.org/fullimg.php', '"')
 
-        if self.original and orig:
-            url = self.root + "/fullimg.php" + text.unescape(orig)
-            data = self._parse_original_info(extr('ownload original', '<'))
-        else:
-            url = iurl
-            data = self._parse_image_info(url)
+        try:
+            if self.original and orig:
+                url = self.root + "/fullimg.php" + text.unescape(orig)
+                data = self._parse_original_info(extr('ownload original', '<'))
+            else:
+                url = iurl
+                data = self._parse_image_info(url)
+        except IndexError:
+            self.log.error("Unable to parse image info for '%s'", url)
+            self.log.debug("Page content:\n%s", page)
+            raise exception.StopExtraction()
 
         data["num"] = self.image_num
         data["image_token"] = self.key["start"] = extr('var startkey="', '";')
@@ -242,13 +266,18 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos)
         origurl, pos = text.extract(page["i7"], '<a href="', '"')
 
-        if self.original and origurl:
-            url = text.unescape(origurl)
-            data = self._parse_original_info(
-                text.extract(page["i7"], "ownload original", "<", pos)[0])
-        else:
-            url = imgurl
-            data = self._parse_image_info(url)
+        try:
+            if self.original and origurl:
+                url = text.unescape(origurl)
+                data = self._parse_original_info(text.extract(
+                    page["i7"], "ownload original", "<", pos)[0])
+            else:
+                url = imgurl
+                data = self._parse_image_info(url)
+        except IndexError:
+            self.log.error("Unable to parse image info for '%s'", url)
+            self.log.debug("Page content:\n%s", page)
+            raise exception.StopExtraction()
 
         data["num"] = request["page"]
         data["image_token"] = imgkey
@@ -266,6 +295,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             raise exception.AuthorizationError()
         if page.startswith(("Key missing", "Gallery not found")):
             raise exception.NotFoundError("gallery")
+        if "hentai.org/mpv/" in page:
+            self.log.warning("Enabled Multi-Page Viewer is not supported")
         return page
 
     def _image_page(self):
@@ -277,17 +308,24 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
             raise exception.NotFoundError("image page")
         return page
 
+    def _init_limits(self):
+        self._update_limits()
+        if self._remaining <= 0:
+            self.log.error("Image limit reached!")
+            ExhentaiExtractor.LIMIT = True
+            raise exception.StopExtraction()
+
     def _check_limits(self, data):
-        if not self._remaining or data["num"] % 20 == 0:
+        if data["num"] % 20 == 0:
             self._update_limits()
         self._remaining -= data["cost"]
+
         if self._remaining <= 0:
             url = "{}/s/{}/{}-{}".format(
                 self.root, data["image_token"], self.gallery_id, data["num"])
-            self.log.error(
-                "Image limit reached! Reset it and continue with "
-                "'%s' as URL.", url)
+            self.log.error("Image limit reached! Continue with "
+                           "'%s' as URL after resetting it.", url)
+            ExhentaiExtractor.LIMIT = True
             raise exception.StopExtraction()
 
     def _update_limits(self):
@@ -301,6 +339,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
         page = self.request(url, cookies=cookies).text
         current, pos = text.extract(page, "<strong>", "</strong>")
         maximum, pos = text.extract(page, "<strong>", "</strong>", pos)
+        self.log.debug("Image Limits: %s/%s", current, maximum)
         self._remaining = text.parse_int(maximum) - text.parse_int(current)
 
     @staticmethod
@@ -330,7 +369,7 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
     subcategory = "search"
     pattern = BASE_PATTERN + r"/?\?(.*)$"
     test = (
-        ("https://exhentai.org/?f_search=touhou"),
+        ("https://e-hentai.org/?f_search=touhou"),
         (("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
           "&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
           "&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
@@ -372,7 +411,10 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
     subcategory = "favorite"
     pattern = BASE_PATTERN + r"/favorites\.php(?:\?(.*))?"
     test = (
-        ("https://exhentai.org/favorites.php"),
+        ("https://e-hentai.org/favorites.php", {
+            "count": 1,
+            "pattern": r"https?://e-hentai\.org/g/1200119/d55c44d3d0"
+        }),
         ("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
          "&f_apply=Search+Favorites"),
     )
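Worth calling out in the exhentai changes: `LIMIT` is a *class* attribute, so once any extractor instance in the process trips the image limit, every later `login()` aborts immediately instead of spending more of the account's quota. The pattern in isolation, with hypothetical names of my own:

```python
# Hypothetical reduction of the class-level "stop everything" flag.
class LimitAwareExtractor:
    LIMIT = False  # shared by all instances in this process

    def login(self):
        if self.LIMIT:
            raise RuntimeError("image limit reached earlier in this run")

    def on_limit_exceeded(self):
        # write through the class, not the instance, so other
        # instances observe the flag too
        LimitAwareExtractor.LIMIT = True
        raise RuntimeError("image limit reached")
```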
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 15bd0a8..ce2e83b 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -30,6 +30,7 @@ class GelbooruExtractor(booru.XmlParserMixin,
             self.params.update({"page": "dapi", "s": "post", "q": "index"})
         else:
             self.items = self.items_noapi
+            self.session.cookies["fringeBenefits"] = "yup"
 
     def items_noapi(self):
         data = self.get_metadata()
diff --git a/gallery_dl/extractor/imgbb.py b/gallery_dl/extractor/imgbb.py
new file mode 100644
index 0000000..442634b
--- /dev/null
+++ b/gallery_dl/extractor/imgbb.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://imgbb.com/"""
+
+from .common import Extractor, Message
+from .. import text, exception
+from ..cache import cache
+import json
+
+
+class ImgbbExtractor(Extractor):
+    """Base class for imgbb extractors"""
+    category = "imgbb"
+    filename_fmt = "{title} {id}.{extension}"
+    archive_fmt = "{id}"
+    root = "https://imgbb.com"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.page_url = self.sort = None
+
+    def items(self):
+        self.login()
+        page = self.request(self.page_url, params={"sort": self.sort}).text
+        data = self.metadata(page)
+        first = True
+
+        yield Message.Version, 1
+        for img in self.images(page):
+            image = {
+                "id"       : img["url_viewer"].rpartition("/")[2],
+                "user"     : img["user"]["username"],
+                "title"    : text.unescape(img["title"]),
+                "url"      : img["image"]["url"],
+                "extension": img["image"]["extension"],
+                "size"     : text.parse_int(img["image"]["size"]),
+                "width"    : text.parse_int(img["width"]),
+                "height"   : text.parse_int(img["height"]),
+            }
+            image.update(data)
+            if first:
+                first = False
+                yield Message.Directory, data
+            yield Message.Url, image["url"], image
+
+    def login(self):
+        username, password = self._get_auth_info()
+        if username:
+            self._update_cookies(self._login_impl(username, password))
+
+    @cache(maxage=360*24*3600, keyarg=1)
+    def _login_impl(self, username, password):
+        self.log.info("Logging in as %s", username)
+
+        url = self.root + "/login"
+        page = self.request(url).text
+        token = text.extract(page, 'PF.obj.config.auth_token="', '"')[0]
+
+        headers = {"Referer": url}
+        data = {
+            "auth_token"   : token,
+            "login-subject": username,
+            "password"     : password,
+        }
+        response = self.request(url, method="POST", headers=headers, data=data)
+
+        if not response.history:
+            raise exception.AuthenticationError()
+        return self.session.cookies
+
+    def _pagination(self, page, endpoint, params):
+        params["page"] = 2
+        data = None
+
+        while True:
+            for img in text.extract_iter(page, "data-object='", "'"):
+                yield json.loads(text.unquote(img))
+            if data:
+                if params["seek"] == data["seekEnd"]:
+                    return
+                params["seek"] = data["seekEnd"]
+                params["page"] += 1
+            data = self.request(endpoint, "POST", data=params).json()
+            page = data["html"]
+
+
+class ImgbbAlbumExtractor(ImgbbExtractor):
+    """Extractor for albums on imgbb.com"""
+    subcategory = "album"
+    directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
+    pattern = r"(?:https?://)?ibb\.co/album/([^/?&#]+)/?(?:\?([^#]+))?"
+    test = (
+        ("https://ibb.co/album/c6p5Yv", {
+            "range": "1-80",
+            "url": "8adaf0f7dfc19ff8bc4712c97f534af8b1e06412",
+            "keyword": "155b665a53e83d359e914cab7c69d5b829444d64",
+        }),
+        ("https://ibb.co/album/c6p5Yv?sort=title_asc", {
+            "range": "1-80",
+            "url": "d6c45041d5c8323c435b183a976f3fde2af7c547",
+            "keyword": "30c3262214e2044bbcf6bf2dee8e3ca7ebd62b71",
+        }),
+    )
+
+    def __init__(self, match):
+        ImgbbExtractor.__init__(self, match)
+        self.album_name = None
+        self.album_id = match.group(1)
+        self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
+        self.page_url = "https://ibb.co/album/" + self.album_id
+
+    def metadata(self, page):
+        album, pos = text.extract(page, '"og:title" content="', '"')
+        user , pos = text.extract(page, 'rel="author">', '<', pos)
+        return {
+            "album_id"  : self.album_id,
+            "album_name": text.unescape(album),
+            "user"      : user.lower(),
+        }
+
+    def images(self, page):
+        seek, pos = text.extract(page, 'data-seek="', '"')
+        tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
+
+        return self._pagination(page, "https://ibb.co/json", {
+            "action"    : "list",
+            "list"      : "images",
+            "from"      : "album",
+            "sort"      : self.sort,
+            "albumid"   : self.album_id,
+            "seek"      : seek,
+            "auth_token": tokn,
+            "params_hidden[list]"   : "images",
+            "params_hidden[from]"   : "album",
+            "params_hidden[albumid]": self.album_id,
+        })
+
+
+class ImgbbUserExtractor(ImgbbExtractor):
+    """Extractor for user profiles in imgbb.com"""
+    subcategory = "user"
+    directory_fmt = ("{category}", "{user}")
+    pattern = r"(?:https?://)?([^.]+)\.imgbb\.com/?(?:\?([^#]+))?$"
+    test = ("https://folkie.imgbb.com", {
+        "range": "1-80",
+        "pattern": r"https?://i\.ibb\.co/\w+/[^/?&#]+",
+    })
+
+    def __init__(self, match):
+        ImgbbExtractor.__init__(self, match)
+        self.user = match.group(1)
+        self.sort = text.parse_query(match.group(2)).get("sort", "date_desc")
+        self.page_url = "https://{}.imgbb.com/".format(self.user)
+
+    def metadata(self, page):
+        return {"user": self.user}
+
+    def images(self, page):
+        seek, pos = text.extract(page, 'data-seek="', '"')
+        tokn, pos = text.extract(page, 'PF.obj.config.auth_token="', '"', pos)
+        user, pos = text.extract(page, '.obj.resource={"id":"', '"', pos)
+
+        return self._pagination(page, self.page_url + "json", {
+            "action"    : "list",
+            "list"      : "images",
+            "from"      : "user",
+            "sort"      : self.sort,
+            "seek"      : seek,
+            "userid"    : user,
+            "auth_token": tokn,
+            "params_hidden[userid]": user,
+            "params_hidden[from]"  : "user",
+        })
diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py
index 65ae843..879d38b 100644
--- a/gallery_dl/extractor/luscious.py
+++ b/gallery_dl/extractor/luscious.py
@@ -62,7 +62,7 @@ class LusciousAlbumExtractor(LusciousBase, GalleryExtractor):
     test = (
         ("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
             "url": "7e4984a271a1072ac6483e4228a045895aff86f3",
-            "keyword": "c597c132834f4990f90bf5dee5de2a9d4ba263a4",
+            "keyword": "ab4e5b71583fd439b4c8012a642aa8b58d8d0758",
             "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
         }),
         ("https://luscious.net/albums/virgin-killer-sweater_282582/", {
diff --git a/gallery_dl/extractor/ngomik.py b/gallery_dl/extractor/ngomik.py
index 8135a8a..f3608b2 100644
--- a/gallery_dl/extractor/ngomik.py
+++ b/gallery_dl/extractor/ngomik.py
@@ -44,7 +44,7 @@ class NgomikChapterExtractor(ChapterExtractor):
 
     @staticmethod
     def images(page):
-        readerarea = text.extract(page, 'id=readerarea', 'class=chnav')[0]
+        readerarea = text.extract(page, 'id="readerarea"', 'class="chnav"')[0]
         return [
             (text.unescape(url), None)
             for url in re.findall(r"\ssrc=[\"']?([^\"' >]+)", readerarea)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index 012cb8b..da9735e 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -283,9 +283,9 @@ class SankakuPostExtractor(SankakuExtractor):
         "options": (("tags", True),),
         "keyword": {
             "tags_artist": "bonocho",
-            "tags_copyright": "batman_(series) the_dark_knight",
-            "tags_medium": "sketch copyright_name",
             "tags_studio": "dc_comics",
+            "tags_medium": "sketch copyright_name",
+            "tags_copyright": str,
             "tags_character": str,
             "tags_general": str,
         },
diff --git a/gallery_dl/extractor/sankakucomplex.py b/gallery_dl/extractor/sankakucomplex.py
index 55eda9f..0189fc9 100644
--- a/gallery_dl/extractor/sankakucomplex.py
+++ b/gallery_dl/extractor/sankakucomplex.py
@@ -34,11 +34,11 @@ class SankakucomplexArticleExtractor(SankakucomplexExtractor):
     test = (
         ("https://www.sankakucomplex.com/2019/05/11/twitter-cosplayers", {
             "url": "4a9ecc5ae917fbce469280da5b6a482510cae84d",
-            "keyword": "4b3b5766b277a5d0acbec90fa8f2343262b07efd",
+            "keyword": "bfe08310e7d9a572f568f6900e0ed0eb295aa2b3",
         }),
         ("https://www.sankakucomplex.com/2009/12/01/sexy-goddesses-of-2ch", {
             "url": "a1e249173fd6c899a8134fcfbd9c925588a63f7c",
-            "keyword": "f47a416d680717855bbc3e4f0cd44479f61d9aa4",
+            "keyword": "e78fcc23c2711befc0969a45ea5082a29efccf68",
         }),
     )
diff --git a/gallery_dl/extractor/tsumino.py b/gallery_dl/extractor/tsumino.py
index 03ee144..66ad431 100644
--- a/gallery_dl/extractor/tsumino.py
+++ b/gallery_dl/extractor/tsumino.py
@@ -65,7 +65,7 @@ class TsuminoGalleryExtractor(TsuminoBase, GalleryExtractor):
             "uploader"  : "sehki",
             "lang"      : "en",
             "language"  : "English",
-            "thumbnail" : "http://www.tsumino.com/Image/Thumb/40996",
+            "thumbnail" : "re:https?://www.tsumino.com/Image/Thumb/40996",
         },
     }),
     ("https://www.tsumino.com/Read/View/45834"),
diff --git a/gallery_dl/extractor/vsco.py b/gallery_dl/extractor/vsco.py
new file mode 100644
index 0000000..639ec82
--- /dev/null
+++ b/gallery_dl/extractor/vsco.py
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2019 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://vsco.co/"""
+
+from .common import Extractor, Message
+from .. import text
+import json
+
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?vsco\.co/([^/]+)"
+
+
+class VscoExtractor(Extractor):
+    """Base class for vsco extractors"""
+    category = "vsco"
+    root = "https://vsco.co"
+    directory_fmt = ("{category}", "{user}")
+    filename_fmt = "{id}.{extension}"
+    archive_fmt = "{id}"
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.user = match.group(1).lower()
+
+    def items(self):
+        yield Message.Version, 1
+        yield Message.Directory, {"user": self.user}
+        for img in self.images():
+            url = "https://" + (img.get("video_url") or img["responsive_url"])
+            data = text.nameext_from_url(url, {
+                "id"    : img["_id"],
+                "user"  : self.user,
+                "grid"  : img["grid_name"],
+                "meta"  : img.get("image_meta") or {},
+                "tags"  : [tag["text"] for tag in img.get("tags") or ()],
+                "date"  : text.parse_timestamp(img["upload_date"] // 1000),
+                "video" : img["is_video"],
+                "width" : img["width"],
+                "height": img["height"],
+                "description": img["description"],
+            })
+            yield Message.Url, url, data
+
+    def images(self):
+        """Return an iterable with all relevant image objects"""
+
+    def _extract_preload_state(self, url):
+        page = self.request(url, notfound=self.subcategory).text
+        return json.loads(text.extract(page, "__PRELOADED_STATE__ = ", "<")[0])
+
+    def _pagination(self, url, params, token, key, extra):
+        headers = {
+            "Referer"          : "{}/{}".format(self.root, self.user),
+            "Authorization"    : "Bearer " + token,
+            "X-Client-Platform": "web",
+            "X-Client-Build"   : "1",
+        }
+
+        yield from map(self._transform_media, extra)
+
+        while True:
+            data = self.request(url, params=params, headers=headers).json()
+            if not data.get(key):
+                return
+            yield from data[key]
+            params["page"] += 1
+
+    @staticmethod
+    def _transform_media(media):
+        media["_id"] = media["id"]
+        media["is_video"] = media["isVideo"]
+        media["grid_name"] = media["gridName"]
+        media["upload_date"] = media["uploadDate"]
+        media["responsive_url"] = media["responsiveUrl"]
+        media["video_url"] = media.get("videoUrl")
+        media["image_meta"] = media.get("imageMeta")
+        return media
+
+
+class VscoUserExtractor(VscoExtractor):
+    """Extractor for images from a user on vsco.co"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/images/"
+    test = ("https://vsco.co/missuri/images/1", {
+        "range": "1-80",
+        "count": 80,
+        "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
+    })
+
+    def images(self):
+        url = "{}/{}/images/1".format(self.root, self.user)
+        data = self._extract_preload_state(url)
+
+        tkn = data["users"]["currentUser"]["tkn"]
+        sid = str(data["sites"]["siteByUsername"][self.user]["site"]["id"])
+
+        url = "{}/api/2.0/medias".format(self.root)
+        params = {"page": 2, "size": "30", "site_id": sid}
+        return self._pagination(url, params, tkn, "media", (
+            data["medias"]["byId"][mid]["media"]
+            for mid in data["medias"]["bySiteId"][sid]["medias"]["1"]
+        ))
+
+
+class VscoCollectionExtractor(VscoExtractor):
+    """Extractor for images from a collection on vsco.co"""
+    subcategory = "collection"
+    directory_fmt = ("{category}", "{user}", "collection")
+    archive_fmt = "c_{user}_{id}"
+    pattern = BASE_PATTERN + r"/collection/"
+    test = ("https://vsco.co/vsco/collection/1", {
+        "range": "1-80",
+        "count": 80,
+        "pattern": r"https://im\.vsco\.co/[^/]+/[0-9a-f/]+/vsco\w+\.\w+",
+    })
+
+    def images(self):
+        url = "{}/{}/collection/1".format(self.root, self.user)
+        data = self._extract_preload_state(url)
+
+        tkn = data["users"]["currentUser"]["tkn"]
+        cid = (data["sites"]["siteByUsername"][self.user]
+               ["site"]["siteCollectionId"])
+
+        url = "{}/api/2.0/collections/{}/medias".format(self.root, cid)
+        params = {"page": 2, "size": "20"}
+        return self._pagination(url, params, tkn, "medias", (
+            data["medias"]["byId"][mid]["media"]
+            for mid in data
+            ["collections"]["byCollectionId"][cid]["collection"]["1"]
+        ))
+
+
+class VscoImageExtractor(VscoExtractor):
+    """Extractor for individual images on vsco.co"""
+    subcategory = "image"
+    pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)"
+    test = (
+        ("https://vsco.co/erenyildiz/media/5d34b93ef632433030707ce2", {
+            "url": "faa214d10f859f374ad91da3f7547d2439f5af08",
+            "content": "1394d070828d82078035f19a92f404557b56b83f",
+            "keyword": {
+                "id"    : "5d34b93ef632433030707ce2",
+                "user"  : "erenyildiz",
+                "grid"  : "erenyildiz",
+                "meta"  : dict,
+                "tags"  : list,
+                "date"  : "type:datetime",
+                "video" : False,
+                "width" : 1537,
+                "height": 1537,
+                "description": "re:Ni seviyorum. #vsco #vscox #vscochallenges",
+            },
+        }),
+        ("https://vsco.co/jimenalazof/media/5b4feec558f6c45c18c040fd", {
+            "url": "08e7eef3301756ce81206c0b47c1e9373756a74a",
+            "content": "e739f058d726ee42c51c180a505747972a7dfa47",
+            "keyword": {"video" : True},
+        }),
+    )
+
+    def __init__(self, match):
+        VscoExtractor.__init__(self, match)
+        self.media_id = match.group(2)
+
+    def images(self):
+        url = "{}/{}/media/{}".format(self.root, self.user, self.media_id)
+        data = self._extract_preload_state(url)
+        media = data["medias"]["byId"].popitem()[1]["media"]
+        return (self._transform_media(media),)
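The pattern behind `_extract_preload_state` above is worth noting: vsco.co embeds its page data as a JavaScript assignment, so the extractor slices out the JSON literal and parses it directly, with no HTML parsing needed. A standalone sketch of the same idea (the function name and the use of `requests` are mine):

```python
import json
import requests

def preload_state(url):
    """Parse the __PRELOADED_STATE__ JSON embedded in a vsco.co page."""
    page = requests.get(url).text
    # everything between the assignment and the next "<" is a JSON literal
    raw = page.partition("__PRELOADED_STATE__ = ")[2].partition("<")[0]
    return json.loads(raw)
```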
["site"]["siteCollectionId"]) + + url = "{}/api/2.0/collections/{}/medias".format(self.root, cid) + params = {"page": 2, "size": "20"} + return self._pagination(url, params, tkn, "medias", ( + data["medias"]["byId"][mid]["media"] + for mid in data + ["collections"]["byCollectionId"][cid]["collection"]["1"] + )) + + +class VscoImageExtractor(VscoExtractor): + """Extractor for individual images on vsco.co""" + subcategory = "image" + pattern = BASE_PATTERN + r"/media/([0-9a-fA-F]+)" + test = ( + ("https://vsco.co/erenyildiz/media/5d34b93ef632433030707ce2", { + "url": "faa214d10f859f374ad91da3f7547d2439f5af08", + "content": "1394d070828d82078035f19a92f404557b56b83f", + "keyword": { + "id" : "5d34b93ef632433030707ce2", + "user" : "erenyildiz", + "grid" : "erenyildiz", + "meta" : dict, + "tags" : list, + "date" : "type:datetime", + "video" : False, + "width" : 1537, + "height": 1537, + "description": "re:Ni seviyorum. #vsco #vscox #vscochallenges", + }, + }), + ("https://vsco.co/jimenalazof/media/5b4feec558f6c45c18c040fd", { + "url": "08e7eef3301756ce81206c0b47c1e9373756a74a", + "content": "e739f058d726ee42c51c180a505747972a7dfa47", + "keyword": {"video" : True}, + }), + ) + + def __init__(self, match): + VscoExtractor.__init__(self, match) + self.media_id = match.group(2) + + def images(self): + url = "{}/{}/media/{}".format(self.root, self.user, self.media_id) + data = self._extract_preload_state(url) + media = data["medias"]["byId"].popitem()[1]["media"] + return (self._transform_media(media),) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 20823a6..637561a 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -81,7 +81,8 @@ class Job(): "https://github.com/mikf/gallery-dl/issues ."), exc.__class__.__name__, exc) log.debug("", exc_info=True) - self.handle_finalize() + finally: + self.handle_finalize() def dispatch(self, msg): """Call the appropriate message handler""" diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index 3a0c323..1075c70 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -25,7 +25,7 @@ class ZipPP(PostProcessor): def __init__(self, pathfmt, options): PostProcessor.__init__(self) self.delete = not options.get("keep-files", False) - self.ext = "." + options.get("extension", "zip") + ext = "." 
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 81e87b5..72dad5b 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -83,22 +83,6 @@ def nameext_from_url(url, data=None):
     return data
 
 
-def clean_path_windows(path):
-    """Remove illegal characters from a path-segment (Windows)"""
-    try:
-        return re.sub(r'[<>:"\\/|?*]', "_", path)
-    except TypeError:
-        return ""
-
-
-def clean_path_posix(path):
-    """Remove illegal characters from a path-segment (Posix)"""
-    try:
-        return path.replace("/", "_")
-    except AttributeError:
-        return ""
-
-
 def extract(txt, begin, end, pos=0):
     """Extract the text between 'begin' and 'end' from 'txt'
 
@@ -266,12 +250,6 @@ def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z"):
         return date_string
 
 
-if os.name == "nt":
-    clean_path = clean_path_windows
-else:
-    clean_path = clean_path_posix
-
-
 urljoin = urllib.parse.urljoin
 quote   = urllib.parse.quote
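The module-level `clean_path` helpers deleted here are superseded by `PathFormat._build_cleanfunc` in `gallery_dl/util.py` below, which builds one specialised closure per extractor from the configured `restrict-filenames` character set instead of branching on the OS at call time. The same construction as a standalone sketch:

```python
import re

def build_cleanfunc(chars):
    """Return a function replacing every character in 'chars' with '_'."""
    if not chars:
        return lambda s: s
    if len(chars) == 1:
        return lambda s, c=chars: s.replace(c, "_")
    # precompile the character class once; the closure reuses it per call
    sub = re.compile("[" + re.escape(chars) + "]").sub
    return lambda s: sub("_", s)

clean = build_cleanfunc('<>:"\\|/?*')  # the documented "windows" set
assert clean("foo<bar>:baz") == "foo_bar__baz"
```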
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 14ae3d2..02d998d 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -535,6 +535,27 @@ class PathFormat():
         if os.altsep and os.altsep in self.basedirectory:
             self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
 
+        restrict = extractor.config("restrict-filenames", "auto")
+        if restrict == "auto":
+            restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
+        elif restrict == "unix":
+            restrict = "/"
+        elif restrict == "windows":
+            restrict = "<>:\"\\/|?*"
+        self.clean_path = self._build_cleanfunc(restrict)
+
+    @staticmethod
+    def _build_cleanfunc(repl):
+        if not repl:
+            return lambda x: x
+        elif len(repl) == 1:
+            def func(x, r=repl):
+                return x.replace(r, "_")
+        else:
+            def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
+                return sub("_", x)
+        return func
+
     def open(self, mode="wb"):
         """Open file and return a corresponding file object"""
         return open(self.temppath, mode)
@@ -551,7 +572,7 @@ class PathFormat():
         """Build directory path and create it if necessary"""
         try:
             segments = [
-                text.clean_path(
+                self.clean_path(
                     Formatter(segment, self.kwdefault)
                     .format_map(keywords).strip())
                 for segment in self.directory_fmt
@@ -597,7 +618,7 @@ class PathFormat():
     def build_path(self):
         """Use filename-keywords and directory to build a full path"""
         try:
-            self.filename = text.clean_path(
+            self.filename = self.clean_path(
                 self.formatter.format_map(self.keywords))
         except Exception as exc:
             raise exception.FormatError(exc, "filename")
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d970ed6..d9cc3d6 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-__version__ = "1.9.0"
+__version__ = "1.10.1"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 3d86110..498e3fc 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -13,6 +13,7 @@ from gallery_dl import extractor
 CATEGORY_MAP = {
     "2chan"          : "Futaba Channel",
     "35photo"        : "35PHOTO",
+    "adultempire"    : "Adult Empire",
     "archivedmoe"    : "Archived.Moe",
     "archiveofsins"  : "Archive of Sins",
     "artstation"     : "ArtStation",
@@ -23,7 +24,7 @@ CATEGORY_MAP = {
     "dynastyscans"   : "Dynasty Reader",
     "e621"           : "e621",
     "erolord"        : "EroLord.com",
-    "exhentai"       : "ExHentai",
+    "exhentai"       : "E-Hentai",
     "fallenangels"   : "Fallen Angels Scans",
     "fashionnova"    : "Fashion Nova",
     "hbrowse"        : "HBrowse",
@@ -36,6 +37,7 @@ CATEGORY_MAP = {
     "idolcomplex"    : "Idol Complex",
     "imagebam"       : "ImageBam",
     "imagefap"       : "ImageFap",
+    "imgbb"          : "ImgBB",
     "imgbox"         : "imgbox",
     "imgth"          : "imgth",
     "imgur"          : "imgur",
@@ -71,6 +73,7 @@ CATEGORY_MAP = {
     "smugmug"        : "SmugMug",
     "thebarchive"    : "The /b/ Archive",
     "vanillarock"    : "もえぴりあ",
+    "vsco"           : "VSCO",
     "wikiart"        : "WikiArt.org",
     "worldthree"     : "World Three",
     "xhamster"       : "xHamster",
@@ -109,6 +112,7 @@ AUTH_MAP = {
     "exhentai"   : "Optional",
     "flickr"     : "Optional (OAuth)",
     "idolcomplex": "Optional",
+    "imgbb"      : "Optional",
     "instagram"  : "Optional",
     "luscious"   : "Optional",
     "mangoxo"    : "Optional",
@@ -121,6 +125,8 @@ AUTH_MAP = {
     "tsumino"    : "Optional",
     "tumblr"     : "Optional (OAuth)",
     "twitter"    : "Optional",
+    "wallhaven"  : ("Optional (`API Key "
+                    "<configuration.rst#extractorwallhavenapi-key>`__)"),
 }
 
 IGNORE_LIST = (
diff --git a/test/test_results.py b/test/test_results.py
index 41390a8..839a75c 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -26,9 +26,12 @@ TRAVIS_SKIP = {
 
 # temporary issues, etc.
 BROKEN = {
-    "komikcast",
-    "konachan",
+    "adultempire",
+    "flickr",
+    "imgth",
+    "mangafox",
     "mangapark",
+    "pixnet",
 }
 
diff --git a/test/test_text.py b/test/test_text.py
index 405acd3..6a6d83a 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -139,26 +139,6 @@ class TestText(unittest.TestCase):
         for value in INVALID:
             self.assertEqual(f(value), empty)
 
-    def test_clean_path_windows(self, f=text.clean_path_windows):
-        self.assertEqual(f(""), "")
-        self.assertEqual(f("foo"), "foo")
-        self.assertEqual(f("foo/bar"), "foo_bar")
-        self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo_________bar")
-
-        # invalid arguments
-        for value in INVALID:
-            self.assertEqual(f(value), "")
-
-    def test_clean_path_posix(self, f=text.clean_path_posix):
-        self.assertEqual(f(""), "")
-        self.assertEqual(f("foo"), "foo")
-        self.assertEqual(f("foo/bar"), "foo_bar")
-        self.assertEqual(f("foo<>:\"\\/|?*bar"), "foo<>:\"\\_|?*bar")
-
-        # invalid arguments
-        for value in INVALID:
-            self.assertEqual(f(value), "")
-
     def test_extract(self, f=text.extract):
         txt = "<a><b>"
         self.assertEqual(f(txt, "<", ">"), ("a" , 3))
