diff options
| -rw-r--r-- | CHANGELOG.md | 11 | ||||
| -rw-r--r-- | README.rst | 6 | ||||
| -rw-r--r-- | docs/configuration.rst | 22 | ||||
| -rw-r--r-- | docs/gallery-dl.conf | 1 | ||||
| -rw-r--r-- | gallery_dl/downloader/http.py | 15 | ||||
| -rw-r--r-- | gallery_dl/downloader/ytdl.py | 17 | ||||
| -rw-r--r-- | gallery_dl/extractor/8muses.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/adultempire.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 127 | ||||
| -rw-r--r-- | gallery_dl/extractor/flickr.py | 16 | ||||
| -rw-r--r-- | gallery_dl/extractor/piczel.py | 72 | ||||
| -rw-r--r-- | gallery_dl/extractor/pornhub.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/seiga.py | 5 | ||||
| -rw-r--r-- | gallery_dl/postprocessor/zip.py | 2 | ||||
| -rw-r--r-- | gallery_dl/util.py | 26 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 | ||||
| -rw-r--r-- | test/test_postprocessor.py | 100 | ||||
| -rw-r--r-- | test/test_results.py | 4 |
18 files changed, 309 insertions, 123 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 99df78a..a2b5109 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## 1.10.3 - 2019-08-30 +### Additions +- Provide `filename` metadata for all `deviantart` files ([#392](https://github.com/mikf/gallery-dl/issues/392), [#400](https://github.com/mikf/gallery-dl/issues/400)) +- Implement a `ytdl.outtmpl` option to let youtube-dl handle filenames by itself ([#395](https://github.com/mikf/gallery-dl/issues/395)) +- Support `seiga` mobile URLs ([#401](https://github.com/mikf/gallery-dl/issues/401)) +### Fixes +- Extract more than the first 32 posts from `piczel` galleries ([#396](https://github.com/mikf/gallery-dl/issues/396)) +- Fix filenames of archives created with `--zip` ([#397](https://github.com/mikf/gallery-dl/issues/397)) +- Skip unavailable images and videos on `flickr` ([#398](https://github.com/mikf/gallery-dl/issues/398)) +- Fix filesystem paths on Windows with Python 3.6 and lower ([#402](https://github.com/mikf/gallery-dl/issues/402)) + ## 1.10.2 - 2019-08-23 ### Additions - Support for `instagram` stories and IGTV ([#371](https://github.com/mikf/gallery-dl/issues/371), [#373](https://github.com/mikf/gallery-dl/issues/373)) @@ -78,8 +78,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.2/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.2/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.bin>`__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -224,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.2.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.3.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. _Python: https://www.python.org/downloads/ diff --git a/docs/configuration.rst b/docs/configuration.rst index 0e2e355..d69406d 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -581,7 +581,8 @@ extractor.deviantart.quality =========== ===== Type ``integer`` Default ``100`` -Description JPEG compression quality for newer images hosted on wixmp servers. +Description JPEG quality level of newer images for which + an original file download is not available. =========== ===== @@ -1171,6 +1172,25 @@ Description | Route youtube-dl's output through gallery-dl's logging system. =========== ===== +downloader.ytdl.outtmpl +----------------------- +=========== ===== +Type ``string`` +Default ``null`` +Description The `Output Template <https://github.com/ytdl-org/youtube-dl#output-template>`__ + used to generate filenames for files downloaded with youtube-dl. + + Special values: + + * ``null``: generate filenames with `extractor.*.filename`_ + * ``"default"``: use youtube-dl's default, currently ``"%(title)s-%(id)s.%(ext)s"`` + + Note: An output template other than ``null`` might + cause unexpected results in combination with other options + (e.g. ``"skip": "enumerate"``) +=========== ===== + + downloader.ytdl.raw-options --------------------------- =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index b9ff32d..6439437 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -169,6 +169,7 @@ "format": null, "forward-cookies": true, "mtime": true, + "outtmpl": null, "rate": null, "retries": 4, "timeout": 30.0, diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index e3229eb..4c5fed5 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -38,11 +38,12 @@ class HttpDownloader(DownloaderBase): if self.retries < 0: self.retries = float("inf") if self.rate: - self.rate = text.parse_bytes(self.rate) - if not self.rate: - self.log.warning("Invalid rate limit specified") - elif self.rate < self.chunk_size: - self.chunk_size = self.rate + rate = text.parse_bytes(self.rate) + if not rate: + self.log.warning("Invalid rate limit (%r)", self.rate) + elif rate < self.chunk_size: + self.chunk_size = rate + self.rate = rate def download(self, url, pathfmt): try: @@ -124,10 +125,10 @@ class HttpDownloader(DownloaderBase): if not offset: mode = "w+b" if filesize: - self.log.info("Unable to resume partial download") + self.log.debug("Unable to resume partial download") else: mode = "r+b" - self.log.info("Resuming download at byte %d", offset) + self.log.debug("Resuming download at byte %d", offset) # start downloading self.out.start(pathfmt.path) diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 7d8b905..ce921e3 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -8,7 +8,7 @@ """Downloader module for URLs requiring youtube-dl support""" -from youtube_dl import YoutubeDL +from youtube_dl import YoutubeDL, DEFAULT_OUTTMPL from .common import DownloaderBase from .. import text import os @@ -36,6 +36,9 @@ class YoutubeDLDownloader(DownloaderBase): options["logger"] = self.log self.forward_cookies = self.config("forward-cookies", True) + outtmpl = self.config("outtmpl") + self.outtmpl = DEFAULT_OUTTMPL if outtmpl == "default" else outtmpl + self.ytdl = YoutubeDL(options) def download(self, url, pathfmt): @@ -60,7 +63,17 @@ class YoutubeDLDownloader(DownloaderBase): def _download_video(self, pathfmt, info_dict): if "url" in info_dict: text.nameext_from_url(info_dict["url"], pathfmt.kwdict) - pathfmt.set_extension(info_dict["ext"]) + + if self.outtmpl: + self.ytdl.params["outtmpl"] = self.outtmpl + pathfmt.filename = filename = self.ytdl.prepare_filename(info_dict) + pathfmt.extension = info_dict["ext"] + pathfmt.path = pathfmt.directory + filename + pathfmt.realpath = pathfmt.temppath = ( + pathfmt.realdirectory + filename) + else: + pathfmt.set_extension(info_dict["ext"]) + if pathfmt.exists(): pathfmt.temppath = "" return True diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index 6fbf6b5..f5ca9ce 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -55,7 +55,7 @@ class _8musesAlbumExtractor(Extractor): }), ("https://www.8muses.com/comics/album/Fakku-Comics/6?sort=az", { "count": ">= 70", - "keyword": {"name": r"re:^[S-Zs-z]"}, + "keyword": {"name": r"re:^[R-Zr-z]"}, }), ) diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py index 5e2480a..85d8266 100644 --- a/gallery_dl/extractor/adultempire.py +++ b/gallery_dl/extractor/adultempire.py @@ -26,7 +26,7 @@ class AdultempireGalleryExtractor(GalleryExtractor): }), ("https://www.adultdvdempire.com/5683/gallery.html", { "url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d", - "keyword": "0fe9a6e3f0a331b95ba77f66a643705ca86e8ec5", + "keyword": "9634eb16cc6dbf347eb9dcdd9b2a499dfd04d167", }), ) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index bd1299b..6614755 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -9,7 +9,7 @@ """Extract images from https://www.deviantart.com/""" from .common import Extractor, Message -from .. import text, exception +from .. import text, util, exception from ..cache import cache, memcache import collections import itertools @@ -137,30 +137,37 @@ class DeviantartExtractor(Extractor): deviation["date"] = text.parse_timestamp( deviation["published_time"]) + # filename metadata + alphabet = "0123456789abcdefghijklmnopqrstuvwxyz" + sub = re.compile(r"\W").sub + deviation["filename"] = "".join(( + sub("_", deviation["title"].lower()), "_by_", + sub("_", deviation["author"]["username"].lower()), "-d", + util.bencode(deviation["index"], alphabet), + )) + @staticmethod def commit(deviation, target): url = target["src"] - thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url - target = text.nameext_from_url(thumb, target.copy()) - if target["filename"].endswith("-150"): - target["filename"] = target["filename"][:-4] - if not target["filename"].count("-"): - name, _, hid = target["filename"].rpartition("_") - target["filename"] = name + "-" + hid + target = target.copy() + target["filename"] = deviation["filename"] deviation["target"] = target - deviation["filename"] = target["filename"] deviation["extension"] = target["extension"] = text.ext_from_url(url) return Message.Url, url, deviation def _commit_journal_html(self, deviation, journal): title = text.escape(deviation["title"]) url = deviation["url"] - thumbs = deviation["thumbs"] + thumbs = deviation.get("thumbs") or deviation.get("files") html = journal["html"] shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else "" if "css" in journal: css, cls = journal["css"], "withskin" + elif html.startswith("<style"): + css, _, html = html.partition("</style>") + css = css.partition(">")[2] + cls = "withskin" else: css, cls = "", "journal-green" @@ -191,22 +198,25 @@ class DeviantartExtractor(Extractor): categories=categories, ) + if needle in html: + html = html.replace(needle, header, 1) + else: + html = JOURNAL_TEMPLATE_HTML_EXTRA.format(header, html) + html = JOURNAL_TEMPLATE_HTML.format( - title=title, - html=html.replace(needle, header, 1), - shadow=shadow, - css=css, - cls=cls, - ) + title=title, html=html, shadow=shadow, css=css, cls=cls) deviation["extension"] = "htm" return Message.Url, html, deviation @staticmethod def _commit_journal_text(deviation, journal): + html = journal["html"] + if html.startswith("<style"): + html = html.partition("</style>")[2] content = "\n".join( text.unescape(text.remove_html(txt)) - for txt in journal["html"].rpartition("<script")[0].split("<br />") + for txt in html.rpartition("<script")[0].split("<br />") ) txt = JOURNAL_TEMPLATE_TEXT.format( title=deviation["title"], @@ -302,7 +312,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor): }), # 'folders' option (#276) ("https://www.deviantart.com/justatest235723", { - "count": 2, + "count": 3, "options": (("metadata", 1), ("folders", 1), ("original", 0)), "keyword": { "description": str, @@ -481,19 +491,21 @@ class DeviantartJournalExtractor(DeviantartExtractor): subcategory = "journal" directory_fmt = ("{category}", "{username}", "Journal") archive_fmt = "j_{username}_{index}.{extension}" - pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$" + pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$" test = ( - ("https://www.deviantart.com/angrywhitewanker/journal/", { + ("https://www.deviantart.com/angrywhitewanker/posts/journals/", { "url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44", }), - ("https://www.deviantart.com/angrywhitewanker/journal/", { + ("https://www.deviantart.com/angrywhitewanker/posts/journals/", { "url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e", "options": (("journals", "text"),), }), - ("https://www.deviantart.com/angrywhitewanker/journal/", { + ("https://www.deviantart.com/angrywhitewanker/posts/journals/", { "count": 0, "options": (("journals", "none"),), }), + ("https://www.deviantart.com/shimoda7/posts/"), + ("https://www.deviantart.com/shimoda7/journal/"), ("https://www.deviantart.com/shimoda7/journal/?catpath=/"), ("https://shimoda7.deviantart.com/journal/"), ("https://shimoda7.deviantart.com/journal/?catpath=/"), @@ -546,22 +558,8 @@ class DeviantartPopularExtractor(DeviantartExtractor): deviation["popular"] = self.popular -class DeviantartExtractorV2(Extractor): +class DeviantartExtractorV2(DeviantartExtractor): """Base class for deviantart extractors using the NAPI""" - category = "deviantart" - directory_fmt = ("{category}", "{author[username]!l}") - filename_fmt = "{category}_{index}_{title}.{extension}" - root = "https://www.deviantart.com" - - def __init__(self, match=None): - Extractor.__init__(self, match) - self.offset = 0 - self.extra = self.config("extra", False) - self.quality = self.config("quality", "100") - self.user = match.group(1) or match.group(2) - - if self.quality: - self.quality = "q_{}".format(self.quality) def items(self): url = ( @@ -621,15 +619,17 @@ class DeviantartExtractorV2(Extractor): # extract download target target = files[-1] - name = files[0]["src"] - if target["type"] == "gif": + if deviation["isJournal"] and self.commit_journal: + journal = deviation["textContent"] + journal["html"] = journal["html"]["markup"] + target["src"] = self.commit_journal(deviation, journal)[1] + elif target["type"] == "gif": pass elif target["type"] == "video": # select largest video target = max( files, key=lambda x: text.parse_int(x.get("quality", "")[:-1])) - name = target["src"] elif target["type"] == "flash": if target["src"].startswith("https://sandbox.deviantart.com"): # extract SWF file from "sandbox" @@ -641,9 +641,7 @@ class DeviantartExtractorV2(Extractor): target = extended["download"] target["src"] = target["url"] del target["url"] - - # url rewrites - if target["src"].startswith("https://images-wixmp-"): + elif target["src"].startswith("https://images-wixmp-"): if deviation["index"] <= 790677560: # https://github.com/r888888888/danbooru/issues/4069 target["src"] = re.sub( @@ -653,16 +651,20 @@ class DeviantartExtractorV2(Extractor): target["src"] = re.sub( r"q_\d+", self.quality, target["src"]) - text.nameext_from_url(name, target) - if target["filename"].endswith("-150"): - target["filename"] = target["filename"][:-4] - if not target["filename"].count("-"): - name, _, hid = target["filename"].rpartition("_") - target["filename"] = name + "-" + hid + # filename and extension metadata + alphabet = "0123456789abcdefghijklmnopqrstuvwxyz" + sub = re.compile(r"\W").sub + deviation["filename"] = target["filename"] = "".join(( + sub("_", deviation["title"].lower()), "_by_", + sub("_", deviation["author"]["username"].lower()), "-d", + util.bencode(deviation["index"], alphabet), + )) + if "extension" not in deviation: + deviation["extension"] = target["extension"] = ( + text.ext_from_url(target["src"]) + ) deviation["target"] = target - deviation["filename"] = target["filename"] - deviation["extension"] = target["extension"] = ( - text.ext_from_url(target["src"])) + return deviation @@ -1118,6 +1120,27 @@ roses/cssmin/desktop.css?1491362542749" > </html> """ +JOURNAL_TEMPLATE_HTML_EXTRA = """\ +<div id="devskin0"><div class="negate-box-margin" style="">\ +<div usr class="gr-box gr-genericbox" + ><i usr class="gr1"><i></i></i + ><i usr class="gr2"><i></i></i + ><i usr class="gr3"><i></i></i + ><div usr class="gr-top"> + <i usr class="tri"></i> + {} + </div> + </div><div usr class="gr-body"><div usr class="gr"> + <div class="grf-indent"> + <div class="text"> + {} </div> + </div> + </div></div> + <i usr class="gr3 gb"></i> + <i usr class="gr2 gb"></i> + <i usr class="gr1 gb gb1"></i> </div> + </div></div>""" + JOURNAL_TEMPLATE_TEXT = """text:{title} by {username}, {date} diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py index d941d76..73b8ec4 100644 --- a/gallery_dl/extractor/flickr.py +++ b/gallery_dl/extractor/flickr.py @@ -25,12 +25,20 @@ class FlickrExtractor(Extractor): def items(self): data = self.metadata() + extract = self.api._extract_format yield Message.Version, 1 yield Message.Directory, data for photo in self.photos(): - photo.update(data) - url = photo["url"] - yield Message.Url, url, text.nameext_from_url(url, photo) + try: + photo = extract(photo) + except Exception as exc: + self.log.warning( + "Skipping %s (%s)", photo["id"], exc.__class__.__name__) + self.log.debug("", exc_info=True) + else: + photo.update(data) + url = photo["url"] + yield Message.Url, url, text.nameext_from_url(url, photo) def metadata(self): """Return general metadata""" @@ -432,7 +440,7 @@ class FlickrAPI(oauth.OAuth1API): while True: data = self._call(method, params)[key] - yield from map(self._extract_format, data["photo"]) + yield from data["photo"] if params["page"] >= data["pages"]: return params["page"] += 1 diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py index 6a5c41c..2775dac 100644 --- a/gallery_dl/extractor/piczel.py +++ b/gallery_dl/extractor/piczel.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://piczel.tv/""" +"""Extractors for https://piczel.tv/""" from .common import Extractor, Message from .. import text @@ -19,21 +19,12 @@ class PiczelExtractor(Extractor): filename_fmt = "{category}_{id}_{title}_{num:>02}.{extension}" archive_fmt = "{id}_{num}" root = "https://piczel.tv" - api_root = "https://apollo.piczel.tv" - - def __init__(self, match): - Extractor.__init__(self, match) - self.item_id = match.group(1) def items(self): - first = True yield Message.Version, 1 - for image in self.unpack(self.get_images()): - if first: - yield Message.Directory, image - first = False - path = image["image"]["image"]["url"] - url = "{}/static/{}".format(self.api_root, path) + for image in self.unpack(self.images()): + url = self.root + "/static" + image["image"]["image"]["url"] + yield Message.Directory, image yield Message.Url, url, text.nameext_from_url(url, image) @staticmethod @@ -50,21 +41,40 @@ class PiczelExtractor(Extractor): image["num"] = 0 yield image - def get_images(self): + def images(self): """Return an iterable with all relevant image objects""" + def _pagination(self, url, folder_id=None): + params = { + "hideNsfw" : "false", + "from_id" : None, + "folder_id": folder_id, + } + + while True: + data = self.request(url, params=params).json() + yield from data + + if len(data) < 32: + return + params["from_id"] = data[-1]["id"] + class PiczelUserExtractor(PiczelExtractor): """Extractor for all images from a user's gallery""" subcategory = "user" pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$" - test = ("https://piczel.tv/gallery/Lulena", { - "count": ">= 13", + test = ("https://piczel.tv/gallery/Maximumwarp", { + "count": ">= 50", }) - def get_images(self): - url = "{}/api/users/{}/gallery".format(self.api_root, self.item_id) - return self.request(url).json() + def __init__(self, match): + PiczelExtractor.__init__(self, match) + self.user = match.group(1) + + def images(self): + url = "{}/api/users/{}/gallery".format(self.root, self.user) + return self._pagination(url) class PiczelFolderExtractor(PiczelExtractor): @@ -73,16 +83,18 @@ class PiczelFolderExtractor(PiczelExtractor): directory_fmt = ("{category}", "{user[username]}", "{folder[name]}") archive_fmt = "f{folder[id]}_{id}_{num}" pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv" - r"/gallery/(?!image)[^/?&#]+/(\d+)") + r"/gallery/(?!image)([^/?&#]+)/(\d+)") test = ("https://piczel.tv/gallery/Lulena/1114", { "count": ">= 4", }) - def get_images(self): - url = "{}/api/gallery/folder/{}".format(self.api_root, self.item_id) - images = self.request(url).json() - images.reverse() - return images + def __init__(self, match): + PiczelExtractor.__init__(self, match) + self.user, self.folder_id = match.groups() + + def images(self): + url = "{}/api/users/{}/gallery".format(self.root, self.user) + return self._pagination(url, self.folder_id) class PiczelImageExtractor(PiczelExtractor): @@ -90,7 +102,7 @@ class PiczelImageExtractor(PiczelExtractor): subcategory = "image" pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)" test = ("https://piczel.tv/gallery/image/7807", { - "url": "9b9e416b6ab7e58676fab84453d5028f306ece34", + "url": "85225dd53a03c3b6028f6c4a45b71eccc07f7066", "content": "df9a053a24234474a19bce2b7e27e0dec23bff87", "keyword": { "created_at": "2018-07-22T05:13:58.000Z", @@ -113,6 +125,10 @@ class PiczelImageExtractor(PiczelExtractor): }, }) - def get_images(self): - url = "{}/api/gallery/image/{}".format(self.api_root, self.item_id) + def __init__(self, match): + PiczelExtractor.__init__(self, match) + self.image_id = match.group(1) + + def images(self): + url = "{}/api/gallery/image/{}".format(self.root, self.image_id) return (self.request(url).json(),) diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py index 40816b3..bbbc709 100644 --- a/gallery_dl/extractor/pornhub.py +++ b/gallery_dl/extractor/pornhub.py @@ -123,7 +123,7 @@ class PornhubUserExtractor(PornhubExtractor): test = ( ("https://www.pornhub.com/users/flyings0l0/photos/public", { "pattern": PornhubGalleryExtractor.pattern, - "count": ">= 8", + "count": ">= 6", }), ("https://www.pornhub.com/users/flyings0l0/"), ("https://www.pornhub.com/users/flyings0l0/photos/public"), diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py index 0d92573..69962c8 100644 --- a/gallery_dl/extractor/seiga.py +++ b/gallery_dl/extractor/seiga.py @@ -70,7 +70,7 @@ class SeigaUserExtractor(SeigaExtractor): subcategory = "user" directory_fmt = ("{category}", "{user[id]}") filename_fmt = "{category}_{user[id]}_{image_id}.{extension}" - pattern = (r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/" + pattern = (r"(?:https?://)?(?:www\.|(?:sp\.)?seiga\.)?nicovideo\.jp/" r"user/illust/(\d+)(?:\?(?:[^&]+&)*sort=([^&#]+))?") test = ( ("https://seiga.nicovideo.jp/user/illust/39537793", { @@ -96,6 +96,7 @@ class SeigaUserExtractor(SeigaExtractor): }), ("https://seiga.nicovideo.jp/user/illust/39537793" "?sort=image_view&target=illust_all"), + ("https://sp.seiga.nicovideo.jp/user/illust/39537793"), ) def __init__(self, match): @@ -167,6 +168,7 @@ class SeigaImageExtractor(SeigaExtractor): filename_fmt = "{category}_{image_id}.{extension}" pattern = (r"(?:https?://)?(?:" r"(?:seiga\.|www\.)?nicovideo\.jp/(?:seiga/im|image/source/)" + r"|sp\.seiga\.nicovideo\.jp/seiga/#!/im" r"|lohas\.nicoseiga\.jp/(?:thumb|(?:priv|o)/[^/]+/\d+)/)(\d+)") test = ( ("https://seiga.nicovideo.jp/seiga/im5977527", { @@ -177,6 +179,7 @@ class SeigaImageExtractor(SeigaExtractor): "exception": exception.NotFoundError, }), ("https://seiga.nicovideo.jp/image/source/5977527"), + ("https://sp.seiga.nicovideo.jp/seiga/#!/im5977527"), ("https://lohas.nicoseiga.jp/thumb/5977527i"), ("https://lohas.nicoseiga.jp/priv" "/759a4ef1c639106ba4d665ee6333832e647d0e4e/1549727594/5977527"), diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py index 1075c70..6659a8d 100644 --- a/gallery_dl/postprocessor/zip.py +++ b/gallery_dl/postprocessor/zip.py @@ -34,7 +34,7 @@ class ZipPP(PostProcessor): algorithm = "store" self.path = pathfmt.realdirectory - args = (self.path + ext, "a", + args = (self.path[:-1] + ext, "a", self.COMPRESSION_ALGORITHMS[algorithm], True) if options.get("mode") == "safe": diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 79fa175..17cd73a 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -608,11 +608,13 @@ class PathFormat(): """Build directory path and create it if necessary""" # Build path segments by applying 'kwdict' to directory format strings + segments = [] + append = segments.append try: - segments = [ - self.clean_segment(format_map(kwdict).strip()) - for format_map in self.directory_formatters - ] + for formatter in self.directory_formatters: + segment = formatter(kwdict).strip() + if segment: + append(self.clean_segment(segment)) except Exception as exc: raise exception.FormatError(exc, "directory") @@ -620,16 +622,20 @@ class PathFormat(): sep = os.sep directory = self.clean_path(self.basedirectory + sep.join(segments)) - # Ensure directory ends with a path separator - if directory[-1] != sep: + # Ensure 'directory' ends with a path separator + if segments: directory += sep self.directory = directory - # Enable longer-than-260-character paths on Windows if os.name == "nt": - self.realdirectory = "\\\\?\\" + os.path.abspath(directory) + sep - else: - self.realdirectory = directory + # Enable longer-than-260-character paths on Windows + directory = "\\\\?\\" + os.path.abspath(directory) + + # abspath() in Python 3.7+ removes trailing path separators (#402) + if directory[-1] != sep: + directory += sep + + self.realdirectory = directory # Create directory tree os.makedirs(self.realdirectory, exist_ok=True) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 911939d..cbb8fe7 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.10.2" +__version__ = "1.10.3" diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 786dc46..78b9838 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -58,7 +58,6 @@ class BasePostprocessorTest(unittest.TestCase): def setUpClass(cls): cls.extractor = extractor.find("test:") cls.dir = tempfile.TemporaryDirectory() - cls.fnum = 0 config.set(("base-directory",), cls.dir.name) @classmethod @@ -173,7 +172,10 @@ class MetadataTest(BasePostprocessorTest): """) def test_metadata_tags(self): - pp = self._create({"mode": "tags"}, {"tags": ["foo", "bar", "baz"]}) + pp = self._create( + {"mode": "tags"}, + {"tags": ["foo", "bar", "baz"]}, + ) self.assertEqual(pp.write, pp._write_tags) self.assertEqual(pp.extension, "txt") @@ -186,7 +188,10 @@ class MetadataTest(BasePostprocessorTest): self.assertEqual(self._output(m), "foo\nbar\nbaz\n") def test_metadata_tags_split_1(self): - pp = self._create({"mode": "tags"}, {"tags": "foo, bar, baz"}) + pp = self._create( + {"mode": "tags"}, + {"tags": "foo, bar, baz"}, + ) with patch("builtins.open", mock_open()) as m: pp.prepare(self.pathfmt) pp.run(self.pathfmt) @@ -203,7 +208,10 @@ class MetadataTest(BasePostprocessorTest): self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n") def test_metadata_tags_tagstring(self): - pp = self._create({"mode": "tags"}, {"tag_string": "foo, bar, baz"}) + pp = self._create( + {"mode": "tags"}, + {"tag_string": "foo, bar, baz"}, + ) with patch("builtins.open", mock_open()) as m: pp.prepare(self.pathfmt) pp.run(self.pathfmt) @@ -266,8 +274,7 @@ class ZipTest(BasePostprocessorTest): self.assertEqual(pp.delete, True) self.assertFalse(hasattr(pp, "args")) self.assertEqual(pp.zfile.compression, zipfile.ZIP_STORED) - self.assertEqual( - pp.zfile.filename, self.pathfmt.realdirectory + ".zip") + self.assertTrue(pp.zfile.filename.endswith("/test.zip")) def test_zip_options(self): pp = self._create({ @@ -277,8 +284,7 @@ class ZipTest(BasePostprocessorTest): }) self.assertEqual(pp.delete, False) self.assertEqual(pp.zfile.compression, zipfile.ZIP_DEFLATED) - self.assertEqual( - pp.zfile.filename, self.pathfmt.realdirectory + ".cbz") + self.assertTrue(pp.zfile.filename.endswith("/test.cbz")) def test_zip_safe(self): pp = self._create({"mode": "safe"}) @@ -286,8 +292,84 @@ class ZipTest(BasePostprocessorTest): self.assertEqual(pp.path, self.pathfmt.realdirectory) self.assertEqual(pp.run, pp._write_safe) self.assertEqual(pp.args, ( - pp.path + ".zip", "a", zipfile.ZIP_STORED, True + pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True, )) + self.assertTrue(pp.args[0].endswith("/test.zip")) + + def test_zip_write(self): + pp = self._create() + nti = pp.zfile.NameToInfo + + with tempfile.NamedTemporaryFile("w", dir=self.dir.name) as file: + file.write("foobar\n") + + # write dummy file with 3 different names + for i in range(3): + name = "file{}.ext".format(i) + self.pathfmt.temppath = file.name + self.pathfmt.filename = name + + pp.prepare(self.pathfmt) + pp.run(self.pathfmt) + + self.assertEqual(len(nti), i+1) + self.assertIn(name, nti) + + # check file contents + self.assertEqual(len(nti), 3) + self.assertIn("file0.ext", nti) + self.assertIn("file1.ext", nti) + self.assertIn("file2.ext", nti) + + # write the last file a second time (will be skipped) + pp.prepare(self.pathfmt) + pp.run(self.pathfmt) + self.assertEqual(len(pp.zfile.NameToInfo), 3) + + # close file + pp.finalize() + + # reopen to check persistence + with zipfile.ZipFile(pp.zfile.filename) as file: + nti = file.NameToInfo + self.assertEqual(len(pp.zfile.NameToInfo), 3) + self.assertIn("file0.ext", pp.zfile.NameToInfo) + self.assertIn("file1.ext", pp.zfile.NameToInfo) + self.assertIn("file2.ext", pp.zfile.NameToInfo) + + os.unlink(pp.zfile.filename) + + def test_zip_write_mock(self): + + def side_effect(_, name): + pp.zfile.NameToInfo.add(name) + + pp = self._create() + pp.zfile = Mock() + pp.zfile.NameToInfo = set() + pp.zfile.write.side_effect = side_effect + + # write 3 files + for i in range(3): + self.pathfmt.temppath = self.pathfmt.realdirectory + "file.ext" + self.pathfmt.filename = "file{}.ext".format(i) + pp.prepare(self.pathfmt) + pp.run(self.pathfmt) + + # write the last file a second time (will be skipped) + pp.prepare(self.pathfmt) + pp.run(self.pathfmt) + + pp.finalize() + + self.assertEqual(pp.zfile.write.call_count, 3) + for call in pp.zfile.write.call_args_list: + args, kwargs = call + self.assertEqual(len(args), 2) + self.assertEqual(len(kwargs), 0) + self.assertEqual(args[0], self.pathfmt.temppath) + self.assertRegex(args[1], r"file\d\.ext") + self.assertEqual(pp.zfile.close.call_count, 1) if __name__ == "__main__": diff --git a/test/test_results.py b/test/test_results.py index 12f2416..a69cc81 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -21,7 +21,7 @@ TRAVIS_SKIP = { "exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx", "archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs", "sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex", - "sankakucomplex", + "sankakucomplex", "warosu", } # temporary issues, etc. @@ -29,6 +29,8 @@ BROKEN = { "8chan", "imgth", "mangapark", + "mangoxo", + "tumblr", } |
