about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG.md 11
-rw-r--r-- README.rst 6
-rw-r--r-- docs/configuration.rst 22
-rw-r--r-- docs/gallery-dl.conf 1
-rw-r--r-- gallery_dl/downloader/http.py 15
-rw-r--r-- gallery_dl/downloader/ytdl.py 17
-rw-r--r-- gallery_dl/extractor/8muses.py 2
-rw-r--r-- gallery_dl/extractor/adultempire.py 2
-rw-r--r-- gallery_dl/extractor/deviantart.py 127
-rw-r--r-- gallery_dl/extractor/flickr.py 16
-rw-r--r-- gallery_dl/extractor/piczel.py 72
-rw-r--r-- gallery_dl/extractor/pornhub.py 2
-rw-r--r-- gallery_dl/extractor/seiga.py 5
-rw-r--r-- gallery_dl/postprocessor/zip.py 2
-rw-r--r-- gallery_dl/util.py 26
-rw-r--r-- gallery_dl/version.py 2
-rw-r--r-- test/test_postprocessor.py 100
-rw-r--r-- test/test_results.py 4
18 files changed, 309 insertions, 123 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99df78a..a2b5109 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
# Changelog
+## 1.10.3 - 2019-08-30
+### Additions
+- Provide `filename` metadata for all `deviantart` files ([#392](https://github.com/mikf/gallery-dl/issues/392), [#400](https://github.com/mikf/gallery-dl/issues/400))
+- Implement a `ytdl.outtmpl` option to let youtube-dl handle filenames by itself ([#395](https://github.com/mikf/gallery-dl/issues/395))
+- Support `seiga` mobile URLs ([#401](https://github.com/mikf/gallery-dl/issues/401))
+### Fixes
+- Extract more than the first 32 posts from `piczel` galleries ([#396](https://github.com/mikf/gallery-dl/issues/396))
+- Fix filenames of archives created with `--zip` ([#397](https://github.com/mikf/gallery-dl/issues/397))
+- Skip unavailable images and videos on `flickr` ([#398](https://github.com/mikf/gallery-dl/issues/398))
+- Fix filesystem paths on Windows with Python 3.6 and lower ([#402](https://github.com/mikf/gallery-dl/issues/402))
+
## 1.10.2 - 2019-08-23
### Additions
- Support for `instagram` stories and IGTV ([#371](https://github.com/mikf/gallery-dl/issues/371), [#373](https://github.com/mikf/gallery-dl/issues/373))
diff --git a/README.rst b/README.rst
index e62a7ec..2e934f8 100644
--- a/README.rst
+++ b/README.rst
@@ -78,8 +78,8 @@ Download a standalone executable file,
put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__,
and run it inside a command prompt (like ``cmd.exe``).
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.2/gallery-dl.exe>`__
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.2/gallery-dl.bin>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.exe>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.3/gallery-dl.bin>`__
These executables include a Python 3.7 interpreter
and all required Python packages.
@@ -224,7 +224,7 @@ access to *gallery-dl*. Authorize it and you will be shown one or more
.. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf
.. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst
.. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst
-.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.2.zip
+.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.3.zip
.. _dev: https://github.com/mikf/gallery-dl/archive/master.zip
.. _Python: https://www.python.org/downloads/
diff --git a/docs/configuration.rst b/docs/configuration.rst
index 0e2e355..d69406d 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -581,7 +581,8 @@ extractor.deviantart.quality
=========== =====
Type ``integer``
Default ``100``
-Description JPEG compression quality for newer images hosted on wixmp servers.
+Description JPEG quality level of newer images for which
+ an original file download is not available.
=========== =====
@@ -1171,6 +1172,25 @@ Description | Route youtube-dl's output through gallery-dl's logging system.
=========== =====
+downloader.ytdl.outtmpl
+-----------------------
+=========== =====
+Type ``string``
+Default ``null``
+Description The `Output Template <https://github.com/ytdl-org/youtube-dl#output-template>`__
+ used to generate filenames for files downloaded with youtube-dl.
+
+ Special values:
+
+ * ``null``: generate filenames with `extractor.*.filename`_
+ * ``"default"``: use youtube-dl's default, currently ``"%(title)s-%(id)s.%(ext)s"``
+
+ Note: An output template other than ``null`` might
+ cause unexpected results in combination with other options
+ (e.g. ``"skip": "enumerate"``)
+=========== =====
+
+
downloader.ytdl.raw-options
---------------------------
=========== =====
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index b9ff32d..6439437 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -169,6 +169,7 @@
"format": null,
"forward-cookies": true,
"mtime": true,
+ "outtmpl": null,
"rate": null,
"retries": 4,
"timeout": 30.0,
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index e3229eb..4c5fed5 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -38,11 +38,12 @@ class HttpDownloader(DownloaderBase):
if self.retries < 0:
self.retries = float("inf")
if self.rate:
- self.rate = text.parse_bytes(self.rate)
- if not self.rate:
- self.log.warning("Invalid rate limit specified")
- elif self.rate < self.chunk_size:
- self.chunk_size = self.rate
+ rate = text.parse_bytes(self.rate)
+ if not rate:
+ self.log.warning("Invalid rate limit (%r)", self.rate)
+ elif rate < self.chunk_size:
+ self.chunk_size = rate
+ self.rate = rate
def download(self, url, pathfmt):
try:
@@ -124,10 +125,10 @@ class HttpDownloader(DownloaderBase):
if not offset:
mode = "w+b"
if filesize:
- self.log.info("Unable to resume partial download")
+ self.log.debug("Unable to resume partial download")
else:
mode = "r+b"
- self.log.info("Resuming download at byte %d", offset)
+ self.log.debug("Resuming download at byte %d", offset)
# start downloading
self.out.start(pathfmt.path)
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 7d8b905..ce921e3 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -8,7 +8,7 @@
"""Downloader module for URLs requiring youtube-dl support"""
-from youtube_dl import YoutubeDL
+from youtube_dl import YoutubeDL, DEFAULT_OUTTMPL
from .common import DownloaderBase
from .. import text
import os
@@ -36,6 +36,9 @@ class YoutubeDLDownloader(DownloaderBase):
options["logger"] = self.log
self.forward_cookies = self.config("forward-cookies", True)
+ outtmpl = self.config("outtmpl")
+ self.outtmpl = DEFAULT_OUTTMPL if outtmpl == "default" else outtmpl
+
self.ytdl = YoutubeDL(options)
def download(self, url, pathfmt):
@@ -60,7 +63,17 @@ class YoutubeDLDownloader(DownloaderBase):
def _download_video(self, pathfmt, info_dict):
if "url" in info_dict:
text.nameext_from_url(info_dict["url"], pathfmt.kwdict)
- pathfmt.set_extension(info_dict["ext"])
+
+ if self.outtmpl:
+ self.ytdl.params["outtmpl"] = self.outtmpl
+ pathfmt.filename = filename = self.ytdl.prepare_filename(info_dict)
+ pathfmt.extension = info_dict["ext"]
+ pathfmt.path = pathfmt.directory + filename
+ pathfmt.realpath = pathfmt.temppath = (
+ pathfmt.realdirectory + filename)
+ else:
+ pathfmt.set_extension(info_dict["ext"])
+
if pathfmt.exists():
pathfmt.temppath = ""
return True
diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py
index 6fbf6b5..f5ca9ce 100644
--- a/gallery_dl/extractor/8muses.py
+++ b/gallery_dl/extractor/8muses.py
@@ -55,7 +55,7 @@ class _8musesAlbumExtractor(Extractor):
}),
("https://www.8muses.com/comics/album/Fakku-Comics/6?sort=az", {
"count": ">= 70",
- "keyword": {"name": r"re:^[S-Zs-z]"},
+ "keyword": {"name": r"re:^[R-Zr-z]"},
}),
)
diff --git a/gallery_dl/extractor/adultempire.py b/gallery_dl/extractor/adultempire.py
index 5e2480a..85d8266 100644
--- a/gallery_dl/extractor/adultempire.py
+++ b/gallery_dl/extractor/adultempire.py
@@ -26,7 +26,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
}),
("https://www.adultdvdempire.com/5683/gallery.html", {
"url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
- "keyword": "0fe9a6e3f0a331b95ba77f66a643705ca86e8ec5",
+ "keyword": "9634eb16cc6dbf347eb9dcdd9b2a499dfd04d167",
}),
)
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index bd1299b..6614755 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -9,7 +9,7 @@
"""Extract images from https://www.deviantart.com/"""
from .common import Extractor, Message
-from .. import text, exception
+from .. import text, util, exception
from ..cache import cache, memcache
import collections
import itertools
@@ -137,30 +137,37 @@ class DeviantartExtractor(Extractor):
deviation["date"] = text.parse_timestamp(
deviation["published_time"])
+ # filename metadata
+ alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+ sub = re.compile(r"\W").sub
+ deviation["filename"] = "".join((
+ sub("_", deviation["title"].lower()), "_by_",
+ sub("_", deviation["author"]["username"].lower()), "-d",
+ util.bencode(deviation["index"], alphabet),
+ ))
+
@staticmethod
def commit(deviation, target):
url = target["src"]
- thumb = deviation["thumbs"][0]["src"] if "thumbs" in deviation else url
- target = text.nameext_from_url(thumb, target.copy())
- if target["filename"].endswith("-150"):
- target["filename"] = target["filename"][:-4]
- if not target["filename"].count("-"):
- name, _, hid = target["filename"].rpartition("_")
- target["filename"] = name + "-" + hid
+ target = target.copy()
+ target["filename"] = deviation["filename"]
deviation["target"] = target
- deviation["filename"] = target["filename"]
deviation["extension"] = target["extension"] = text.ext_from_url(url)
return Message.Url, url, deviation
def _commit_journal_html(self, deviation, journal):
title = text.escape(deviation["title"])
url = deviation["url"]
- thumbs = deviation["thumbs"]
+ thumbs = deviation.get("thumbs") or deviation.get("files")
html = journal["html"]
shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""
if "css" in journal:
css, cls = journal["css"], "withskin"
+ elif html.startswith("<style"):
+ css, _, html = html.partition("</style>")
+ css = css.partition(">")[2]
+ cls = "withskin"
else:
css, cls = "", "journal-green"
@@ -191,22 +198,25 @@ class DeviantartExtractor(Extractor):
categories=categories,
)
+ if needle in html:
+ html = html.replace(needle, header, 1)
+ else:
+ html = JOURNAL_TEMPLATE_HTML_EXTRA.format(header, html)
+
html = JOURNAL_TEMPLATE_HTML.format(
- title=title,
- html=html.replace(needle, header, 1),
- shadow=shadow,
- css=css,
- cls=cls,
- )
+ title=title, html=html, shadow=shadow, css=css, cls=cls)
deviation["extension"] = "htm"
return Message.Url, html, deviation
@staticmethod
def _commit_journal_text(deviation, journal):
+ html = journal["html"]
+ if html.startswith("<style"):
+ html = html.partition("</style>")[2]
content = "\n".join(
text.unescape(text.remove_html(txt))
- for txt in journal["html"].rpartition("<script")[0].split("<br />")
+ for txt in html.rpartition("<script")[0].split("<br />")
)
txt = JOURNAL_TEMPLATE_TEXT.format(
title=deviation["title"],
@@ -302,7 +312,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
}),
# 'folders' option (#276)
("https://www.deviantart.com/justatest235723", {
- "count": 2,
+ "count": 3,
"options": (("metadata", 1), ("folders", 1), ("original", 0)),
"keyword": {
"description": str,
@@ -481,19 +491,21 @@ class DeviantartJournalExtractor(DeviantartExtractor):
subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{username}_{index}.{extension}"
- pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$"
+ pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
test = (
- ("https://www.deviantart.com/angrywhitewanker/journal/", {
+ ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
}),
- ("https://www.deviantart.com/angrywhitewanker/journal/", {
+ ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
"options": (("journals", "text"),),
}),
- ("https://www.deviantart.com/angrywhitewanker/journal/", {
+ ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"count": 0,
"options": (("journals", "none"),),
}),
+ ("https://www.deviantart.com/shimoda7/posts/"),
+ ("https://www.deviantart.com/shimoda7/journal/"),
("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
("https://shimoda7.deviantart.com/journal/"),
("https://shimoda7.deviantart.com/journal/?catpath=/"),
@@ -546,22 +558,8 @@ class DeviantartPopularExtractor(DeviantartExtractor):
deviation["popular"] = self.popular
-class DeviantartExtractorV2(Extractor):
+class DeviantartExtractorV2(DeviantartExtractor):
"""Base class for deviantart extractors using the NAPI"""
- category = "deviantart"
- directory_fmt = ("{category}", "{author[username]!l}")
- filename_fmt = "{category}_{index}_{title}.{extension}"
- root = "https://www.deviantart.com"
-
- def __init__(self, match=None):
- Extractor.__init__(self, match)
- self.offset = 0
- self.extra = self.config("extra", False)
- self.quality = self.config("quality", "100")
- self.user = match.group(1) or match.group(2)
-
- if self.quality:
- self.quality = "q_{}".format(self.quality)
def items(self):
url = (
@@ -621,15 +619,17 @@ class DeviantartExtractorV2(Extractor):
# extract download target
target = files[-1]
- name = files[0]["src"]
- if target["type"] == "gif":
+ if deviation["isJournal"] and self.commit_journal:
+ journal = deviation["textContent"]
+ journal["html"] = journal["html"]["markup"]
+ target["src"] = self.commit_journal(deviation, journal)[1]
+ elif target["type"] == "gif":
pass
elif target["type"] == "video":
# select largest video
target = max(
files, key=lambda x: text.parse_int(x.get("quality", "")[:-1]))
- name = target["src"]
elif target["type"] == "flash":
if target["src"].startswith("https://sandbox.deviantart.com"):
# extract SWF file from "sandbox"
@@ -641,9 +641,7 @@ class DeviantartExtractorV2(Extractor):
target = extended["download"]
target["src"] = target["url"]
del target["url"]
-
- # url rewrites
- if target["src"].startswith("https://images-wixmp-"):
+ elif target["src"].startswith("https://images-wixmp-"):
if deviation["index"] <= 790677560:
# https://github.com/r888888888/danbooru/issues/4069
target["src"] = re.sub(
@@ -653,16 +651,20 @@ class DeviantartExtractorV2(Extractor):
target["src"] = re.sub(
r"q_\d+", self.quality, target["src"])
- text.nameext_from_url(name, target)
- if target["filename"].endswith("-150"):
- target["filename"] = target["filename"][:-4]
- if not target["filename"].count("-"):
- name, _, hid = target["filename"].rpartition("_")
- target["filename"] = name + "-" + hid
+ # filename and extension metadata
+ alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
+ sub = re.compile(r"\W").sub
+ deviation["filename"] = target["filename"] = "".join((
+ sub("_", deviation["title"].lower()), "_by_",
+ sub("_", deviation["author"]["username"].lower()), "-d",
+ util.bencode(deviation["index"], alphabet),
+ ))
+ if "extension" not in deviation:
+ deviation["extension"] = target["extension"] = (
+ text.ext_from_url(target["src"])
+ )
deviation["target"] = target
- deviation["filename"] = target["filename"]
- deviation["extension"] = target["extension"] = (
- text.ext_from_url(target["src"]))
+
return deviation
@@ -1118,6 +1120,27 @@ roses/cssmin/desktop.css?1491362542749" >
</html>
"""
+JOURNAL_TEMPLATE_HTML_EXTRA = """\
+<div id="devskin0"><div class="negate-box-margin" style="">\
+<div usr class="gr-box gr-genericbox"
+ ><i usr class="gr1"><i></i></i
+ ><i usr class="gr2"><i></i></i
+ ><i usr class="gr3"><i></i></i
+ ><div usr class="gr-top">
+ <i usr class="tri"></i>
+ {}
+ </div>
+ </div><div usr class="gr-body"><div usr class="gr">
+ <div class="grf-indent">
+ <div class="text">
+ {} </div>
+ </div>
+ </div></div>
+ <i usr class="gr3 gb"></i>
+ <i usr class="gr2 gb"></i>
+ <i usr class="gr1 gb gb1"></i> </div>
+ </div></div>"""
+
JOURNAL_TEMPLATE_TEXT = """text:{title}
by {username}, {date}
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index d941d76..73b8ec4 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -25,12 +25,20 @@ class FlickrExtractor(Extractor):
def items(self):
data = self.metadata()
+ extract = self.api._extract_format
yield Message.Version, 1
yield Message.Directory, data
for photo in self.photos():
- photo.update(data)
- url = photo["url"]
- yield Message.Url, url, text.nameext_from_url(url, photo)
+ try:
+ photo = extract(photo)
+ except Exception as exc:
+ self.log.warning(
+ "Skipping %s (%s)", photo["id"], exc.__class__.__name__)
+ self.log.debug("", exc_info=True)
+ else:
+ photo.update(data)
+ url = photo["url"]
+ yield Message.Url, url, text.nameext_from_url(url, photo)
def metadata(self):
"""Return general metadata"""
@@ -432,7 +440,7 @@ class FlickrAPI(oauth.OAuth1API):
while True:
data = self._call(method, params)[key]
- yield from map(self._extract_format, data["photo"])
+ yield from data["photo"]
if params["page"] >= data["pages"]:
return
params["page"] += 1
diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 6a5c41c..2775dac 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-"""Extract images from https://piczel.tv/"""
+"""Extractors for https://piczel.tv/"""
from .common import Extractor, Message
from .. import text
@@ -19,21 +19,12 @@ class PiczelExtractor(Extractor):
filename_fmt = "{category}_{id}_{title}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
root = "https://piczel.tv"
- api_root = "https://apollo.piczel.tv"
-
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.item_id = match.group(1)
def items(self):
- first = True
yield Message.Version, 1
- for image in self.unpack(self.get_images()):
- if first:
- yield Message.Directory, image
- first = False
- path = image["image"]["image"]["url"]
- url = "{}/static/{}".format(self.api_root, path)
+ for image in self.unpack(self.images()):
+ url = self.root + "/static" + image["image"]["image"]["url"]
+ yield Message.Directory, image
yield Message.Url, url, text.nameext_from_url(url, image)
@staticmethod
@@ -50,21 +41,40 @@ class PiczelExtractor(Extractor):
image["num"] = 0
yield image
- def get_images(self):
+ def images(self):
"""Return an iterable with all relevant image objects"""
+ def _pagination(self, url, folder_id=None):
+ params = {
+ "hideNsfw" : "false",
+ "from_id" : None,
+ "folder_id": folder_id,
+ }
+
+ while True:
+ data = self.request(url, params=params).json()
+ yield from data
+
+ if len(data) < 32:
+ return
+ params["from_id"] = data[-1]["id"]
+
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"
- test = ("https://piczel.tv/gallery/Lulena", {
- "count": ">= 13",
+ test = ("https://piczel.tv/gallery/Maximumwarp", {
+ "count": ">= 50",
})
- def get_images(self):
- url = "{}/api/users/{}/gallery".format(self.api_root, self.item_id)
- return self.request(url).json()
+ def __init__(self, match):
+ PiczelExtractor.__init__(self, match)
+ self.user = match.group(1)
+
+ def images(self):
+ url = "{}/api/users/{}/gallery".format(self.root, self.user)
+ return self._pagination(url)
class PiczelFolderExtractor(PiczelExtractor):
@@ -73,16 +83,18 @@ class PiczelFolderExtractor(PiczelExtractor):
directory_fmt = ("{category}", "{user[username]}", "{folder[name]}")
archive_fmt = "f{folder[id]}_{id}_{num}"
pattern = (r"(?:https?://)?(?:www\.)?piczel\.tv"
- r"/gallery/(?!image)[^/?&#]+/(\d+)")
+ r"/gallery/(?!image)([^/?&#]+)/(\d+)")
test = ("https://piczel.tv/gallery/Lulena/1114", {
"count": ">= 4",
})
- def get_images(self):
- url = "{}/api/gallery/folder/{}".format(self.api_root, self.item_id)
- images = self.request(url).json()
- images.reverse()
- return images
+ def __init__(self, match):
+ PiczelExtractor.__init__(self, match)
+ self.user, self.folder_id = match.groups()
+
+ def images(self):
+ url = "{}/api/users/{}/gallery".format(self.root, self.user)
+ return self._pagination(url, self.folder_id)
class PiczelImageExtractor(PiczelExtractor):
@@ -90,7 +102,7 @@ class PiczelImageExtractor(PiczelExtractor):
subcategory = "image"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/image/(\d+)"
test = ("https://piczel.tv/gallery/image/7807", {
- "url": "9b9e416b6ab7e58676fab84453d5028f306ece34",
+ "url": "85225dd53a03c3b6028f6c4a45b71eccc07f7066",
"content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
"keyword": {
"created_at": "2018-07-22T05:13:58.000Z",
@@ -113,6 +125,10 @@ class PiczelImageExtractor(PiczelExtractor):
},
})
- def get_images(self):
- url = "{}/api/gallery/image/{}".format(self.api_root, self.item_id)
+ def __init__(self, match):
+ PiczelExtractor.__init__(self, match)
+ self.image_id = match.group(1)
+
+ def images(self):
+ url = "{}/api/gallery/image/{}".format(self.root, self.image_id)
return (self.request(url).json(),)
diff --git a/gallery_dl/extractor/pornhub.py b/gallery_dl/extractor/pornhub.py
index 40816b3..bbbc709 100644
--- a/gallery_dl/extractor/pornhub.py
+++ b/gallery_dl/extractor/pornhub.py
@@ -123,7 +123,7 @@ class PornhubUserExtractor(PornhubExtractor):
test = (
("https://www.pornhub.com/users/flyings0l0/photos/public", {
"pattern": PornhubGalleryExtractor.pattern,
- "count": ">= 8",
+ "count": ">= 6",
}),
("https://www.pornhub.com/users/flyings0l0/"),
("https://www.pornhub.com/users/flyings0l0/photos/public"),
diff --git a/gallery_dl/extractor/seiga.py b/gallery_dl/extractor/seiga.py
index 0d92573..69962c8 100644
--- a/gallery_dl/extractor/seiga.py
+++ b/gallery_dl/extractor/seiga.py
@@ -70,7 +70,7 @@ class SeigaUserExtractor(SeigaExtractor):
subcategory = "user"
directory_fmt = ("{category}", "{user[id]}")
filename_fmt = "{category}_{user[id]}_{image_id}.{extension}"
- pattern = (r"(?:https?://)?(?:www\.|seiga\.)?nicovideo\.jp/"
+ pattern = (r"(?:https?://)?(?:www\.|(?:sp\.)?seiga\.)?nicovideo\.jp/"
r"user/illust/(\d+)(?:\?(?:[^&]+&)*sort=([^&#]+))?")
test = (
("https://seiga.nicovideo.jp/user/illust/39537793", {
@@ -96,6 +96,7 @@ class SeigaUserExtractor(SeigaExtractor):
}),
("https://seiga.nicovideo.jp/user/illust/39537793"
"?sort=image_view&target=illust_all"),
+ ("https://sp.seiga.nicovideo.jp/user/illust/39537793"),
)
def __init__(self, match):
@@ -167,6 +168,7 @@ class SeigaImageExtractor(SeigaExtractor):
filename_fmt = "{category}_{image_id}.{extension}"
pattern = (r"(?:https?://)?(?:"
r"(?:seiga\.|www\.)?nicovideo\.jp/(?:seiga/im|image/source/)"
+ r"|sp\.seiga\.nicovideo\.jp/seiga/#!/im"
r"|lohas\.nicoseiga\.jp/(?:thumb|(?:priv|o)/[^/]+/\d+)/)(\d+)")
test = (
("https://seiga.nicovideo.jp/seiga/im5977527", {
@@ -177,6 +179,7 @@ class SeigaImageExtractor(SeigaExtractor):
"exception": exception.NotFoundError,
}),
("https://seiga.nicovideo.jp/image/source/5977527"),
+ ("https://sp.seiga.nicovideo.jp/seiga/#!/im5977527"),
("https://lohas.nicoseiga.jp/thumb/5977527i"),
("https://lohas.nicoseiga.jp/priv"
"/759a4ef1c639106ba4d665ee6333832e647d0e4e/1549727594/5977527"),
diff --git a/gallery_dl/postprocessor/zip.py b/gallery_dl/postprocessor/zip.py
index 1075c70..6659a8d 100644
--- a/gallery_dl/postprocessor/zip.py
+++ b/gallery_dl/postprocessor/zip.py
@@ -34,7 +34,7 @@ class ZipPP(PostProcessor):
algorithm = "store"
self.path = pathfmt.realdirectory
- args = (self.path + ext, "a",
+ args = (self.path[:-1] + ext, "a",
self.COMPRESSION_ALGORITHMS[algorithm], True)
if options.get("mode") == "safe":
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 79fa175..17cd73a 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -608,11 +608,13 @@ class PathFormat():
"""Build directory path and create it if necessary"""
# Build path segments by applying 'kwdict' to directory format strings
+ segments = []
+ append = segments.append
try:
- segments = [
- self.clean_segment(format_map(kwdict).strip())
- for format_map in self.directory_formatters
- ]
+ for formatter in self.directory_formatters:
+ segment = formatter(kwdict).strip()
+ if segment:
+ append(self.clean_segment(segment))
except Exception as exc:
raise exception.FormatError(exc, "directory")
@@ -620,16 +622,20 @@ class PathFormat():
sep = os.sep
directory = self.clean_path(self.basedirectory + sep.join(segments))
- # Ensure directory ends with a path separator
- if directory[-1] != sep:
+ # Ensure 'directory' ends with a path separator
+ if segments:
directory += sep
self.directory = directory
- # Enable longer-than-260-character paths on Windows
if os.name == "nt":
- self.realdirectory = "\\\\?\\" + os.path.abspath(directory) + sep
- else:
- self.realdirectory = directory
+ # Enable longer-than-260-character paths on Windows
+ directory = "\\\\?\\" + os.path.abspath(directory)
+
+ # abspath() in Python 3.7+ removes trailing path separators (#402)
+ if directory[-1] != sep:
+ directory += sep
+
+ self.realdirectory = directory
# Create directory tree
os.makedirs(self.realdirectory, exist_ok=True)
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 911939d..cbb8fe7 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.10.2"
+__version__ = "1.10.3"
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index 786dc46..78b9838 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -58,7 +58,6 @@ class BasePostprocessorTest(unittest.TestCase):
def setUpClass(cls):
cls.extractor = extractor.find("test:")
cls.dir = tempfile.TemporaryDirectory()
- cls.fnum = 0
config.set(("base-directory",), cls.dir.name)
@classmethod
@@ -173,7 +172,10 @@ class MetadataTest(BasePostprocessorTest):
""")
def test_metadata_tags(self):
- pp = self._create({"mode": "tags"}, {"tags": ["foo", "bar", "baz"]})
+ pp = self._create(
+ {"mode": "tags"},
+ {"tags": ["foo", "bar", "baz"]},
+ )
self.assertEqual(pp.write, pp._write_tags)
self.assertEqual(pp.extension, "txt")
@@ -186,7 +188,10 @@ class MetadataTest(BasePostprocessorTest):
self.assertEqual(self._output(m), "foo\nbar\nbaz\n")
def test_metadata_tags_split_1(self):
- pp = self._create({"mode": "tags"}, {"tags": "foo, bar, baz"})
+ pp = self._create(
+ {"mode": "tags"},
+ {"tags": "foo, bar, baz"},
+ )
with patch("builtins.open", mock_open()) as m:
pp.prepare(self.pathfmt)
pp.run(self.pathfmt)
@@ -203,7 +208,10 @@ class MetadataTest(BasePostprocessorTest):
self.assertEqual(self._output(m), "foobar1\nfoobar2\nfoobarbaz\n")
def test_metadata_tags_tagstring(self):
- pp = self._create({"mode": "tags"}, {"tag_string": "foo, bar, baz"})
+ pp = self._create(
+ {"mode": "tags"},
+ {"tag_string": "foo, bar, baz"},
+ )
with patch("builtins.open", mock_open()) as m:
pp.prepare(self.pathfmt)
pp.run(self.pathfmt)
@@ -266,8 +274,7 @@ class ZipTest(BasePostprocessorTest):
self.assertEqual(pp.delete, True)
self.assertFalse(hasattr(pp, "args"))
self.assertEqual(pp.zfile.compression, zipfile.ZIP_STORED)
- self.assertEqual(
- pp.zfile.filename, self.pathfmt.realdirectory + ".zip")
+ self.assertTrue(pp.zfile.filename.endswith("/test.zip"))
def test_zip_options(self):
pp = self._create({
@@ -277,8 +284,7 @@ class ZipTest(BasePostprocessorTest):
})
self.assertEqual(pp.delete, False)
self.assertEqual(pp.zfile.compression, zipfile.ZIP_DEFLATED)
- self.assertEqual(
- pp.zfile.filename, self.pathfmt.realdirectory + ".cbz")
+ self.assertTrue(pp.zfile.filename.endswith("/test.cbz"))
def test_zip_safe(self):
pp = self._create({"mode": "safe"})
@@ -286,8 +292,84 @@ class ZipTest(BasePostprocessorTest):
self.assertEqual(pp.path, self.pathfmt.realdirectory)
self.assertEqual(pp.run, pp._write_safe)
self.assertEqual(pp.args, (
- pp.path + ".zip", "a", zipfile.ZIP_STORED, True
+ pp.path[:-1] + ".zip", "a", zipfile.ZIP_STORED, True,
))
+ self.assertTrue(pp.args[0].endswith("/test.zip"))
+
+ def test_zip_write(self):
+ pp = self._create()
+ nti = pp.zfile.NameToInfo
+
+ with tempfile.NamedTemporaryFile("w", dir=self.dir.name) as file:
+ file.write("foobar\n")
+
+ # write dummy file with 3 different names
+ for i in range(3):
+ name = "file{}.ext".format(i)
+ self.pathfmt.temppath = file.name
+ self.pathfmt.filename = name
+
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ self.assertEqual(len(nti), i+1)
+ self.assertIn(name, nti)
+
+ # check file contents
+ self.assertEqual(len(nti), 3)
+ self.assertIn("file0.ext", nti)
+ self.assertIn("file1.ext", nti)
+ self.assertIn("file2.ext", nti)
+
+ # write the last file a second time (will be skipped)
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+ self.assertEqual(len(pp.zfile.NameToInfo), 3)
+
+ # close file
+ pp.finalize()
+
+ # reopen to check persistence
+ with zipfile.ZipFile(pp.zfile.filename) as file:
+ nti = file.NameToInfo
+ self.assertEqual(len(pp.zfile.NameToInfo), 3)
+ self.assertIn("file0.ext", pp.zfile.NameToInfo)
+ self.assertIn("file1.ext", pp.zfile.NameToInfo)
+ self.assertIn("file2.ext", pp.zfile.NameToInfo)
+
+ os.unlink(pp.zfile.filename)
+
+ def test_zip_write_mock(self):
+
+ def side_effect(_, name):
+ pp.zfile.NameToInfo.add(name)
+
+ pp = self._create()
+ pp.zfile = Mock()
+ pp.zfile.NameToInfo = set()
+ pp.zfile.write.side_effect = side_effect
+
+ # write 3 files
+ for i in range(3):
+ self.pathfmt.temppath = self.pathfmt.realdirectory + "file.ext"
+ self.pathfmt.filename = "file{}.ext".format(i)
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ # write the last file a second time (will be skipped)
+ pp.prepare(self.pathfmt)
+ pp.run(self.pathfmt)
+
+ pp.finalize()
+
+ self.assertEqual(pp.zfile.write.call_count, 3)
+ for call in pp.zfile.write.call_args_list:
+ args, kwargs = call
+ self.assertEqual(len(args), 2)
+ self.assertEqual(len(kwargs), 0)
+ self.assertEqual(args[0], self.pathfmt.temppath)
+ self.assertRegex(args[1], r"file\d\.ext")
+ self.assertEqual(pp.zfile.close.call_count, 1)
if __name__ == "__main__":
diff --git a/test/test_results.py b/test/test_results.py
index 12f2416..a69cc81 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -21,7 +21,7 @@ TRAVIS_SKIP = {
"exhentai", "kissmanga", "mangafox", "dynastyscans", "nijie", "bobx",
"archivedmoe", "archiveofsins", "thebarchive", "fireden", "4plebs",
"sankaku", "idolcomplex", "mangahere", "readcomiconline", "mangadex",
- "sankakucomplex",
+ "sankakucomplex", "warosu",
}
# temporary issues, etc.
@@ -29,6 +29,8 @@ BROKEN = {
"8chan",
"imgth",
"mangapark",
+ "mangoxo",
+ "tumblr",
}