diff options
author | Unit 193 <unit193@ubuntu.com> | 2019-10-11 20:28:32 -0400 |
---|---|---|
committer | Unit 193 <unit193@ubuntu.com> | 2019-10-11 20:28:32 -0400 |
commit | 40f5fe6edef268632d3bc484e85e5b37bad67bff (patch) | |
tree | 98817850b65f1d2877bd4ed63a3908f37d794f8d | |
parent | 639d9ea4a667733aadc3ff83a1df2cc9f0add3a9 (diff) | |
download | gallery-dl-40f5fe6edef268632d3bc484e85e5b37bad67bff.tar.bz2 gallery-dl-40f5fe6edef268632d3bc484e85e5b37bad67bff.tar.xz gallery-dl-40f5fe6edef268632d3bc484e85e5b37bad67bff.tar.zst |
New upstream version 1.10.6 (upstream/1.10.6)
-rw-r--r-- | CHANGELOG.md | 11 | ||||
-rw-r--r-- | README.rst | 8 | ||||
-rw-r--r-- | docs/configuration.rst | 38 | ||||
-rw-r--r-- | gallery_dl/downloader/http.py | 7 | ||||
-rw-r--r-- | gallery_dl/extractor/8muses.py | 2 | ||||
-rw-r--r-- | gallery_dl/extractor/behance.py | 7 | ||||
-rw-r--r-- | gallery_dl/extractor/deviantart.py | 114 | ||||
-rw-r--r-- | gallery_dl/extractor/gfycat.py | 2 | ||||
-rw-r--r-- | gallery_dl/extractor/hitomi.py | 36 | ||||
-rw-r--r-- | gallery_dl/extractor/komikcast.py | 2 | ||||
-rw-r--r-- | gallery_dl/extractor/xhamster.py | 4 | ||||
-rw-r--r-- | gallery_dl/extractor/yaplog.py | 41 | ||||
-rw-r--r-- | gallery_dl/job.py | 3 | ||||
-rw-r--r-- | gallery_dl/option.py | 16 | ||||
-rw-r--r-- | gallery_dl/postprocessor/common.py | 22 | ||||
-rw-r--r-- | gallery_dl/postprocessor/exec.py | 56 | ||||
-rw-r--r-- | gallery_dl/version.py | 2 |
17 files changed, 259 insertions, 112 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index c72f971..4a57394 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## 1.10.6 - 2019-10-11 +### Additions +- `--exec` command-line option to specify a command to run after each file download ([#421](https://github.com/mikf/gallery-dl/issues/421)) +### Changes +- Include titles in `gfycat` default filenames ([#434](https://github.com/mikf/gallery-dl/issues/434)) +### Fixes +- Fetch working download URLs for `deviantart` ([#436](https://github.com/mikf/gallery-dl/issues/436)) +- Various fixes and improvements for `yaplog` blogs ([#443](https://github.com/mikf/gallery-dl/issues/443)) +- Fix image URL generation for `hitomi` galleries +- Miscellaneous fixes for `behance` and `xvideos` + ## 1.10.5 - 2019-09-28 ### Additions - `instagram.highlights` option to include highlighted stories when downloading user profiles ([#329](https://github.com/mikf/gallery-dl/issues/329)) @@ -78,8 +78,8 @@ Download a standalone executable file, put it into your `PATH <https://en.wikipedia.org/wiki/PATH_(variable)>`__, and run it inside a command prompt (like ``cmd.exe``). -- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.5/gallery-dl.exe>`__ -- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.5/gallery-dl.bin>`__ +- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.exe>`__ +- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.10.6/gallery-dl.bin>`__ These executables include a Python 3.7 interpreter and all required Python packages. @@ -224,13 +224,13 @@ access to *gallery-dl*. Authorize it and you will be shown one or more .. _gallery-dl-example.conf: https://github.com/mikf/gallery-dl/blob/master/docs/gallery-dl-example.conf .. _configuration.rst: https://github.com/mikf/gallery-dl/blob/master/docs/configuration.rst .. _Supported Sites: https://github.com/mikf/gallery-dl/blob/master/docs/supportedsites.rst -.. 
_stable: https://github.com/mikf/gallery-dl/archive/v1.10.5.zip +.. _stable: https://github.com/mikf/gallery-dl/archive/v1.10.6.zip .. _dev: https://github.com/mikf/gallery-dl/archive/master.zip .. _Python: https://www.python.org/downloads/ .. _PyPI: https://pypi.org/ .. _pip: https://pip.pypa.io/en/stable/ -.. _Requests: https://2.python-requests.org/en/master/#requests-http-for-humans +.. _Requests: https://requests.readthedocs.io/en/master/ .. _FFmpeg: https://www.ffmpeg.org/ .. _youtube-dl: https://ytdl-org.github.io/youtube-dl/ .. _pyOpenSSL: https://pyopenssl.org/ diff --git a/docs/configuration.rst b/docs/configuration.rst index 6b4055e..0824390 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1574,7 +1574,7 @@ cache.file ---------- =========== ===== Type |Path|_ -Default * |tempfile.gettempdir()|_ + ``".gallery-dl.cache"`` on Windows +Default * |tempfile.gettempdir()|__ + ``".gallery-dl.cache"`` on Windows * (``$XDG_CACHE_HOME`` or ``"~/.cache"``) + ``"/gallery-dl/cache.sqlite3"`` on all other platforms Description Path of the SQLite3 database used to cache login sessions, cookies and API tokens across `gallery-dl` invocations. @@ -1583,6 +1583,8 @@ Description Path of the SQLite3 database used to cache login sessions, this cache. =========== ===== +__ gettempdir_ + ciphers ------- @@ -1848,20 +1850,20 @@ Description An object with the ``name`` of a post-processor and its options. .. _date-format: `extractor.*.date-format`_ .. _deviantart.metadata: extractor.deviantart.metadata_ -.. _.netrc: https://stackoverflow.com/tags/.netrc/info -.. _tempfile.gettempdir(): https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir -.. _requests.request(): https://docs.python-requests.org/en/master/api/#requests.request -.. _timeout: https://docs.python-requests.org/en/latest/user/advanced/#timeouts -.. _verify: https://docs.python-requests.org/en/master/user/advanced/#ssl-cert-verification -.. 
_Last-Modified: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.29 -.. _`Requests' proxy documentation`: https://2.python-requests.org/en/master/user/advanced/#proxies -.. _format string: https://docs.python.org/3/library/string.html#formatstrings -.. _format strings: https://docs.python.org/3/library/string.html#formatstrings -.. _strptime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior -.. _mature_content: https://www.deviantart.com/developers/http/v1/20160316/object/deviation -.. _webbrowser.open(): https://docs.python.org/3/library/webbrowser.html -.. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects -.. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max -.. _Authentication: https://github.com/mikf/gallery-dl#authentication -.. _OAuth: https://github.com/mikf/gallery-dl#oauth -.. _youtube-dl: https://github.com/ytdl-org/youtube-dl +.. _.netrc: https://stackoverflow.com/tags/.netrc/info +.. _Last-Modified: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.29 +.. _datetime: https://docs.python.org/3/library/datetime.html#datetime-objects +.. _datetime.max: https://docs.python.org/3/library/datetime.html#datetime.datetime.max +.. _format string: https://docs.python.org/3/library/string.html#formatstrings +.. _format strings: https://docs.python.org/3/library/string.html#formatstrings +.. _gettempdir: https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir +.. _strptime: https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior +.. _webbrowser.open(): https://docs.python.org/3/library/webbrowser.html +.. _mature_content: https://www.deviantart.com/developers/http/v1/20160316/object/deviation +.. _Authentication: https://github.com/mikf/gallery-dl#authentication +.. _OAuth: https://github.com/mikf/gallery-dl#oauth +.. _youtube-dl: https://github.com/ytdl-org/youtube-dl +.. 
_requests.request(): https://requests.readthedocs.io/en/master/api/#requests.request +.. _timeout: https://requests.readthedocs.io/en/master/user/advanced/#timeouts +.. _verify: https://requests.readthedocs.io/en/master/user/advanced/#ssl-cert-verification +.. _`Requests' proxy documentation`: https://requests.readthedocs.io/en/master/user/advanced/#proxies diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 4c5fed5..bb45de2 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -236,6 +236,13 @@ MIMETYPE_MAP = { "audio/ogg": "ogg", "audio/mpeg": "mp3", + "application/zip": "zip", + "application/x-zip": "zip", + "application/x-zip-compressed": "zip", + "application/x-rar": "rar", + "application/x-rar-compressed": "rar", + "application/x-7z-compressed": "7z", + "application/ogg": "ogg", "application/octet-stream": "bin", } diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index f5ca9ce..089a0e9 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -53,7 +53,7 @@ class _8musesAlbumExtractor(Extractor): "private": False, }, }), - ("https://www.8muses.com/comics/album/Fakku-Comics/6?sort=az", { + ("https://www.8muses.com/comics/album/Fakku-Comics/7?sort=az", { "count": ">= 70", "keyword": {"name": r"re:^[R-Zr-z]"}, }), diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 467a935..c701927 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -33,8 +33,11 @@ class BehanceExtractor(Extractor): if data["fields"] and isinstance(data["fields"][0], dict): data["fields"] = [field["name"] for field in data["fields"]] data["owners"] = [owner["display_name"] for owner in data["owners"]] - if "tags" in data: - data["tags"] = [tag["title"] for tag in data["tags"]] + + tags = data.get("tags") or () + if tags and isinstance(tags[0], dict): + tags = [tag["title"] for tag in tags] + data["tags"] = tags # 
backwards compatibility data["gallery_id"] = data["id"] diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 525cc84..ab32a00 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -47,12 +47,6 @@ class DeviantartExtractor(Extractor): if self.quality: self.quality = "q_{}".format(self.quality) - if self.original != "image": - self._update_content = self._update_content_default - else: - self._update_content = self._update_content_image - self.original = True - self.commit_journal = { "html": self._commit_journal_html, "text": self._commit_journal_text, @@ -98,7 +92,8 @@ class DeviantartExtractor(Extractor): yield self.commit(deviation, content) elif deviation["is_downloadable"]: - content = self.api.deviation_download(deviation["deviationid"]) + content = {} + self._update_content(deviation, content) yield self.commit(deviation, content) if "videos" in deviation: @@ -240,15 +235,29 @@ class DeviantartExtractor(Extractor): url = "{}/{}/{}/0/".format(self.root, self.user, category) return [(url + folder["name"], folder) for folder in folders] - def _update_content_default(self, deviation, content): - content.update(self.api.deviation_download(deviation["deviationid"])) - - def _update_content_image(self, deviation, content): - data = self.api.deviation_download(deviation["deviationid"]) - url = data["src"].partition("?")[0] - mtype = mimetypes.guess_type(url, False)[0] - if mtype and mtype.startswith("image/"): - content.update(data) + def _update_content(self, deviation, content): + try: + data = self.api.deviation_extended_fetch( + deviation["index"], + deviation["author"]["username"], + "journal" if "excerpt" in deviation else "art", + ) + download = data["deviation"]["extended"]["download"] + download["src"] = download["url"] + except Exception as e: + self.log.warning( + "Unable to fetch original download URL for ID %s ('%s: %s')", + deviation["index"], e.__class__.__name__, e, + ) + 
self.log.debug("Server response: %s", data) + else: + if self.original == "image": + url = data["src"].partition("?")[0] + mtype = mimetypes.guess_type(url, False)[0] + if not mtype or not mtype.startswith("image/"): + return + del download["url"] + content.update(download) class DeviantartGalleryExtractor(DeviantartExtractor): @@ -258,8 +267,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor): pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(s3.amazonaws.com/origin-(img|orig)" - r".deviantart.net/|images-wixmp-\w+.wixmp.com/)", + "pattern": r"https://(www.deviantart.com/download/\d+/" + r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", "count": ">= 30", "keyword": { "allows_comments": bool, @@ -384,7 +393,7 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net", + "pattern": r"https://sta.sh/download/7549925030122512/.+\?token=", "count": 1, }), # multiple stash items @@ -394,6 +403,7 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { + "pattern": r"https://sta.sh/download/7800709982190282/.+\?token=", "count": 1, }), ("https://sta.sh/abcdefghijkl", { @@ -411,16 +421,34 @@ class DeviantartStashExtractor(DeviantartExtractor): def deviations(self): url = "https://sta.sh/" + self.stash_id page = self.request(url).text - deviation_id = text.extract(page, '//deviation/', '"')[0] + deviation_id, pos = text.extract(page, '//deviation/', '"') if deviation_id: - yield self.api.deviation(deviation_id) + deviation = self.api.deviation(deviation_id) + pos = page.find("dev-page-download", pos) + if pos >= 0: + deviation["_download"] = { + "width" : text.parse_int(text.extract( + page, 'data-download_width="' , '"', 
pos)[0]), + "height": text.parse_int(text.extract( + page, 'data-download_height="', '"', pos)[0]), + "src" : text.unescape(text.extract( + page, 'data-download_url="' , '"', pos)[0]), + } + return (deviation,) else: data = {"_extractor": DeviantartStashExtractor} page = text.extract( - page, '<div id="stash-body"', '<div class="footer"')[0] - for url in text.extract_iter(page, '<a href="', '"'): - yield url, data + page, 'id="stash-body"', 'class="footer"', pos)[0] + return [ + (url, data) + for url in text.extract_iter(page, '<a href="', '"') + ] + + def _update_content(self, deviation, content): + if "_download" in deviation: + content.update(deviation["_download"]) + del deviation["_download"] class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -562,28 +590,17 @@ class DeviantartExtractorV2(DeviantartExtractor): """Base class for deviantart extractors using the NAPI""" def items(self): - url = ( - self.root + "/_napi/da-browse/shared_api/deviation/extended_fetch" - ) - params = { - "deviationid" : None, - "username" : None, - "type" : None, - "include_session": "false", - } - headers = { - "Referer": self.root, - } - yield Message.Version, 1 for deviation in self.deviations(): - params["deviationid"] = deviation["deviationId"] - params["username"] = deviation["author"]["username"] - params["type"] = "journal" if deviation["isJournal"] else "art" - data = self.request(url, params=params, headers=headers).json() + data = self.api.deviation_extended_fetch( + deviation["deviationId"], + deviation["author"]["username"], + "journal" if deviation["isJournal"] else "art", + ) if "deviation" not in data: - self.log.warning("Skipping %s", params["deviationid"]) + self.log.warning("Skipping ID %s", deviation["deviationId"]) + self.log.debug("Server response: %s", data) continue deviation = self._extract(data) @@ -887,6 +904,19 @@ class DeviantartAPI(): params = {"mature_content": self.mature} return self._call(endpoint, params) + def deviation_extended_fetch(self, 
deviation_id, user, kind): + url = ("https://www.deviantart.com/_napi/da-browse/shared_api" + "/deviation/extended_fetch") + headers = {"Referer": "https://www.deviantart.com/"} + params = { + "deviationid" : deviation_id, + "username" : user, + "type" : kind, + "include_session": "false", + } + return self.extractor.request( + url, headers=headers, params=params, fatal=None).json() + def deviation_metadata(self, deviations): """ Fetch deviation metadata for a set of deviations""" if not deviations: diff --git a/gallery_dl/extractor/gfycat.py b/gallery_dl/extractor/gfycat.py index 1dcb3c8..2ebbec8 100644 --- a/gallery_dl/extractor/gfycat.py +++ b/gallery_dl/extractor/gfycat.py @@ -14,7 +14,7 @@ from .common import Extractor, Message class GfycatExtractor(Extractor): """Base class for gfycat extractors""" category = "gfycat" - filename_fmt = "{category}_{gfyName}.{extension}" + filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}" archive_fmt = "{gfyName}" root = "https://gfycat.com" diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index e4f18b3..ef08d69 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -20,12 +20,9 @@ class HitomiGalleryExtractor(GalleryExtractor): pattern = r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)" test = ( ("https://hitomi.la/galleries/867789.html", { - "url": "cb759868d090fe0e2655c3e29ebf146054322b6d", + "pattern": r"https://aa.hitomi.la/galleries/867789/\d+.jpg", "keyword": "d097a8db8e810045131b4510c41714004f9eff3a", - }), - ("https://hitomi.la/galleries/1036181.html", { - # "aa" subdomain for gallery-id ending in 1 (#142) - "pattern": r"https://aa\.hitomi\.la/", + "count": 16, }), ("https://hitomi.la/galleries/1401410.html", { # download test @@ -37,18 +34,39 @@ class HitomiGalleryExtractor(GalleryExtractor): "url": "c2a84185f467450b8b9b72fbe40c0649029ce007", "count": 210, }), + ("https://hitomi.la/galleries/1045954.html", { + # fallback for galleries only 
available through /reader/ URLs + "url": "055c898a36389719799d6bce76889cc4ea4421fc", + "count": 1413, + }), ("https://hitomi.la/reader/867789.html"), ) def __init__(self, match): - self.gallery_id = text.parse_int(match.group(1)) + self.gallery_id = match.group(1) + self.fallback = False url = "{}/galleries/{}.html".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) + def request(self, url, **kwargs): + response = GalleryExtractor.request(self, url, fatal=False, **kwargs) + if response.status_code == 404: + self.fallback = True + url = url.replace("/galleries/", "/reader/") + response = GalleryExtractor.request(self, url, **kwargs) + return response + def metadata(self, page): + if self.fallback: + return { + "gallery_id": text.parse_int(self.gallery_id), + "title": text.unescape(text.extract( + page, "<title>", "<")[0].rpartition(" | ")[0]), + } + extr = text.extract_from(page, page.index('<h1><a href="/reader/')) data = { - "gallery_id": self.gallery_id, + "gallery_id": text.parse_int(self.gallery_id), "title" : text.unescape(extr('.html">', '<').strip()), "artist" : self._prep(extr('<h2>', '</h2>')), "group" : self._prep(extr('<td>Group</td><td>', '</td>')), @@ -66,7 +84,7 @@ class HitomiGalleryExtractor(GalleryExtractor): def images(self, page): # see https://ltn.hitomi.la/common.js - offset = self.gallery_id % 2 if self.gallery_id % 10 != 1 else 0 + offset = text.parse_int(self.gallery_id[-1]) % 3 subdomain = chr(97 + offset) + "a" base = "https://" + subdomain + ".hitomi.la/galleries/" @@ -79,6 +97,8 @@ class HitomiGalleryExtractor(GalleryExtractor): url = "{}/reader/{}.html".format(self.root, self.gallery_id) page = self.request(url).text begin, end = ">//g.hitomi.la/galleries/", "</div>" + elif self.fallback: + begin, end = ">//g.hitomi.la/galleries/", "</div>" else: begin, end = "'//tn.hitomi.la/smalltn/", ".jpg'," diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py index 8541e4f..6e7f139 100644 
--- a/gallery_dl/extractor/komikcast.py +++ b/gallery_dl/extractor/komikcast.py @@ -73,7 +73,7 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): return [ (text.unescape(url), None) for url in re.findall(r"<img[^>]* src=[\"']([^\"']+)", readerarea) - if "/Banner-" not in url + if "/Banner-" not in url and "/WM-Sampingan." not in url ] diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py index 23750db..e125184 100644 --- a/gallery_dl/extractor/xhamster.py +++ b/gallery_dl/extractor/xhamster.py @@ -119,8 +119,8 @@ class XhamsterGalleryExtractor(XhamsterExtractor): "views" : text.parse_int(imgs["views"]), "likes" : text.parse_int(imgs["rating"]["likes"]), "dislikes" : text.parse_int(imgs["rating"]["dislikes"]), - "title" : imgs["title"], - "description": imgs["description"], + "title" : text.unescape(imgs["title"]), + "description": text.unescape(imgs["description"]), "thumbnail" : imgs["thumbURL"], }, "count": text.parse_int(imgs["quantity"]), diff --git a/gallery_dl/extractor/yaplog.py b/gallery_dl/extractor/yaplog.py index b3c5501..b07ba4b 100644 --- a/gallery_dl/extractor/yaplog.py +++ b/gallery_dl/extractor/yaplog.py @@ -12,6 +12,9 @@ from .common import Extractor, Message, AsynchronousMixin from .. 
import text, util +BASE_PATTERN = r"(?:https?://)?(?:www\.)?yaplog\.jp/([\w-]+)" + + class YaplogExtractor(AsynchronousMixin, Extractor): """Base class for yaplog extractors""" category = "yaplog" @@ -31,11 +34,15 @@ class YaplogExtractor(AsynchronousMixin, Extractor): for num, url in enumerate(urls, 1): page = self.request(url).text if num > 1 else url iurl = text.extract(page, '<img src="', '"')[0] - iid, _, ext = iurl.rpartition("/")[2].rpartition(".") + if iurl[0] == "/": + iurl = text.urljoin(self.root, iurl) + name, _, ext = iurl.rpartition("/")[2].rpartition(".") + iid = name.rpartition("_")[0] or name image = { "url" : iurl, "num" : num, - "id" : text.parse_int(iid.partition("_")[0]), + "id" : text.parse_int(iid, iid), + "filename" : name, "extension": ext, "post" : post, } @@ -52,7 +59,8 @@ class YaplogExtractor(AsynchronousMixin, Extractor): prev , pos = text.extract(page, 'class="last"><a href="', '"', pos) urls = list(text.extract_iter(page, '<li><a href="', '"', pos)) - urls[0] = page # cache HTML of first page + if urls: + urls[0] = page # cache HTML of first page if len(urls) == 24 and text.extract(page, '(1/', ')')[0] != '24': # there are a maximum of 24 image entries in an /image/ page @@ -69,14 +77,14 @@ class YaplogExtractor(AsynchronousMixin, Extractor): "id" : text.parse_int(pid), "title": text.unescape(title[:-3]), "user" : self.user, - "date" : date, + "date" : text.parse_datetime(date, "%B %d [%a], %Y, %H:%M"), } class YaplogBlogExtractor(YaplogExtractor): """Extractor for a user's blog on yaplog.jp""" subcategory = "blog" - pattern = r"(?:https?://)?(?:www\.)?yaplog\.jp/(\w+)/?(?:$|[?&#])" + pattern = BASE_PATTERN + r"/?(?:$|[?&#])" test = ("https://yaplog.jp/omitakashi3", { "pattern": r"https://img.yaplog.jp/img/18/pc/o/m/i/omitakashi3/0/", "count": ">= 2", @@ -92,12 +100,23 @@ class YaplogBlogExtractor(YaplogExtractor): class YaplogPostExtractor(YaplogExtractor): """Extractor for images from a blog post on yaplog.jp""" subcategory = 
"post" - pattern = (r"(?:https?://)?(?:www\.)?yaplog\.jp" - r"/(\w+)/(?:archive|image)/(\d+)") - test = ("https://yaplog.jp/imamiami0726/image/1299", { - "url": "896cae20fa718735a57e723c48544e830ff31345", - "keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3", - }) + pattern = BASE_PATTERN + r"/(?:archive|image)/(\d+)" + test = ( + ("https://yaplog.jp/imamiami0726/image/1299", { + "url": "896cae20fa718735a57e723c48544e830ff31345", + "keyword": "22df8ad6cb534514c6bb2ff000381d156769a620", + }), + # complete image URLs (#443) + ("https://yaplog.jp/msjane/archive/246", { + "pattern": r"https://yaplog.jp/cv/msjane/img/246/img\d+_t.jpg" + }), + # empty post (#443) + ("https://yaplog.jp/f_l_a_s_c_o/image/872", { + "count": 0, + }), + # blog names with '-' (#443) + ("https://yaplog.jp/a-pierrot-o/image/3946/22779"), + ) def __init__(self, match): YaplogExtractor.__init__(self, match) diff --git a/gallery_dl/job.py b/gallery_dl/job.py index d529705..8b61024 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -237,6 +237,9 @@ class DownloadJob(Job): self.out.success(pathfmt.path, 0) if archive: archive.add(keywords) + if postprocessors: + for pp in postprocessors: + pp.run_after(pathfmt) self._skipcnt = 0 def handle_urllist(self, urls, keywords): diff --git a/gallery_dl/option.py b/gallery_dl/option.py index ecc2ee3..d3119b7 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -27,6 +27,15 @@ class ConfigConstAction(argparse.Action): namespace.options.append(((self.dest,), self.const)) +class AppendCommandAction(argparse.Action): + def __call__(self, parser, namespace, values, option_string=None): + items = getattr(namespace, self.dest, None) or [] + val = self.const.copy() + val["command"] = values + items.append(val) + setattr(namespace, self.dest, items) + + class DeprecatedConfigConstAction(argparse.Action): """Set argparse const values as config values + deprecation warning""" def __call__(self, parser, namespace, values, option_string=None): @@ -304,6 
+313,13 @@ def build_parser(): help="Store downloaded files in a ZIP archive", ) postprocessor.add_argument( + "--exec", + dest="postprocessors", metavar="CMD", + action=AppendCommandAction, const={"name": "exec"}, + help=("Execute CMD for each downloaded file. " + "Example: --exec 'magick convert {} {}.png && rm {}'"), + ) + postprocessor.add_argument( "--ugoira-conv", dest="postprocessors", action="append_const", const={"name": "ugoira", "ffmpeg-args": ( diff --git a/gallery_dl/postprocessor/common.py b/gallery_dl/postprocessor/common.py index b967cf6..71ef932 100644 --- a/gallery_dl/postprocessor/common.py +++ b/gallery_dl/postprocessor/common.py @@ -8,20 +8,30 @@ """Common classes and constants used by postprocessor modules.""" -from . import log +import logging class PostProcessor(): """Base class for postprocessors""" - log = log - def prepare(self, pathfmt): - """ """ + def __init__(self): + name = self.__class__.__name__[:-2].lower() + self.log = logging.getLogger("postprocessor." + name) - def run(self, pathfmt): + @staticmethod + def prepare(pathfmt): + """Update file paths, etc.""" + + @staticmethod + def run(pathfmt): """Execute the postprocessor for a file""" - def finalize(self): + @staticmethod + def run_after(pathfmt): + """Execute postprocessor after moving a file to its target location""" + + @staticmethod + def finalize(): """Cleanup""" def __repr__(self): diff --git a/gallery_dl/postprocessor/exec.py b/gallery_dl/postprocessor/exec.py index c86b480..19a9b87 100644 --- a/gallery_dl/postprocessor/exec.py +++ b/gallery_dl/postprocessor/exec.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2018 Mike Fährmann +# Copyright 2018-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -9,35 +9,61 @@ """Execute processes""" from .common import PostProcessor +from .. 
import util import subprocess +import os + + +if os.name == "nt": + def quote(s): + return '"' + s.replace('"', '\\"') + '"' +else: + from shlex import quote class ExecPP(PostProcessor): def __init__(self, pathfmt, options): PostProcessor.__init__(self) + args = options["command"] - try: - self.args = options["command"] - self.args[0] # test if 'args' is subscriptable - except (KeyError, IndexError, TypeError): - raise TypeError("option 'command' must be a non-empty list") + if isinstance(args, str): + if "{}" not in args: + args += " {}" + self.args = args + self.shell = True + self._format = self._format_args_string + else: + self.args = [util.Formatter(arg) for arg in args] + self.shell = False + self._format = self._format_args_list if options.get("async", False): - self._exec = subprocess.Popen + self._exec = self._exec_async + + def run_after(self, pathfmt): + self._exec(self._format(pathfmt)) - def run(self, pathfmt): - self._exec([ - arg.format_map(pathfmt.keywords) - for arg in self.args - ]) + def _format_args_string(self, pathfmt): + return self.args.replace("{}", quote(pathfmt.realpath)) + + def _format_args_list(self, pathfmt): + kwdict = pathfmt.kwdict + kwdict["_directory"] = pathfmt.realdirectory + kwdict["_filename"] = pathfmt.filename + kwdict["_path"] = pathfmt.realpath + return [arg.format_map(kwdict) for arg in self.args] def _exec(self, args): - retcode = subprocess.Popen(args).wait() + self.log.debug("Running '%s'", args) + retcode = subprocess.Popen(args, shell=self.shell).wait() if retcode: self.log.warning( - "executing '%s' returned non-zero exit status %d", - " ".join(args), retcode) + "Executing '%s' returned with non-zero exit status (%d)", + " ".join(args) if isinstance(args, list) else args, retcode) + + def _exec_async(self, args): + subprocess.Popen(args, shell=self.shell) __postprocessor__ = ExecPP diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 6303fab..e83bed6 100644 --- a/gallery_dl/version.py +++ 
b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.10.5" +__version__ = "1.10.6" |