diff options
| author | 2020-03-16 23:20:15 -0400 | |
|---|---|---|
| committer | 2020-03-16 23:20:15 -0400 | |
| commit | e8cc000750de972384f2f34d02d42222b4018ae9 (patch) | |
| tree | 26eb0bacedff7480d29bafcf184ca529cf9f1d9f /gallery_dl/extractor/khinsider.py | |
| parent | 4366125d2580982abb57bc65a26fc1fb8ef2a5df (diff) | |
New upstream version 1.13.2 (upstream/1.13.2)
Diffstat (limited to 'gallery_dl/extractor/khinsider.py')
| -rw-r--r-- | gallery_dl/extractor/khinsider.py | 60 |
1 files changed, 29 insertions, 31 deletions
diff --git a/gallery_dl/extractor/khinsider.py b/gallery_dl/extractor/khinsider.py index c9e6959..822a743 100644 --- a/gallery_dl/extractor/khinsider.py +++ b/gallery_dl/extractor/khinsider.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2016-2019 Mike Fährmann +# Copyright 2016-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract soundtracks from https://downloads.khinsider.com/""" +"""Extractors for https://downloads.khinsider.com/""" from .common import Extractor, Message, AsynchronousMixin from .. import text, exception @@ -16,54 +16,52 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor): """Extractor for soundtracks from khinsider.com""" category = "khinsider" subcategory = "soundtrack" - directory_fmt = ("{category}", "{album}") - archive_fmt = "{album}_{filename}.{extension}" + directory_fmt = ("{category}", "{album[name]}") + archive_fmt = "{filename}.{extension}" pattern = (r"(?:https?://)?downloads\.khinsider\.com" r"/game-soundtracks/album/([^/?&#]+)") + root = "https://downloads.khinsider.com" test = (("https://downloads.khinsider.com" "/game-soundtracks/album/horizon-riders-wii"), { - "pattern": r"https?://\d+\.\d+\.\d+\.\d+/ost/horizon-riders-wii/[^/]+" - r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack\.mp3", - "count": 1, - "keyword": "b4f460c78dd23e1f1121f4ac784dd67ded7c2679", + "pattern": r"https?://vgmdownloads.com/soundtracks/horizon-riders-wii/" + r"[^/]+/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack.mp3", + "keyword": "5b2c35cce638c326cab2a4f7a79f245d008d62ff", }) - root = "https://downloads.khinsider.com" def __init__(self, match): Extractor.__init__(self, match) self.album = match.group(1) def items(self): - url = (self.root + "/game-soundtracks/album/" + self.album) + url = self.root + "/game-soundtracks/album/" + self.album page = self.request(url, 
encoding="utf-8").text - data = self.get_job_metadata(page) + if "Download all songs at once:" not in page: + raise exception.NotFoundError("soundtrack") + + data = self.metadata(page) yield Message.Version, 1 yield Message.Directory, data - for url, track in self.get_album_tracks(page): + for track in self.tracks(page): track.update(data) - yield Message.Url, url, track + yield Message.Url, track["url"], track - def get_job_metadata(self, page): - """Collect metadata for extractor-job""" - if "Download all songs at once:" not in page: - raise exception.NotFoundError("soundtrack") - data = text.extract_all(page, ( - ("album", "Album name: <b>", "</b>"), - ("count", "Number of Files: <b>", "</b>"), - ("size" , "Total Filesize: <b>", "</b>"), - ("date" , "Date added: <b>", "</b>"), - ("type" , "Album type: <b>", "</b>"), - ))[0] - data["album"] = text.unescape(data["album"]) - return data + def metadata(self, page): + extr = text.extract_from(page) + return {"album": { + "name" : text.unescape(extr("Album name: <b>", "<")), + "count": text.parse_int(extr("Number of Files: <b>", "<")), + "size" : text.parse_bytes(extr("Total Filesize: <b>", "<")[:-1]), + "date" : extr("Date added: <b>", "<"), + "type" : extr("Album type: <b>", "<"), + }} - def get_album_tracks(self, page): - """Collect url and metadata for all tracks of a soundtrack""" + def tracks(self, page): page = text.extract(page, '<table id="songlist">', '</table>')[0] + for num, url in enumerate(text.extract_iter( page, '<td class="clickable-row"><a href="', '"'), 1): url = text.urljoin(self.root, url) page = self.request(url, encoding="utf-8").text - url = text.extract( - page, '<p><a style="color: #21363f;" href="', '"')[0] - yield url, text.nameext_from_url(url, {"num": num}) + + url = text.extract(page, 'style="color: #21363f;" href="', '"')[0] + yield text.nameext_from_url(url, {"num": num, "url": url}) |
