From 40f5fe6edef268632d3bc484e85e5b37bad67bff Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Fri, 11 Oct 2019 20:28:32 -0400 Subject: New upstream version 1.10.6 --- gallery_dl/extractor/8muses.py | 2 +- gallery_dl/extractor/behance.py | 7 ++- gallery_dl/extractor/deviantart.py | 114 +++++++++++++++++++++++-------------- gallery_dl/extractor/gfycat.py | 2 +- gallery_dl/extractor/hitomi.py | 36 +++++++++--- gallery_dl/extractor/komikcast.py | 2 +- gallery_dl/extractor/xhamster.py | 4 +- gallery_dl/extractor/yaplog.py | 41 +++++++++---- 8 files changed, 140 insertions(+), 68 deletions(-) (limited to 'gallery_dl/extractor') diff --git a/gallery_dl/extractor/8muses.py b/gallery_dl/extractor/8muses.py index f5ca9ce..089a0e9 100644 --- a/gallery_dl/extractor/8muses.py +++ b/gallery_dl/extractor/8muses.py @@ -53,7 +53,7 @@ class _8musesAlbumExtractor(Extractor): "private": False, }, }), - ("https://www.8muses.com/comics/album/Fakku-Comics/6?sort=az", { + ("https://www.8muses.com/comics/album/Fakku-Comics/7?sort=az", { "count": ">= 70", "keyword": {"name": r"re:^[R-Zr-z]"}, }), diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 467a935..c701927 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -33,8 +33,11 @@ class BehanceExtractor(Extractor): if data["fields"] and isinstance(data["fields"][0], dict): data["fields"] = [field["name"] for field in data["fields"]] data["owners"] = [owner["display_name"] for owner in data["owners"]] - if "tags" in data: - data["tags"] = [tag["title"] for tag in data["tags"]] + + tags = data.get("tags") or () + if tags and isinstance(tags[0], dict): + tags = [tag["title"] for tag in tags] + data["tags"] = tags # backwards compatibility data["gallery_id"] = data["id"] diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 525cc84..ab32a00 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -47,12 +47,6 @@ class DeviantartExtractor(Extractor): if self.quality: self.quality = "q_{}".format(self.quality) - if self.original != "image": - self._update_content = self._update_content_default - else: - self._update_content = self._update_content_image - self.original = True - self.commit_journal = { "html": self._commit_journal_html, "text": self._commit_journal_text, @@ -98,7 +92,8 @@ class DeviantartExtractor(Extractor): yield self.commit(deviation, content) elif deviation["is_downloadable"]: - content = self.api.deviation_download(deviation["deviationid"]) + content = {} + self._update_content(deviation, content) yield self.commit(deviation, content) if "videos" in deviation: @@ -240,15 +235,29 @@ class DeviantartExtractor(Extractor): url = "{}/{}/{}/0/".format(self.root, self.user, category) return [(url + folder["name"], folder) for folder in folders] - def _update_content_default(self, deviation, content): - content.update(self.api.deviation_download(deviation["deviationid"])) - - def _update_content_image(self, deviation, content): - data = self.api.deviation_download(deviation["deviationid"]) - url = data["src"].partition("?")[0] - mtype = mimetypes.guess_type(url, False)[0] - if mtype and mtype.startswith("image/"): - content.update(data) + def _update_content(self, deviation, content): + try: + data = self.api.deviation_extended_fetch( + deviation["index"], + deviation["author"]["username"], + "journal" if "excerpt" in deviation else "art", + ) + download = data["deviation"]["extended"]["download"] + download["src"] = download["url"] + except Exception as e: + self.log.warning( + "Unable to fetch original download URL for ID %s ('%s: %s')", + deviation["index"], e.__class__.__name__, e, + ) + self.log.debug("Server response: %s", data) + else: + if self.original == "image": + url = data["src"].partition("?")[0] + mtype = mimetypes.guess_type(url, False)[0] + if not mtype or not mtype.startswith("image/"): + return + del download["url"] + content.update(download) class DeviantartGalleryExtractor(DeviantartExtractor): @@ -258,8 +267,8 @@ class DeviantartGalleryExtractor(DeviantartExtractor): pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$" test = ( ("https://www.deviantart.com/shimoda7/gallery/", { - "pattern": r"https://(s3.amazonaws.com/origin-(img|orig)" - r".deviantart.net/|images-wixmp-\w+.wixmp.com/)", + "pattern": r"https://(www.deviantart.com/download/\d+/" + r"|images-wixmp-[^.]+.wixmp.com/f/.+/.+.jpg\?token=.+)", "count": ">= 30", "keyword": { "allows_comments": bool, @@ -384,7 +393,7 @@ class DeviantartStashExtractor(DeviantartExtractor): pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)" test = ( ("https://sta.sh/022c83odnaxc", { - "pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net", + "pattern": r"https://sta.sh/download/7549925030122512/.+\?token=", "count": 1, }), # multiple stash items @@ -394,6 +403,7 @@ class DeviantartStashExtractor(DeviantartExtractor): }), # downloadable, but no "content" field (#307) ("https://sta.sh/024t4coz16mi", { + "pattern": r"https://sta.sh/download/7800709982190282/.+\?token=", "count": 1, }), ("https://sta.sh/abcdefghijkl", { @@ -411,16 +421,34 @@ class DeviantartStashExtractor(DeviantartExtractor): def deviations(self): url = "https://sta.sh/" + self.stash_id page = self.request(url).text - deviation_id = text.extract(page, '//deviation/', '"')[0] + deviation_id, pos = text.extract(page, '//deviation/', '"') if deviation_id: - yield self.api.deviation(deviation_id) + deviation = self.api.deviation(deviation_id) + pos = page.find("dev-page-download", pos) + if pos >= 0: + deviation["_download"] = { + "width" : text.parse_int(text.extract( + page, 'data-download_width="' , '"', pos)[0]), + "height": text.parse_int(text.extract( + page, 'data-download_height="', '"', pos)[0]), + "src" : text.unescape(text.extract( + page, 'data-download_url="' , '"', pos)[0]), + } + return (deviation,) else: data = {"_extractor": DeviantartStashExtractor} page = text.extract( - page, '" + elif self.fallback: + begin, end = ">//g.hitomi.la/galleries/", "" else: begin, end = "'//tn.hitomi.la/smalltn/", ".jpg'," diff --git a/gallery_dl/extractor/komikcast.py b/gallery_dl/extractor/komikcast.py index 8541e4f..6e7f139 100644 --- a/gallery_dl/extractor/komikcast.py +++ b/gallery_dl/extractor/komikcast.py @@ -73,7 +73,7 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor): return [ (text.unescape(url), None) for url in re.findall(r"]* src=[\"']([^\"']+)", readerarea) - if "/Banner-" not in url + if "/Banner-" not in url and "/WM-Sampingan." not in url ] diff --git a/gallery_dl/extractor/xhamster.py b/gallery_dl/extractor/xhamster.py index 23750db..e125184 100644 --- a/gallery_dl/extractor/xhamster.py +++ b/gallery_dl/extractor/xhamster.py @@ -119,8 +119,8 @@ class XhamsterGalleryExtractor(XhamsterExtractor): "views" : text.parse_int(imgs["views"]), "likes" : text.parse_int(imgs["rating"]["likes"]), "dislikes" : text.parse_int(imgs["rating"]["dislikes"]), - "title" : imgs["title"], - "description": imgs["description"], + "title" : text.unescape(imgs["title"]), + "description": text.unescape(imgs["description"]), "thumbnail" : imgs["thumbURL"], }, "count": text.parse_int(imgs["quantity"]), diff --git a/gallery_dl/extractor/yaplog.py b/gallery_dl/extractor/yaplog.py index b3c5501..b07ba4b 100644 --- a/gallery_dl/extractor/yaplog.py +++ b/gallery_dl/extractor/yaplog.py @@ -12,6 +12,9 @@ from .common import Extractor, Message, AsynchronousMixin from .. import text, util +BASE_PATTERN = r"(?:https?://)?(?:www\.)?yaplog\.jp/([\w-]+)" + + class YaplogExtractor(AsynchronousMixin, Extractor): """Base class for yaplog extractors""" category = "yaplog" @@ -31,11 +34,15 @@ class YaplogExtractor(AsynchronousMixin, Extractor): for num, url in enumerate(urls, 1): page = self.request(url).text if num > 1 else url iurl = text.extract(page, '= 2", @@ -92,12 +100,23 @@ class YaplogBlogExtractor(YaplogExtractor): class YaplogPostExtractor(YaplogExtractor): """Extractor for images from a blog post on yaplog.jp""" subcategory = "post" - pattern = (r"(?:https?://)?(?:www\.)?yaplog\.jp" - r"/(\w+)/(?:archive|image)/(\d+)") - test = ("https://yaplog.jp/imamiami0726/image/1299", { - "url": "896cae20fa718735a57e723c48544e830ff31345", - "keyword": "f8d8781e61c4c38238a7622d6df6c905f864e5d3", - }) + pattern = BASE_PATTERN + r"/(?:archive|image)/(\d+)" + test = ( + ("https://yaplog.jp/imamiami0726/image/1299", { + "url": "896cae20fa718735a57e723c48544e830ff31345", + "keyword": "22df8ad6cb534514c6bb2ff000381d156769a620", + }), + # complete image URLs (#443) + ("https://yaplog.jp/msjane/archive/246", { + "pattern": r"https://yaplog.jp/cv/msjane/img/246/img\d+_t.jpg" + }), + # empty post (#443) + ("https://yaplog.jp/f_l_a_s_c_o/image/872", { + "count": 0, + }), + # blog names with '-' (#443) + ("https://yaplog.jp/a-pierrot-o/image/3946/22779"), + ) def __init__(self, match): YaplogExtractor.__init__(self, match) -- cgit v1.2.3