diff options
Diffstat (limited to 'gallery_dl/extractor/deviantart.py')
| -rw-r--r-- | gallery_dl/extractor/deviantart.py | 308 |
1 files changed, 252 insertions, 56 deletions
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 3686e1b..836fae7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -177,24 +177,7 @@ class DeviantartExtractor(Extractor): yield self.commit(deviation, deviation["flash"]) if self.commit_journal: - if "excerpt" in deviation: - # journal = self.api.deviation_content( - # deviation["deviationid"]) - if not self.eclipse_api: - self.eclipse_api = DeviantartEclipseAPI(self) - content = self.eclipse_api.deviation_extended_fetch( - deviation["index"], - deviation["author"]["username"], - "journal", - )["deviation"]["textContent"] - html = content["html"]["markup"] - if html.startswith("{"): - html = content["excerpt"].replace("\n", "<br />") - journal = {"html": html} - elif "body" in deviation: - journal = {"html": deviation.pop("body")} - else: - journal = None + journal = self._extract_journal(deviation) if journal: if self.extra: deviation["_journal"] = journal["html"] @@ -375,6 +358,204 @@ class DeviantartExtractor(Extractor): deviation["extension"] = "txt" return Message.Url, txt, deviation + def _extract_journal(self, deviation): + if "excerpt" in deviation: + # # empty 'html' + # return self.api.deviation_content(deviation["deviationid"]) + + if "_page" in deviation: + page = deviation["_page"] + del deviation["_page"] + else: + page = self._limited_request(deviation["url"]).text + + # extract journal html from webpage + html = text.extr( + page, + "<h2>Literature Text</h2></span><div>", + "</div></section></div></div>") + if html: + return {"html": html} + + self.log.debug("%s: Failed to extract journal HTML from webpage. " + "Falling back to __INITIAL_STATE__ markup.", + deviation["index"]) + + # parse __INITIAL_STATE__ as fallback + state = util.json_loads(text.extr( + page, 'window.__INITIAL_STATE__ = JSON.parse("', '");') + .replace("\\\\", "\\").replace("\\'", "'").replace('\\"', '"')) + deviations = state["@@entities"]["deviation"] + content = deviations.popitem()[1]["textContent"] + + html = self._textcontent_to_html(deviation, content) + if html: + return {"html": html} + return {"html": content["excerpt"].replace("\n", "<br />")} + + if "body" in deviation: + return {"html": deviation.pop("body")} + return None + + def _textcontent_to_html(self, deviation, content): + html = content["html"] + markup = html["markup"] + + if not markup.startswith("{"): + return markup + + if html["type"] == "tiptap": + try: + return self._tiptap_to_html(markup) + except Exception as exc: + self.log.debug("", exc_info=exc) + self.log.error("%s: '%s: %s'", deviation["index"], + exc.__class__.__name__, exc) + + self.log.warning("%s: Unsupported '%s' markup.", + deviation["index"], html["type"]) + + def _tiptap_to_html(self, markup): + html = [] + + html.append('<div data-editor-viewer="1" ' + 'class="_83r8m _2CKTq _3NjDa mDnFl">') + data = util.json_loads(markup) + for block in data["document"]["content"]: + self._tiptap_process_content(html, block) + html.append("</div>") + + return "".join(html) + + def _tiptap_process_content(self, html, content): + type = content["type"] + + if type == "paragraph": + children = content.get("content") + if children: + html.append('<p style="') + + attrs = content["attrs"] + if "textAlign" in attrs: + html.append("text-align:") + html.append(attrs["textAlign"]) + html.append(";") + html.append('margin-inline-start:0px">') + + for block in children: + self._tiptap_process_content(html, block) + html.append("</p>") + else: + html.append('<p class="empty-p"><br/></p>') + + elif type == "text": + self._tiptap_process_text(html, content) + + elif type == "hardBreak": + html.append("<br/><br/>") + + elif type == "horizontalRule": + html.append("<hr/>") + + elif type == "da-deviation": + self._tiptap_process_deviation(html, content) + + elif type == "da-mention": + user = content["attrs"]["user"]["username"] + html.append('<a href="https://www.deviantart.com/') + html.append(user.lower()) + html.append('" data-da-type="da-mention" data-user="">@<!-- -->') + html.append(user) + html.append('</a>') + + else: + self.log.warning("Unsupported content type '%s'", type) + + def _tiptap_process_text(self, html, content): + marks = content.get("marks") + if marks: + close = [] + for mark in marks: + type = mark["type"] + if type == "link": + html.append('<a href="') + html.append(text.escape(mark["attrs"]["href"])) + html.append('" rel="noopener noreferrer nofollow ugc">') + close.append("</a>") + elif type == "bold": + html.append("<strong>") + close.append("</strong>") + elif type == "italic": + html.append("<em>") + close.append("</em>") + elif type == "underline": + html.append("<u>") + close.append("</u>") + elif type == "textStyle" and len(mark) <= 1: + pass + else: + self.log.warning("Unsupported text marker '%s'", type) + close.reverse() + html.append(text.escape(content["text"])) + html.extend(close) + else: + html.append(text.escape(content["text"])) + + def _tiptap_process_deviation(self, html, content): + dev = content["attrs"]["deviation"] + media = dev.get("media") or () + + html.append('<div class="jjNX2">') + html.append('<figure class="Qf-HY" data-da-type="da-deviation" ' + 'data-deviation="" ' + 'data-width="" data-link="" data-alignment="center">') + + if "baseUri" in media: + url, formats = self._eclipse_media(media) + full = formats["fullview"] + + html.append('<a href="') + html.append(text.escape(dev["url"])) + html.append('" class="_3ouD5" style="margin:0 auto;display:flex;' + 'align-items:center;justify-content:center;' + 'overflow:hidden;width:780px;height:') + html.append(str(780 * full["h"] / full["w"])) + html.append('px">') + + html.append('<img src="') + html.append(text.escape(url)) + html.append('" alt="') + html.append(text.escape(dev["title"])) + html.append('" style="width:100%;max-width:100%;display:block"/>') + html.append("</a>") + + elif "textContent" in dev: + html.append('<div class="_32Hs4" style="width:350px">') + + html.append('<a href="') + html.append(text.escape(dev["url"])) + html.append('" class="_3ouD5">') + + html.append('''\ +<section class="Q91qI aG7Yi" style="width:350px;height:313px">\ +<div class="_16ECM _1xMkk" aria-hidden="true">\ +<svg height="100%" viewBox="0 0 15 12" preserveAspectRatio="xMidYMin slice" \ +fill-rule="evenodd">\ +<linearGradient x1="87.8481761%" y1="16.3690766%" \ +x2="45.4107524%" y2="71.4898596%" id="app-root-3">\ +<stop stop-color="#00FF62" offset="0%"></stop>\ +<stop stop-color="#3197EF" stop-opacity="0" offset="100%"></stop>\ +</linearGradient>\ +<text class="_2uqbc" fill="url(#app-root-3)" text-anchor="end" x="15" y="11">J\ +</text></svg></div><div class="_1xz9u">Literature</div><h3 class="_2WvKD">\ +''') + html.append(text.escape(dev["title"])) + html.append('</h3><div class="_2CPLm">') + html.append(text.escape(dev["textContent"]["excerpt"])) + html.append('</div></section></a></div>') + + html.append('</figure></div>') + def _extract_content(self, deviation): content = deviation["content"] @@ -552,6 +733,23 @@ class DeviantartExtractor(Extractor): self.log.info("Unwatching %s", username) self.api.user_friends_unwatch(username) + def _eclipse_media(self, media, format="preview"): + url = [media["baseUri"], ] + + formats = { + fmt["t"]: fmt + for fmt in media["types"] + } + + tokens = media["token"] + if len(tokens) == 1: + fmt = formats[format] + url.append(fmt["c"].replace("<prettyName>", media["prettyName"])) + url.append("?token=") + url.append(tokens[-1]) + + return "".join(url), formats + def _eclipse_to_oauth(self, eclipse_api, deviations): for obj in deviations: deviation = obj["deviation"] if "deviation" in obj else obj @@ -709,43 +907,35 @@ class DeviantartStashExtractor(DeviantartExtractor): archive_fmt = "{index}.{extension}" pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)" r"/([a-z0-9]+)") - example = "https://sta.sh/abcde" + example = "https://www.deviantart.com/stash/abcde" skip = Extractor.skip def __init__(self, match): DeviantartExtractor.__init__(self, match) self.user = None - self.stash_id = match.group(1) def deviations(self, stash_id=None): if stash_id is None: - stash_id = self.stash_id - url = "https://sta.sh/" + stash_id + stash_id = self.groups[0] + url = "https://www.deviantart.com/stash/" + stash_id page = self._limited_request(url).text if stash_id[0] == "0": uuid = text.extr(page, '//deviation/', '"') if uuid: deviation = self.api.deviation(uuid) + deviation["_page"] = page deviation["index"] = text.parse_int(text.extr( page, '\\"deviationId\\":', ',')) yield deviation return - for item in text.extract_iter( - page, 'class="stash-thumb-container', '</div>'): - url = text.extr(item, '<a href="', '"') - - if url: - stash_id = url.rpartition("/")[2] - else: - stash_id = text.extr(item, 'gmi-stashid="', '"') - stash_id = "2" + util.bencode(text.parse_int( - stash_id), "0123456789abcdefghijklmnopqrstuvwxyz") - - if len(stash_id) > 2: - yield from self.deviations(stash_id) + for sid in text.extract_iter( + page, 'href="https://www.deviantart.com/stash/', '"'): + if sid == stash_id or sid.endswith("#comments"): + continue + yield from self.deviations(sid) class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -939,11 +1129,14 @@ class DeviantartDeviationExtractor(DeviantartExtractor): else: url = "{}/view/{}/".format(self.root, self.deviation_id) - uuid = text.extr(self._limited_request(url).text, - '"deviationUuid\\":\\"', '\\') + page = self._limited_request(url, notfound="deviation").text + uuid = text.extr(page, '"deviationUuid\\":\\"', '\\') if not uuid: raise exception.NotFoundError("deviation") - return (self.api.deviation(uuid),) + + deviation = self.api.deviation(uuid) + deviation["_page"] = page + return (deviation,) class DeviantartScrapsExtractor(DeviantartExtractor): @@ -1816,25 +2009,28 @@ JOURNAL_TEMPLATE_HTML = """text:<!DOCTYPE html> <head> <meta charset="utf-8"> <title>{title}</title> - <link rel="stylesheet" href="https://st.deviantart.net/\ -css/deviantart-network_lc.css?3843780832"> - <link rel="stylesheet" href="https://st.deviantart.net/\ -css/group_secrets_lc.css?3250492874"> - <link rel="stylesheet" href="https://st.deviantart.net/\ -css/v6core_lc.css?4246581581"> - <link rel="stylesheet" href="https://st.deviantart.net/\ -css/sidebar_lc.css?1490570941"> - <link rel="stylesheet" href="https://st.deviantart.net/\ -css/writer_lc.css?3090682151"> - <link rel="stylesheet" href="https://st.deviantart.net/\ -css/v6loggedin_lc.css?3001430805"> + <link rel="stylesheet" href="https://st.deviantart.net\ +/css/deviantart-network_lc.css?3843780832"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/css/group_secrets_lc.css?3250492874"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/css/v6core_lc.css?4246581581"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/css/sidebar_lc.css?1490570941"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/css/writer_lc.css?3090682151"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/css/v6loggedin_lc.css?3001430805"/> <style>{css}</style> - <link rel="stylesheet" href="https://st.deviantart.net/\ -roses/cssmin/core.css?1488405371919" > - <link rel="stylesheet" href="https://st.deviantart.net/\ -roses/cssmin/peeky.css?1487067424177" > - <link rel="stylesheet" href="https://st.deviantart.net/\ -roses/cssmin/desktop.css?1491362542749" > + <link rel="stylesheet" href="https://st.deviantart.net\ +/roses/cssmin/core.css?1488405371919"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/roses/cssmin/peeky.css?1487067424177"/> + <link rel="stylesheet" href="https://st.deviantart.net\ +/roses/cssmin/desktop.css?1491362542749"/> + <link rel="stylesheet" href="https://static.parastorage.com/services\ +/da-deviation/2bfd1ff7a9d6bf10d27b98dd8504c0399c3f9974a015785114b7dc6b\ +/app.min.css"/> </head> <body id="deviantART-v7" class="bubble no-apps loggedout w960 deviantart"> <div id="output"> |
