{title}

From 0db541f524e1774865efebcbe5653e9ad76ea2e8 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 14 Oct 2024 03:02:05 -0400 Subject: New upstream version 1.27.6. --- gallery_dl/extractor/deviantart.py | 308 ++++++++++++++++++++++++++++++------- 1 file changed, 252 insertions(+), 56 deletions(-) (limited to 'gallery_dl/extractor/deviantart.py') diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 3686e1b..836fae7 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -177,24 +177,7 @@ class DeviantartExtractor(Extractor): yield self.commit(deviation, deviation["flash"]) if self.commit_journal: - if "excerpt" in deviation: - # journal = self.api.deviation_content( - # deviation["deviationid"]) - if not self.eclipse_api: - self.eclipse_api = DeviantartEclipseAPI(self) - content = self.eclipse_api.deviation_extended_fetch( - deviation["index"], - deviation["author"]["username"], - "journal", - )["deviation"]["textContent"] - html = content["html"]["markup"] - if html.startswith("{"): - html = content["excerpt"].replace("\n", "
") - journal = {"html": html} - elif "body" in deviation: - journal = {"html": deviation.pop("body")} - else: - journal = None + journal = self._extract_journal(deviation) if journal: if self.extra: deviation["_journal"] = journal["html"] @@ -375,6 +358,204 @@ class DeviantartExtractor(Extractor): deviation["extension"] = "txt" return Message.Url, txt, deviation + def _extract_journal(self, deviation): + if "excerpt" in deviation: + # # empty 'html' + # return self.api.deviation_content(deviation["deviationid"]) + + if "_page" in deviation: + page = deviation["_page"] + del deviation["_page"] + else: + page = self._limited_request(deviation["url"]).text + + # extract journal html from webpage + html = text.extr( + page, + "

Literature Text

", + "

") + if html: + return {"html": html} + + self.log.debug("%s: Failed to extract journal HTML from webpage. " + "Falling back to __INITIAL_STATE__ markup.", + deviation["index"]) + + # parse __INITIAL_STATE__ as fallback + state = util.json_loads(text.extr( + page, 'window.__INITIAL_STATE__ = JSON.parse("', '");') + .replace("\\\\", "\\").replace("\\'", "'").replace('\\"', '"')) + deviations = state["@@entities"]["deviation"] + content = deviations.popitem()[1]["textContent"] + + html = self._textcontent_to_html(deviation, content) + if html: + return {"html": html} + return {"html": content["excerpt"].replace("\n", "
")} + + if "body" in deviation: + return {"html": deviation.pop("body")} + return None + + def _textcontent_to_html(self, deviation, content): + html = content["html"] + markup = html["markup"] + + if not markup.startswith("{"): + return markup + + if html["type"] == "tiptap": + try: + return self._tiptap_to_html(markup) + except Exception as exc: + self.log.debug("", exc_info=exc) + self.log.error("%s: '%s: %s'", deviation["index"], + exc.__class__.__name__, exc) + + self.log.warning("%s: Unsupported '%s' markup.", + deviation["index"], html["type"]) + + def _tiptap_to_html(self, markup): + html = [] + + html.append('

') + data = util.json_loads(markup) + for block in data["document"]["content"]: + self._tiptap_process_content(html, block) + html.append("

") + + return "".join(html) + + def _tiptap_process_content(self, html, content): + type = content["type"] + + if type == "paragraph": + children = content.get("content") + if children: + html.append('

') + + for block in children: + self._tiptap_process_content(html, block) + html.append("

") + else: + html.append('

') + + elif type == "text": + self._tiptap_process_text(html, content) + + elif type == "hardBreak": + html.append("

") + + elif type == "horizontalRule": + html.append("

") + + elif type == "da-deviation": + self._tiptap_process_deviation(html, content) + + elif type == "da-mention": + user = content["attrs"]["user"]["username"] + html.append('@') + html.append(user) + html.append('') + + else: + self.log.warning("Unsupported content type '%s'", type) + + def _tiptap_process_text(self, html, content): + marks = content.get("marks") + if marks: + close = [] + for mark in marks: + type = mark["type"] + if type == "link": + html.append('') + close.append("") + elif type == "bold": + html.append("") + close.append("") + elif type == "italic": + html.append("") + close.append("") + elif type == "underline": + html.append("") + close.append("") + elif type == "textStyle" and len(mark) <= 1: + pass + else: + self.log.warning("Unsupported text marker '%s'", type) + close.reverse() + html.append(text.escape(content["text"])) + html.extend(close) + else: + html.append(text.escape(content["text"])) + + def _tiptap_process_deviation(self, html, content): + dev = content["attrs"]["deviation"] + media = dev.get("media") or () + + html.append('

') + html.append('

') + def _extract_content(self, deviation): content = deviation["content"] @@ -552,6 +733,23 @@ class DeviantartExtractor(Extractor): self.log.info("Unwatching %s", username) self.api.user_friends_unwatch(username) + def _eclipse_media(self, media, format="preview"): + url = [media["baseUri"], ] + + formats = { + fmt["t"]: fmt + for fmt in media["types"] + } + + tokens = media["token"] + if len(tokens) == 1: + fmt = formats[format] + url.append(fmt["c"].replace("", media["prettyName"])) + url.append("?token=") + url.append(tokens[-1]) + + return "".join(url), formats + def _eclipse_to_oauth(self, eclipse_api, deviations): for obj in deviations: deviation = obj["deviation"] if "deviation" in obj else obj @@ -709,43 +907,35 @@ class DeviantartStashExtractor(DeviantartExtractor): archive_fmt = "{index}.{extension}" pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)" r"/([a-z0-9]+)") - example = "https://sta.sh/abcde" + example = "https://www.deviantart.com/stash/abcde" skip = Extractor.skip def __init__(self, match): DeviantartExtractor.__init__(self, match) self.user = None - self.stash_id = match.group(1) def deviations(self, stash_id=None): if stash_id is None: - stash_id = self.stash_id - url = "https://sta.sh/" + stash_id + stash_id = self.groups[0] + url = "https://www.deviantart.com/stash/" + stash_id page = self._limited_request(url).text if stash_id[0] == "0": uuid = text.extr(page, '//deviation/', '"') if uuid: deviation = self.api.deviation(uuid) + deviation["_page"] = page deviation["index"] = text.parse_int(text.extr( page, '\\"deviationId\\":', ',')) yield deviation return - for item in text.extract_iter( - page, 'class="stash-thumb-container', ''): - url = text.extr(item, ' 2: - yield from self.deviations(stash_id) + for sid in text.extract_iter( + page, 'href="https://www.deviantart.com/stash/', '"'): + if sid == stash_id or sid.endswith("#comments"): + continue + yield from self.deviations(sid) class DeviantartFavoriteExtractor(DeviantartExtractor): @@ -939,11 +1129,14 @@ class DeviantartDeviationExtractor(DeviantartExtractor): else: url = "{}/view/{}/".format(self.root, self.deviation_id) - uuid = text.extr(self._limited_request(url).text, - '"deviationUuid\\":\\"', '\\') + page = self._limited_request(url, notfound="deviation").text + uuid = text.extr(page, '"deviationUuid\\":\\"', '\\') if not uuid: raise exception.NotFoundError("deviation") - return (self.api.deviation(uuid),) + + deviation = self.api.deviation(uuid) + deviation["_page"] = page + return (deviation,) class DeviantartScrapsExtractor(DeviantartExtractor): @@ -1816,25 +2009,28 @@ JOURNAL_TEMPLATE_HTML = """text: {title} - - - - - - + + + + + + - - - + + + +

-- cgit v1.2.3

Literature Text

\ +''') + html.append(text.escape(dev["title"])) + html.append('