diff options
| author | 2020-10-12 18:14:27 -0400 | |
|---|---|---|
| committer | 2020-10-12 18:14:27 -0400 | |
| commit | e0c914765184ebbf99cffdecfe8cdbe10f42486e (patch) | |
| tree | 4dd89f11195c3f58b3b62b9911bbdc40d0e44471 /gallery_dl/extractor/weasyl.py | |
| parent | 9074eee175f76b824fbb6695d56426105191c51c (diff) | |
New upstream version 1.15.1. upstream/1.15.1
Diffstat (limited to 'gallery_dl/extractor/weasyl.py')
| -rw-r--r-- | gallery_dl/extractor/weasyl.py | 236 |
1 files changed, 236 insertions, 0 deletions
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.weasyl.com/"""

from .common import Extractor, Message
from .. import text

# The dot in "weasyl.com" is escaped so that only the real host matches
# (an unescaped "." would also accept e.g. "weasylxcom").
BASE_PATTERN = r"(?:https://)?(?:www\.)?weasyl\.com/"


class WeasylExtractor(Extractor):
    """Base class for weasyl extractors"""
    category = "weasyl"
    directory_fmt = ("{category}", "{owner_login}")
    filename_fmt = "{submitid} {title}.{extension}"
    archive_fmt = "{submitid}"
    root = "https://www.weasyl.com"

    @staticmethod
    def populate_submission(data):
        """Add download metadata to a submission dict in place

        Sets 'url' and 'date' on 'data' and passes it to
        text.nameext_from_url() (presumably to fill in filename and
        extension fields -- verify against the 'text' module).

        Returns True if the submission has a downloadable file,
        False if it has none and should be skipped.
        """
        # Some submissions don't have content and can be skipped
        if "submission" not in data["media"]:
            return False

        data["url"] = data["media"]["submission"][0]["url"]
        # Only the first 19 characters ("YYYY-MM-DDTHH:MM:SS") are parsed;
        # anything after the seconds is dropped.
        data["date"] = text.parse_datetime(
            data["posted_at"][:19], "%Y-%m-%dT%H:%M:%S")
        text.nameext_from_url(data["url"], data)
        return True

    def request_submission(self, submitid):
        """Return the decoded JSON of the 'view' API call for 'submitid'"""
        return self.request(
            "{}/api/submissions/{}/view".format(self.root, submitid)).json()

    def retrieve_journal(self, journalid):
        """Fetch a journal via the API and return it as a metadata dict

        The returned dict is the API response extended with
        'extension' ("html"), 'html' (the journal content prefixed
        with "text:") and a parsed 'date'.
        """
        data = self.request(
            "{}/api/journals/{}/view".format(self.root, journalid)).json()
        data["extension"] = "html"
        data["html"] = "text:" + data["content"]
        data["date"] = text.parse_datetime(data["posted_at"])
        return data

    def submissions(self, owner_login, folderid=None):
        """Yield a Message.Url tuple for each submission in a gallery

        Pages through the gallery API of 'owner_login', optionally
        restricted to 'folderid', until the API reports no 'nextid'.
        Submissions without a downloadable file are skipped.
        """
        url = "{}/api/users/{}/gallery".format(self.root, owner_login)
        params = {
            "nextid"  : None,
            "folderid": folderid,
        }

        while True:
            data = self.request(url, params=params).json()
            for submission in data["submissions"]:
                if self.populate_submission(submission):
                    submission["folderid"] = folderid
                    # Do any submissions have more than one url? If so
                    # a urllist of the submission array urls would work.
                    yield Message.Url, submission["url"], submission
            if not data["nextid"]:
                return
            params["nextid"] = data["nextid"]


class WeasylSubmissionExtractor(WeasylExtractor):
    """Extractor for a single weasyl submission"""
    subcategory = "submission"
    pattern = BASE_PATTERN + r"(?:~[\w-]+/submissions|submission)/(\d+)"
    test = (
        ("https://www.weasyl.com/~fiz/submissions/2031/a-wesley", {
            "pattern": "https://cdn.weasyl.com/~fiz/submissions/2031/41ebc1c29"
                       "40be928532785dfbf35c37622664d2fbb8114c3b063df969562fc5"
                       "1/fiz-a-wesley.png",
            "keyword": {
                "comments"    : int,
                "date"        : "dt:2012-04-20 00:38:04",
                "description" : "<p>(flex)</p>",
                "favorites"   : int,
                "folder_name" : "Wesley Stuff",
                "folderid"    : 2081,
                "friends_only": False,
                "owner"       : "Fiz",
                "owner_login" : "fiz",
                "rating"      : "general",
                "submitid"    : 2031,
                "subtype"     : "visual",
                "tags"        : list,
                "title"       : "A Wesley!",
                "type"        : "submission",
                "views"       : int,
            },
        }),
        ("https://www.weasyl.com/submission/2031/a-wesley"),
    )

    def __init__(self, match):
        WeasylExtractor.__init__(self, match)
        self.submitid = match.group(1)

    def items(self):
        """Yield directory and URL messages for the submission, if any"""
        data = self.request_submission(self.submitid)
        if self.populate_submission(data):
            yield Message.Directory, data
            yield Message.Url, data["url"], data


class WeasylSubmissionsExtractor(WeasylExtractor):
    """Extractor for all submissions in a weasyl user's gallery"""
    subcategory = "submissions"
    pattern = BASE_PATTERN + r"(?:~|submissions/)([\w-]+)/?$"
    test = (
        ("https://www.weasyl.com/~tanidareal", {
            "count": ">= 200"
        }),
        ("https://www.weasyl.com/submissions/tanidareal"),
    )

    def __init__(self, match):
        WeasylExtractor.__init__(self, match)
        self.owner_login = match.group(1)

    def items(self):
        """Yield messages for every submission of the user"""
        yield Message.Version, 1
        yield Message.Directory, {"owner_login": self.owner_login}
        yield from self.submissions(self.owner_login)


class WeasylFolderExtractor(WeasylExtractor):
    """Extractor for the submissions in one folder of a weasyl user"""
    subcategory = "folder"
    directory_fmt = ("{category}", "{owner_login}", "{folder_name}")
    pattern = BASE_PATTERN + r"submissions/([\w-]+)\?folderid=(\d+)"
    test = ("https://www.weasyl.com/submissions/tanidareal?folderid=7403", {
        "count": ">= 12"
    })

    def __init__(self, match):
        WeasylExtractor.__init__(self, match)
        self.owner_login, self.folderid = match.groups()

    def items(self):
        """Yield messages for every submission in the folder

        The directory message is built from the full details of the
        first submission, since those carry the folder name.
        """
        yield Message.Version, 1
        submissions = self.submissions(self.owner_login, self.folderid)

        # A bare next() would raise StopIteration on an empty folder,
        # which becomes a RuntimeError inside a generator (PEP 479).
        first = next(submissions, None)
        if first is None:
            return
        msg, url, data = first

        # Folder names are only on single submission api calls
        details = self.request_submission(data["submitid"])
        yield Message.Directory, details
        yield msg, url, data
        yield from submissions


class WeasylJournalExtractor(WeasylExtractor):
    """Extractor for a single weasyl journal"""
    subcategory = "journal"
    filename_fmt = "{journalid} {title}.{extension}"
    archive_fmt = "{journalid}"
    pattern = BASE_PATTERN + r"journal/(\d+)"
    test = ("https://www.weasyl.com/journal/17647/bbcode", {
        "keyword": {
            "title"  : "BBCode",
            "date"   : "dt:2013-09-19 23:11:23",
            "content": "<p><a>javascript:alert(42);</a></p>"
                       "<p>No more of that!</p>",
        },
    })

    def __init__(self, match):
        WeasylExtractor.__init__(self, match)
        self.journalid = match.group(1)

    def items(self):
        """Yield messages for the journal's content"""
        data = self.retrieve_journal(self.journalid)
        yield Message.Version, 1
        yield Message.Directory, data
        yield Message.Url, data["html"], data


class WeasylJournalsExtractor(WeasylExtractor):
    """Extractor for the journals listed on a weasyl user's journals page"""
    subcategory = "journals"
    filename_fmt = "{journalid} {title}.{extension}"
    archive_fmt = "{journalid}"
    pattern = BASE_PATTERN + r"journals/([\w-]+)"
    test = ("https://www.weasyl.com/journals/charmander", {
        "count": ">= 2",
    })

    def __init__(self, match):
        WeasylExtractor.__init__(self, match)
        self.owner_login = match.group(1)

    def items(self):
        """Yield messages for each journal linked from the journals page"""
        yield Message.Version, 1
        yield Message.Directory, {"owner_login": self.owner_login}

        # Journal ids are scraped from the HTML page and each journal
        # is then fetched through the API.
        url = "{}/journals/{}".format(self.root, self.owner_login)
        page = self.request(url).text
        for journalid in text.extract_iter(page, 'href="/journal/', '/'):
            data = self.retrieve_journal(journalid)
            yield Message.Url, data["html"], data


class WeasylFavoriteExtractor(WeasylExtractor):
    """Extractor for the favorited submissions of a weasyl user"""
    subcategory = "favorite"
    directory_fmt = ("{category}", "{owner_login}", "Favorites")
    pattern = BASE_PATTERN + r"favorites\?userid=(\d+)&feature=submit"
    test = ("https://www.weasyl.com/favorites?userid=184616&feature=submit", {
        "count": ">= 5",
    })

    def __init__(self, match):
        WeasylExtractor.__init__(self, match)
        self.userid = match.group(1)

    def items(self):
        """Yield messages for every favorited submission

        Submission ids are scraped from the favorites HTML pages and
        each submission is then fetched through the API. Paging
        continues while a page contains a "&nextid=" link and
        produced at least one new submission id.
        """
        owner_login = lastid = None
        url = self.root + "/favorites"
        params = {
            "userid" : self.userid,
            "feature": "submit",
        }

        while True:
            page = self.request(url, params=params).text
            # Only look at links after the favorites container;
            # raises ValueError if the marker is missing from the page.
            pos = page.index('id="favorites-content"')

            if not owner_login:
                owner_login = text.extract(page, '<a href="/~', '"')[0]
                yield Message.Directory, {"owner_login": owner_login}

            found = False
            for submitid in text.extract_iter(page, "/submissions/", "/", pos):
                # Skip ids identical to the one just handled
                # (presumably each submission is linked more than once).
                if submitid == lastid:
                    continue
                found = True
                lastid = submitid
                submission = self.request_submission(submitid)
                if self.populate_submission(submission):
                    yield Message.Url, submission["url"], submission

            # Stop when there is no next page, or when this page yielded
            # nothing new; continuing would request the same page forever.
            if not found or "&nextid=" not in page:
                return
            # 'lastid' rather than the loop variable, which is unbound
            # when a page lists no submissions.
            params["nextid"] = lastid
