diff options
Diffstat (limited to 'gallery_dl/extractor/mastodon.py')
| -rw-r--r-- | gallery_dl/extractor/mastodon.py | 203 |
1 files changed, 203 insertions, 0 deletions
# -*- coding: utf-8 -*-

# Copyright 2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for mastodon instances"""

from .common import Extractor, Message
from .. import text, config, exception
import re


class MastodonExtractor(Extractor):
    """Base class for mastodon extractors

    Concrete, per-instance subclasses are created by generate_extractors(),
    which fills in 'category', 'instance', 'root', 'pattern' and
    'access_token'.
    """
    basecategory = "mastodon"
    directory_fmt = ("mastodon", "{instance}", "{account[username]}")
    filename_fmt = "{category}_{id}_{media[id]}.{extension}"
    archive_fmt = "{media[id]}"
    instance = None
    root = None
    # Declared here (like 'instance' and 'root') so that MastodonAPI can
    # always read 'extractor.access_token', even on a subclass that was not
    # produced by generate_extractors().
    access_token = None

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.api = MastodonAPI(self)

    def config(self, key, default=None, *, sentinel=object()):
        """Return the config value for 'key'

        The regular Extractor.config() lookup is tried first. Only if that
        yields nothing is the value interpolated from the
        ("extractor", "mastodon", <instance>, <subcategory>, <key>) path,
        with 'default' as the final fallback.
        """
        # A private sentinel distinguishes "not configured" from a
        # configured value that happens to be None or otherwise falsy.
        value = Extractor.config(self, key, sentinel)
        if value is not sentinel:
            return value
        return config.interpolate(
            ("extractor", "mastodon", self.instance, self.subcategory, key),
            default,
        )

    def items(self):
        """Yield a Directory message per status and a Url message per media"""
        yield Message.Version, 1
        for status in self.statuses():
            attachments = self.prepare(status)
            yield Message.Directory, status
            for media in attachments:
                # The status dict doubles as the metadata dict for each of
                # its attachments; "media" is overwritten on every iteration.
                status["media"] = media
                url = media["url"]
                yield Message.Url, url, text.nameext_from_url(url, status)

    def statuses(self):
        """Return an iterable containing all relevant Status-objects"""
        return ()

    def prepare(self, status):
        """Prepare a status object

        Modifies 'status' in place: adds the "instance" key, reduces "tags"
        to a list of tag names, and removes "media_attachments".

        Returns the removed list of media attachments.
        """
        status["instance"] = self.instance
        status["tags"] = [tag["name"] for tag in status["tags"]]
        return status.pop("media_attachments")


class MastodonUserExtractor(MastodonExtractor):
    """Extractor for all images of an account/user"""
    subcategory = "user"

    def __init__(self, match):
        MastodonExtractor.__init__(self, match)
        self.account_name = match.group(1)

    def statuses(self):
        """Return the statuses of the account named by the URL

        Raises exception.NotFoundError if the account search returns no
        account whose username equals the requested name.
        """
        results = self.api.account_search("@" + self.account_name, 1)
        for account in results:
            if account["username"] == self.account_name:
                break
        else:
            # for-else: reached only when no result matched
            raise exception.NotFoundError("account")
        return self.api.account_statuses(account["id"])


class MastodonStatusExtractor(MastodonExtractor):
    """Extractor for images from a status"""
    subcategory = "status"

    def __init__(self, match):
        MastodonExtractor.__init__(self, match)
        self.status_id = match.group(1)

    def statuses(self):
        """Return a 1-tuple holding the status identified by the URL"""
        return (self.api.status(self.status_id),)


class MastodonAPI():
    """Minimal interface for the Mastodon API

    https://github.com/tootsuite/mastodon
    https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
    """

    def __init__(self, extractor, access_token=None):
        """Bind the API to 'extractor'

        When no 'access_token' is passed, the "access-token" config value
        is used, falling back to the extractor's 'access_token' attribute.
        """
        self.root = extractor.root
        self.extractor = extractor

        if not access_token:
            access_token = extractor.config(
                "access-token", extractor.access_token)
        self.headers = {"Authorization": "Bearer {}".format(access_token)}

    def account_search(self, query, limit=40):
        """Search for accounts matching 'query'"""
        params = {"q": query, "limit": limit}
        return self._call("accounts/search", params)

    def account_statuses(self, account_id):
        """Get an account's statuses (media-only), across all pages"""
        endpoint = "accounts/{}/statuses".format(account_id)
        params = {"only_media": "1"}
        return self._pagination(endpoint, params)

    def status(self, status_id):
        """Fetch a Status"""
        return self._call("statuses/" + status_id)

    def _call(self, endpoint, params=None):
        """Send one request to an API v1 endpoint and return the parsed body"""
        url = "{}/api/v1/{}".format(self.root, endpoint)
        response = self.extractor.request(
            url, params=params, headers=self.headers)
        return self._parse(response)

    def _pagination(self, endpoint, params):
        """Yield the items of every page of a paginated endpoint

        Follows the "next" relation of the response's Link header until
        there is none left.
        """
        url = "{}/api/v1/{}".format(self.root, endpoint)
        while url:
            response = self.extractor.request(
                url, params=params, headers=self.headers)
            yield from self._parse(response)
            url = response.links.get("next", {}).get("url")
            # The "next" link is a complete URL with its own query string;
            # passing 'params' again would append the same parameters a
            # second time, so they are only sent with the first request.
            params = None

    @staticmethod
    def _parse(response):
        """Parse an API response

        Raises exception.NotFoundError on HTTP status 404; otherwise
        returns the decoded JSON body.
        """
        if response.status_code == 404:
            raise exception.NotFoundError()
        return response.json()


def generate_extractors():
    """Dynamically generate Extractor classes for Mastodon instances

    Instances from the ("extractor", "mastodon") config section are merged
    into EXTRACTORS, and the merged dict is written back to the config.
    For every instance whose value is a dict, a '<Name>UserExtractor' and
    a '<Name>StatusExtractor' class are created and stored in this
    module's globals.
    """

    symtable = globals()
    extractors = config.get(("extractor", "mastodon"))
    if extractors:
        EXTRACTORS.update(extractors)
    config.set(("extractor", "mastodon"), EXTRACTORS)

    for instance, info in EXTRACTORS.items():

        # Skip entries of the config section that are not dicts.
        if not isinstance(info, dict):
            continue

        category = info.get("category") or instance.replace(".", "")
        root = info.get("root") or "https://" + instance
        name = (info.get("name") or category).capitalize()
        token = info.get("access-token")
        pattern = info.get("pattern") or re.escape(instance)

        class Extr(MastodonUserExtractor):
            pass

        Extr.__name__ = Extr.__qualname__ = name + "UserExtractor"
        Extr.__doc__ = "Extractor for all images of a user on " + instance
        Extr.category = category
        Extr.instance = instance
        Extr.pattern = (r"(?:https?://)?" + pattern +
                        r"/@([^/?&#]+)(?:/media)?/?$")
        Extr.root = root
        Extr.access_token = token
        symtable[Extr.__name__] = Extr

        class Extr(MastodonStatusExtractor):
            pass

        Extr.__name__ = Extr.__qualname__ = name + "StatusExtractor"
        Extr.__doc__ = "Extractor for images from a status on " + instance
        Extr.category = category
        Extr.instance = instance
        Extr.pattern = r"(?:https?://)?" + pattern + r"/@[^/?&#]+/(\d+)"
        Extr.root = root
        Extr.access_token = token
        symtable[Extr.__name__] = Extr


EXTRACTORS = {
    "pawoo.net": {
        "category"     : "pawoo",
        "access-token" : "286462927198d0cf3e24683e91c8259a"
                         "ac4367233064e0570ca18df2ac65b226",
        "client-id"    : "97b142b6904abf97a1068d51a7bc2f2f"
                         "cf9323cef81f13cb505415716dba7dac",
        "client-secret": "e45bef4bad45b38abf7d9ef88a646b73"
                         "75e7fb2532c31a026327a93549236481",
    },
}


generate_extractors()
