# -*- coding: utf-8 -*- # Copyright 2020-2025 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://aryion.com/""" from .common import Extractor, Message from .. import text, util, dt, exception from ..cache import cache from email.utils import parsedate_tz BASE_PATTERN = r"(?:https?://)?(?:www\.)?aryion\.com/g4" class AryionExtractor(Extractor): """Base class for aryion extractors""" category = "aryion" directory_fmt = ("{category}", "{user!l}", "{path:I}") filename_fmt = "{id} {title}.{extension}" archive_fmt = "{id}" cookies_domain = ".aryion.com" cookies_names = ("phpbb3_rl7a3_sid",) root = "https://aryion.com" def __init__(self, match): Extractor.__init__(self, match) self.user = match[1] self.recursive = True def login(self): if self.cookies_check(self.cookies_names): return username, password = self._get_auth_info() if username: self.cookies_update(self._login_impl(username, password)) @cache(maxage=14*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) url = self.root + "/forum/ucp.php?mode=login" data = { "username": username, "password": password, "login": "Login", } response = self.request(url, method="POST", data=data) if b"You have been successfully logged in." not in response.content: raise exception.AuthenticationError() return {c: response.cookies[c] for c in self.cookies_names} def items(self): self.login() data = self.metadata() for post_id in self.posts(): if post := self._parse_post(post_id): if data: post.update(data) yield Message.Directory, "", post yield Message.Url, post["url"], post elif post is False and self.recursive: base = self.root + "/g4/view/" data = {"_extractor": AryionPostExtractor} for post_id in self._pagination_params(base + post_id): yield Message.Queue, base + post_id, data def posts(self): """Yield relevant post IDs""" def metadata(self): """Return general metadata""" def _pagination_params(self, url, params=None, needle=None, quote="'"): if params is None: params = {"p": 1} else: params["p"] = text.parse_int(params.get("p"), 1) if needle is None: needle = "class='gallery-item' id=" + quote while True: page = self.request(url, params=params).text cnt = 0 for post_id in text.extract_iter(page, needle, quote): cnt += 1 yield post_id if cnt < 40 and ">Next >><" not in page: return params["p"] += 1 def _pagination_next(self, url): while True: page = self.request(url).text yield from text.extract_iter(page, "thumb' href='/g4/view/", "'") pos = page.find("Next >>") if pos < 0: return url = self.root + text.rextr(page, "href='", "'", pos) def _pagination_folders(self, url, folder=None, seen=None): if folder is None: self.kwdict["folder"] = "" else: url = f"{url}/{folder}" self.kwdict["folder"] = folder = text.unquote(folder) self.log.debug("Descending into folder '%s'", folder) params = {"p": 1} while True: page = self.request(url, params=params).text cnt = 0 for item in text.extract_iter( page, "