From 889c7b8caec8fc0b9c7a583ed1d9cfa43518fc42 Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Sat, 1 Mar 2025 19:51:39 -0500 Subject: New upstream version 1.29.0. --- gallery_dl/extractor/subscribestar.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'gallery_dl/extractor/subscribestar.py') diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 8668330..6c43941 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -51,6 +51,23 @@ class SubscribestarExtractor(Extractor): def posts(self): """Yield HTML content of all relevant posts""" + def request(self, url, **kwargs): + while True: + response = Extractor.request(self, url, **kwargs) + + if response.history and "/verify_subscriber" in response.url: + raise exception.StopExtraction( + "HTTP redirect to %s", response.url) + + content = response.content + if len(content) < 250 and b">redirected<" in content: + url = text.unescape(text.extr( + content, b'href="', b'"').decode()) + self.log.debug("HTML redirect message for %s", url) + continue + + return response + def login(self): if self.cookies_check(self.cookies_names): return @@ -189,10 +206,11 @@ class SubscribestarPostExtractor(SubscribestarExtractor): extr = text.extract_from(html) return { "post_id" : text.parse_int(extr('data-id="', '"')), - "author_name": text.unescape(extr('href="/', '"')), - "author_id" : text.parse_int(extr('data-user-id="', '"')), - "author_nick": text.unescape(extr('alt="', '"')), "date" : self._parse_datetime(extr( - '', '<')), + '

', '<')), "content" : extr('', '').strip(), + "author_name": text.unescape(extr( + 'class="star_link" href="/', '"')), + "author_id" : text.parse_int(extr('data-user-id="', '"')), + "author_nick": text.unescape(extr('alt="', '"')), } -- cgit v1.2.3