diff options
| author | 2025-03-01 19:51:45 -0500 | |
|---|---|---|
| committer | 2025-03-01 19:51:45 -0500 | |
| commit | bc1c79d35e0a75bc8da8f6f010df779c4acca201 (patch) | |
| tree | 9d8808a5aec770221eb667160a3fbda61f9d5d49 /gallery_dl/extractor/subscribestar.py | |
| parent | 75e3edb22dad2fc506494bb90ee6b331f5169adf (diff) | |
| parent | 889c7b8caec8fc0b9c7a583ed1d9cfa43518fc42 (diff) | |
Update upstream source from tag 'upstream/1.29.0'
Update to upstream version '1.29.0'
with Debian dir 7b309aa6ccc040a2faaf51d37a63f5233590a8d7
Diffstat (limited to 'gallery_dl/extractor/subscribestar.py')
| -rw-r--r-- | gallery_dl/extractor/subscribestar.py | 26 |
1 file changed, 22 insertions, 4 deletions
```diff
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 8668330..6c43941 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -51,6 +51,23 @@ class SubscribestarExtractor(Extractor):
     def posts(self):
         """Yield HTML content of all relevant posts"""
 
+    def request(self, url, **kwargs):
+        while True:
+            response = Extractor.request(self, url, **kwargs)
+
+            if response.history and "/verify_subscriber" in response.url:
+                raise exception.StopExtraction(
+                    "HTTP redirect to %s", response.url)
+
+            content = response.content
+            if len(content) < 250 and b">redirected<" in content:
+                url = text.unescape(text.extr(
+                    content, b'href="', b'"').decode())
+                self.log.debug("HTML redirect message for %s", url)
+                continue
+
+            return response
+
     def login(self):
         if self.cookies_check(self.cookies_names):
             return
@@ -189,10 +206,11 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
         extr = text.extract_from(html)
         return {
             "post_id"    : text.parse_int(extr('data-id="', '"')),
-            "author_name": text.unescape(extr('href="/', '"')),
-            "author_id"  : text.parse_int(extr('data-user-id="', '"')),
-            "author_nick": text.unescape(extr('alt="', '"')),
             "date"       : self._parse_datetime(extr(
-                '<span class="star_link-types">', '<')),
+                '<div class="section-title_date">', '<')),
             "content"    : extr('<body>', '</body>').strip(),
+            "author_name": text.unescape(extr(
+                'class="star_link" href="/', '"')),
+            "author_id"  : text.parse_int(extr('data-user-id="', '"')),
+            "author_nick": text.unescape(extr('alt="', '"')),
         }
```
