diff options
| author | 2025-05-26 06:46:00 -0400 | |
|---|---|---|
| committer | 2025-05-26 06:46:00 -0400 | |
| commit | 6424318a059207759b9055cf8a8df91c0ddac7c8 (patch) | |
| tree | 3fb8adec807ad1ffeba4889a506b05e680ca8051 /gallery_dl/extractor/subscribestar.py | |
| parent | 2bef55427baa34bf0f78d52590bbf27b2c5f3a56 (diff) | |
| parent | 7672a750cb74bf31e21d76aad2776367fd476155 (diff) | |
Update upstream source from tag 'upstream/1.29.7'
Update to upstream version '1.29.7'
with Debian dir 264267cd1ebd5c7205fe1f137a394d0ae1a2fb3b
Diffstat (limited to 'gallery_dl/extractor/subscribestar.py')
| -rw-r--r-- | gallery_dl/extractor/subscribestar.py | 24 |
1 file changed, 20 insertions, 4 deletions
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 1054a63..a83f2da 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -40,8 +40,14 @@ class SubscribestarExtractor(Extractor): for post_html in self.posts(): media = self._media_from_post(post_html) data = self._data_from_post(post_html) - data["title"] = text.unescape(text.extr( - data["content"], "<h1>", "</h1>")) + + content = data["content"] + if "<html><body>" in content: + data["content"] = content = text.extr( + content, "<body>", "</body>") + data["title"] = text.unescape( + text.rextract(content, "<h1>", "</h1>")[0] or "") + yield Message.Directory, data for num, item in enumerate(media, 1): item.update(data) @@ -189,7 +195,12 @@ class SubscribestarExtractor(Extractor): "author_nick": text.unescape(extr('>', '<')), "date" : self._parse_datetime(extr( 'class="post-date">', '</').rpartition(">")[2]), - "content" : extr('<body>', '</body>').strip(), + "content" : extr( + '<div class="post-content" data-role="post_content-text">', + '</div><div class="post-uploads for-youtube"').strip(), + "tags" : list(text.extract_iter(extr( + '<div class="post_tags for-post">', + '<div class="post-actions">'), '?tag=', '"')), } def _parse_datetime(self, dt): @@ -243,7 +254,12 @@ class SubscribestarPostExtractor(SubscribestarExtractor): "post_id" : text.parse_int(extr('data-id="', '"')), "date" : self._parse_datetime(extr( '<div class="section-title_date">', '<')), - "content" : extr('<body>', '</body>').strip(), + "content" : extr( + '<div class="post-content" data-role="post_content-text">', + '</div><div class="post-uploads for-youtube"').strip(), + "tags" : list(text.extract_iter(extr( + '<div class="post_tags for-post">', + '<div class="post-actions">'), '?tag=', '"')), "author_name": text.unescape(extr( 'class="star_link" href="/', '"')), "author_id" : text.parse_int(extr('data-user-id="', '"')), |
