diff options
Diffstat (limited to 'gallery_dl/extractor/subscribestar.py')
| -rw-r--r-- | gallery_dl/extractor/subscribestar.py | 24 |
1 files changed, 20 insertions, 4 deletions
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 1054a63..a83f2da 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -40,8 +40,14 @@ class SubscribestarExtractor(Extractor):
         for post_html in self.posts():
             media = self._media_from_post(post_html)
             data = self._data_from_post(post_html)
-            data["title"] = text.unescape(text.extr(
-                data["content"], "<h1>", "</h1>"))
+
+            content = data["content"]
+            if "<html><body>" in content:
+                data["content"] = content = text.extr(
+                    content, "<body>", "</body>")
+            data["title"] = text.unescape(
+                text.rextract(content, "<h1>", "</h1>")[0] or "")
+
             yield Message.Directory, data
             for num, item in enumerate(media, 1):
                 item.update(data)
@@ -189,7 +195,12 @@ class SubscribestarExtractor(Extractor):
             "author_nick": text.unescape(extr('>', '<')),
             "date"       : self._parse_datetime(extr(
                 'class="post-date">', '</').rpartition(">")[2]),
-            "content"    : extr('<body>', '</body>').strip(),
+            "content"    : extr(
+                '<div class="post-content" data-role="post_content-text">',
+                '</div><div class="post-uploads for-youtube"').strip(),
+            "tags"       : list(text.extract_iter(extr(
+                '<div class="post_tags for-post">',
+                '<div class="post-actions">'), '?tag=', '"')),
         }
 
     def _parse_datetime(self, dt):
@@ -243,7 +254,12 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
             "post_id"    : text.parse_int(extr('data-id="', '"')),
             "date"       : self._parse_datetime(extr(
                 '<div class="section-title_date">', '<')),
-            "content"    : extr('<body>', '</body>').strip(),
+            "content"    : extr(
+                '<div class="post-content" data-role="post_content-text">',
+                '</div><div class="post-uploads for-youtube"').strip(),
+            "tags"       : list(text.extract_iter(extr(
+                '<div class="post_tags for-post">',
+                '<div class="post-actions">'), '?tag=', '"')),
             "author_name": text.unescape(extr(
                 'class="star_link" href="/', '"')),
             "author_id"  : text.parse_int(extr('data-user-id="', '"')),
