diff options
| author | 2020-08-15 17:48:11 -0400 | |
|---|---|---|
| committer | 2020-08-15 17:48:11 -0400 | |
| commit | 7cf59dc17c3607e096292462ed15d391be4e3dfd (patch) | |
| tree | 50d2750e958f43271dc6cc5310211cf8f8bbd9d0 /gallery_dl/extractor/subscribestar.py | |
| parent | ba039cfb2e1ba2522ee0a0fa2a84a1a6579e4877 (diff) | |
New upstream version 1.14.4 (upstream/1.14.4)
Diffstat (limited to 'gallery_dl/extractor/subscribestar.py')
| -rw-r--r-- | gallery_dl/extractor/subscribestar.py | 43 |
1 file changed, 24 insertions, 19 deletions
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py index 08d8850..076d0c0 100644 --- a/gallery_dl/extractor/subscribestar.py +++ b/gallery_dl/extractor/subscribestar.py @@ -11,7 +11,6 @@ from .common import Extractor, Message from .. import text, exception from ..cache import cache -import datetime import json @@ -35,8 +34,6 @@ class SubscribestarExtractor(Extractor): self.cookiedomain = "subscribestar.adult" self.subcategory += "-adult" Extractor.__init__(self, match) - self.metadata = self.config("metadata", False) - self._year = " " + str(datetime.date.today().year) def items(self): self.login() @@ -92,38 +89,46 @@ class SubscribestarExtractor(Extractor): @staticmethod def _media_from_post(html): + media = [] + gallery = text.extract(html, 'data-gallery="', '"')[0] if gallery: - return [ + media.extend( item for item in json.loads(text.unescape(gallery)) if "/previews/" not in item["url"] - ] - return () + ) + + attachments = text.extract( + html, 'class="uploads-docs"', 'data-role="post-edit_form"')[0] + if attachments: + for att in attachments.split('class="doc_preview"')[1:]: + media.append({ + "id" : text.parse_int(text.extract( + att, 'data-upload-id="', '"')[0]), + "url" : text.extract(att, 'href="', '"')[0], + "type": "attachment", + }) + + return media def _data_from_post(self, html): extr = text.extract_from(html) - data = { + return { "post_id" : text.parse_int(extr('data-id="', '"')), "author_id" : text.parse_int(extr('data-user-id="', '"')), "author_name": text.unescape(extr('href="/', '"')), "author_nick": text.unescape(extr('>', '<')), + "date" : self._parse_datetime(text.remove_html(extr( + 'class="post-date">', '</'))), "content" : (extr( '<div class="post-content', '<div class="post-uploads') .partition(">")[2]), } - if self.metadata: - url = "{}/posts/{}".format(self.root, data["post_id"]) - page = self.request(url).text - data["date"] = self._parse_datetime(text.extract( - page, 
'class="section-subtitle">', '<')[0]) - - return data - def _parse_datetime(self, dt): - date = text.parse_datetime(dt, "%B %d, %Y %H:%M") + date = text.parse_datetime(dt, "%b %d, %Y %I:%M %p") if date is dt: - date = text.parse_datetime(dt + self._year, "%d %b %H:%M %Y") + date = text.parse_datetime(dt, "%B %d, %Y %I:%M %p") return date @@ -141,6 +146,7 @@ class SubscribestarUserExtractor(SubscribestarExtractor): "author_name": "subscribestar", "author_nick": "SubscribeStar", "content": str, + "date" : "type:datetime", "height" : int, "id" : int, "pinned" : bool, @@ -209,8 +215,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor): def posts(self): url = "{}/posts/{}".format(self.root, self.item) - self._page = self.request(url).text - return (self._page,) + return (self.request(url).text,) def _data_from_post(self, html): extr = text.extract_from(html) |
