summary | refs | log | tree | commit | diff | stats
path: root/gallery_dl/extractor/hentaifoundry.py
diff options
context:
space:
mode:
Diffstat (limited to 'gallery_dl/extractor/hentaifoundry.py')
-rw-r--r-- gallery_dl/extractor/hentaifoundry.py 18
1 files changed, 15 insertions, 3 deletions
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index e529940..91bcd38 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor):
self.start_post = 0
self.start_page = 1
+    def _init(self):
+        """Pick the description post-processor from the 'descriptions' option"""
+        # Only the exact value "html" switches this instance over to
+        # _process_description_html(); any other value leaves the
+        # existing _process_description untouched.
+        wants_html = (self.config("descriptions") == "html")
+        if wants_html:
+            self._process_description = self._process_description_html
+
def items(self):
self._init_site_filters()
data = self.metadata()
@@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor):
"artist" : text.unescape(extr('/profile">', '<')),
"_body" : extr(
'<div class="boxbody"', '<div class="boxfooter"'),
- "description": text.unescape(text.remove_html(extr(
- '>Description</div>', '</section>')
- .replace("\r\n", "\n"), "", "")),
+ "description": self._process_description(extr(
+ "<div class='picDescript'>", '</section>')
+ .replace("\r\n", "\n")),
"ratings" : [text.unescape(r) for r in text.extract_iter(extr(
"class='ratings_box'", "</div>"), "title='", "'")],
"date" : text.parse_datetime(extr("datetime='", "'")),
@@ -106,6 +110,14 @@ class HentaifoundryExtractor(Extractor):
return text.nameext_from_url(data["src"], data)
+    def _process_description(self, description):
+        """Run 'description' through text.remove_html() and text.unescape()"""
+        without_markup = text.remove_html(description, "", "")
+        return text.unescape(without_markup)
+
+    def _process_description_html(self, description):
+        """Return 'description' cut before its two trailing '</div' tags
+
+        The text is sliced at the second-to-last '</div' and stripped of
+        surrounding whitespace.  If only one '</div' exists, the cut is
+        made there; if none exists, the whole text is kept.  This avoids
+        using a failed rfind() result (-1) as a slice bound, which would
+        silently drop the final character.
+        """
+        pos1 = description.rfind('</div')  # picDescript
+        if pos1 < 0:
+            # no closing tag at all: nothing to cut off
+            return description.strip()
+        pos2 = description.rfind('</div', 0, pos1)  # boxBody
+        if pos2 < 0:
+            # only a single closing tag: cut there instead of at -1
+            pos2 = pos1
+        return description[:pos2].strip()
+
def _parse_story(self, html):
"""Collect url and metadata for a story"""
extr = text.extract_from(html)