diff options
Diffstat (limited to 'gallery_dl/extractor/hentaifoundry.py')
| -rw-r--r-- | gallery_dl/extractor/hentaifoundry.py | 18 |
1 file changed, 15 insertions, 3 deletions
diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py
index e529940..91bcd38 100644
--- a/gallery_dl/extractor/hentaifoundry.py
+++ b/gallery_dl/extractor/hentaifoundry.py
@@ -32,6 +32,10 @@ class HentaifoundryExtractor(Extractor):
         self.start_post = 0
         self.start_page = 1
 
+    def _init(self):
+        if self.config("descriptions") == "html":
+            self._process_description = self._process_description_html
+
     def items(self):
         self._init_site_filters()
         data = self.metadata()
@@ -77,9 +81,9 @@ class HentaifoundryExtractor(Extractor):
             "artist" : text.unescape(extr('/profile">', '<')),
             "_body" : extr(
                 '<div class="boxbody"', '<div class="boxfooter"'),
-            "description": text.unescape(text.remove_html(extr(
-                '>Description</div>', '</section>')
-                .replace("\r\n", "\n"), "", "")),
+            "description": self._process_description(extr(
+                "<div class='picDescript'>", '</section>')
+                .replace("\r\n", "\n")),
             "ratings" : [text.unescape(r) for r in text.extract_iter(extr(
                 "class='ratings_box'", "</div>"), "title='", "'")],
             "date" : text.parse_datetime(extr("datetime='", "'")),
@@ -106,6 +110,14 @@ class HentaifoundryExtractor(Extractor):
 
         return text.nameext_from_url(data["src"], data)
 
+    def _process_description(self, description):
+        return text.unescape(text.remove_html(description, "", ""))
+
+    def _process_description_html(self, description):
+        pos1 = description.rfind('</div') # picDescript
+        pos2 = description.rfind('</div', None, pos1) # boxBody
+        return str.strip(description[0:pos2])
+
     def _parse_story(self, html):
         """Collect url and metadata for a story"""
         extr = text.extract_from(html)
