diff options
| author | 2021-05-03 23:36:45 -0400 | |
|---|---|---|
| committer | 2021-05-03 23:36:45 -0400 | |
| commit | e7eb1f9779f2e223575ab23a6bc1abf2222e7d27 (patch) | |
| tree | 6cfdc1e3da2143801a598a0ba1182d8f7289dc6d /gallery_dl/extractor/slideshare.py | |
| parent | d27dcd4646242d6da8436f14c7b37ce864355858 (diff) | |
New upstream version 1.17.3. (tag: upstream/1.17.3)
Diffstat (limited to 'gallery_dl/extractor/slideshare.py')
| -rw-r--r-- | gallery_dl/extractor/slideshare.py | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py
index 0b970cc..15dbb85 100644
--- a/gallery_dl/extractor/slideshare.py
+++ b/gallery_dl/extractor/slideshare.py
@@ -6,7 +6,7 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 
-"""Extract images from https://www.slideshare.net/"""
+"""Extractors for https://www.slideshare.net/"""
 
 from .common import Extractor, Message
 from .. import text
@@ -58,15 +58,16 @@ class SlidesharePresentationExtractor(Extractor):
         """Collect metadata for extractor-job"""
         descr, pos = text.extract(
             page, '<meta name="description" content="', '"')
-        title, pos = text.extract(
-            page, '<span class="j-title-breadcrumb">', '</span>', pos)
+        category, pos = text.extract(
+            page, '<div class="metadata-item">', '</div>', pos)
         views, pos = text.extract(
-            page, '<span class="notranslate">', 'views<', pos)
+            page, '<div class="metadata-item">', '</div>', pos)
         published, pos = text.extract(
-            page, '<time datetime="', '"', pos)
+            page, '<div class="metadata-item">', '</div>', pos)
+        title, pos = text.extract(
+            page, '<span class="j-title-breadcrumb">', '</span>', pos)
         alt_descr, pos = text.extract(
-            page, 'id="slideshow-description-paragraph" class="notranslate">',
-            '</p>', pos)
+            page, '<p class="slideshow-description notranslate">', '</p>', pos)
 
         if descr.endswith("…") and alt_descr:
             descr = text.remove_html(alt_descr).strip()
@@ -76,8 +77,9 @@ class SlidesharePresentationExtractor(Extractor):
             "presentation": self.presentation,
             "title": text.unescape(title.strip()),
             "description": text.unescape(descr),
-            "views": text.parse_int(views.replace(",", "")),
-            "published": published,
+            "views": text.parse_int(views.rpartition(
+                " views")[0].replace(",", "")),
+            "published": published.strip(),
         }
 
     @staticmethod
