diff options
| author | 2025-03-29 07:19:58 -0400 | |
|---|---|---|
| committer | 2025-03-29 07:19:58 -0400 | |
| commit | 662e5ac868a5c1a3e7bc95b37054b3a0ca4db74f (patch) | |
| tree | 537d0429926fb5eb3719aa2b384048ae79bda0b8 /gallery_dl/extractor/bbc.py | |
| parent | 8026a3c45446030d7af524bfc487d3462c8114ef (diff) | |
New upstream version 1.29.3. (tag: upstream/1.29.3)
Diffstat (limited to 'gallery_dl/extractor/bbc.py')
| -rw-r--r-- | gallery_dl/extractor/bbc.py | 33 |
1 files changed, 22 insertions, 11 deletions
```diff
diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py
index 113a669..b398152 100644
--- a/gallery_dl/extractor/bbc.py
+++ b/gallery_dl/extractor/bbc.py
@@ -27,7 +27,12 @@ class BbcGalleryExtractor(GalleryExtractor):
 
     def metadata(self, page):
         data = self._extract_jsonld(page)
+
         return {
+            "title": text.unescape(text.extr(
+                page, "<h1>", "</h1>").rpartition("</span>")[2]),
+            "description": text.unescape(text.extr(
+                page, 'property="og:description" content="', '"')),
             "programme": self.gallery_url.split("/")[4],
             "path": list(util.unique_sequence(
                 element["name"]
@@ -40,11 +45,20 @@ class BbcGalleryExtractor(GalleryExtractor):
         width = width - width % 16 if width else 1920
         dimensions = "/{}xn/".format(width)
 
-        return [
-            (src.replace("/320x180_b/", dimensions),
-             {"_fallback": self._fallback_urls(src, width)})
-            for src in text.extract_iter(page, 'data-image-src="', '"')
-        ]
+        results = []
+        for img in text.extract_iter(page, 'class="gallery__thumbnail', ">"):
+            src = text.extr(img, 'data-image-src="', '"')
+            results.append((
+                src.replace("/320x180_b/", dimensions),
+                {
+                    "title_image": text.unescape(text.extr(
+                        img, 'data-gallery-title="', '"')),
+                    "synopsis": text.unescape(text.extr(
+                        img, 'data-gallery-synopsis="', '"')),
+                    "_fallback": self._fallback_urls(src, width),
+                },
+            ))
+        return results
 
     @staticmethod
     def _fallback_urls(src, max_width):
@@ -62,14 +76,11 @@ class BbcProgrammeExtractor(Extractor):
     pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
     example = "https://www.bbc.co.uk/programmes/ID/galleries"
 
-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.path, self.page = match.groups()
-
     def items(self):
+        path, pnum = self.groups
         data = {"_extractor": BbcGalleryExtractor}
-        params = {"page": text.parse_int(self.page, 1)}
-        galleries_url = self.root + self.path
+        params = {"page": text.parse_int(pnum, 1)}
+        galleries_url = self.root + path
 
         while True:
             page = self.request(galleries_url, params=params).text
```
