diff options
| author | 2025-12-20 05:49:11 -0500 | |
|---|---|---|
| committer | 2025-12-20 05:49:11 -0500 | |
| commit | c586ea4b3c871f5696626f9820e8c88a4e78f4a6 (patch) | |
| tree | e6d7bae96282c3d147159f091d451e53bdaa2efe /gallery_dl/extractor/realbooru.py | |
| parent | 01a2bf622c31072d1322884584404b9bd59b28cc (diff) | |
| parent | a24ec1647aeac35a63b744ea856011ad6e06be3b (diff) | |
Update upstream source from tag 'upstream/1.31.1'
Update to upstream version '1.31.1'
with Debian dir b5d91c25143175f933b1c69c7e82249cd7e145ab
Diffstat (limited to 'gallery_dl/extractor/realbooru.py')
| -rw-r--r-- | gallery_dl/extractor/realbooru.py | 39 |
1 files changed, 21 insertions, 18 deletions
diff --git a/gallery_dl/extractor/realbooru.py b/gallery_dl/extractor/realbooru.py index cf45578..7f731f8 100644 --- a/gallery_dl/extractor/realbooru.py +++ b/gallery_dl/extractor/realbooru.py @@ -28,18 +28,31 @@ class RealbooruExtractor(booru.BooruExtractor): extr('class="container"', '>') post = { - "_html" : page, "id" : post_id, "rating" : "e" if rating == "adult" else (rating or "?")[0], - "tags" : text.unescape(extr(' alt="', '"')), - "file_url" : extr('src="', '"'), + "file_url" : (s := extr('src="', '"')), + "_fallback" : (extr('src="', '"'),) if s.endswith(".mp4") else (), "created_at": extr(">Posted at ", " by "), "uploader" : extr(">", "<"), "score" : extr('">', "<"), + "tags" : extr('<br />', "</div>"), "title" : extr('id="title" style="width: 100%;" value="', '"'), "source" : extr('d="source" style="width: 100%;" value="', '"'), } + tags_container = post["tags"] + tags = [] + tags_categories = collections.defaultdict(list) + pattern = text.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)') + for tag_type, tag_name in pattern.findall(tags_container): + tag = text.unescape(text.unquote(tag_name)) + tags.append(tag) + tags_categories[tag_type].append(tag) + for key, value in tags_categories.items(): + post[f"tags_{key}"] = ", ".join(value) + tags.sort() + + post["tags"] = ", ".join(tags) post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0] return post @@ -48,7 +61,7 @@ class RealbooruExtractor(booru.BooruExtractor): return num def _prepare(self, post): - post["date"] = text.parse_datetime(post["created_at"], "%b, %d %Y") + post["date"] = self.parse_datetime(post["created_at"], "%b, %d %Y") def _pagination(self, params, begin, end): url = self.root + "/index.php" @@ -66,23 +79,13 @@ class RealbooruExtractor(booru.BooruExtractor): return params["pid"] += self.per_page - def _tags(self, post, _): - page = post["_html"] - tag_container = text.extr(page, 'id="tagLink"', '</div>') - tags = collections.defaultdict(list) - pattern = util.re(r'<a class="(?:tag-type-)?([^"]+).*?;tags=([^"&]+)') - for tag_type, tag_name in pattern.findall(tag_container): - tags[tag_type].append(text.unescape(text.unquote(tag_name))) - for key, value in tags.items(): - post["tags_" + key] = " ".join(value) - class RealbooruTagExtractor(RealbooruExtractor): subcategory = "tag" directory_fmt = ("{category}", "{search_tags}") archive_fmt = "t_{search_tags}_{id}" per_page = 42 - pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]*)" + pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=list&tags=([^&#]*)" example = "https://realbooru.com/index.php?page=post&s=list&tags=TAG" def metadata(self): @@ -102,7 +105,7 @@ class RealbooruFavoriteExtractor(RealbooruExtractor): directory_fmt = ("{category}", "favorites", "{favorite_id}") archive_fmt = "f_{favorite_id}_{id}" per_page = 50 - pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)" + pattern = rf"{BASE_PATTERN}/index\.php\?page=favorites&s=view&id=(\d+)" example = "https://realbooru.com/index.php?page=favorites&s=view&id=12345" def metadata(self): @@ -120,7 +123,7 @@ class RealbooruPoolExtractor(RealbooruExtractor): subcategory = "pool" directory_fmt = ("{category}", "pool", "{pool} {pool_name}") archive_fmt = "p_{pool}_{id}" - pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)" + pattern = rf"{BASE_PATTERN}/index\.php\?page=pool&s=show&id=(\d+)" example = "https://realbooru.com/index.php?page=pool&s=show&id=12345" def metadata(self): @@ -147,7 +150,7 @@ class RealbooruPoolExtractor(RealbooruExtractor): class RealbooruPostExtractor(RealbooruExtractor): subcategory = "post" archive_fmt = "{id}" - pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)" + pattern = rf"{BASE_PATTERN}/index\.php\?page=post&s=view&id=(\d+)" example = "https://realbooru.com/index.php?page=post&s=view&id=12345" def posts(self): |
