diff options
| author | 2025-12-20 05:49:11 -0500 | |
|---|---|---|
| committer | 2025-12-20 05:49:11 -0500 | |
| commit | c586ea4b3c871f5696626f9820e8c88a4e78f4a6 (patch) | |
| tree | e6d7bae96282c3d147159f091d451e53bdaa2efe /gallery_dl/extractor/civitai.py | |
| parent | 01a2bf622c31072d1322884584404b9bd59b28cc (diff) | |
| parent | a24ec1647aeac35a63b744ea856011ad6e06be3b (diff) | |
Update upstream source from tag 'upstream/1.31.1'
Update to upstream version '1.31.1'
with Debian dir b5d91c25143175f933b1c69c7e82249cd7e145ab
Diffstat (limited to 'gallery_dl/extractor/civitai.py')
| -rw-r--r-- | gallery_dl/extractor/civitai.py | 138 |
1 files changed, 81 insertions, 57 deletions
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 26ee3fd..742c561 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -15,7 +15,7 @@ import itertools import time BASE_PATTERN = r"(?:https?://)?civitai\.com" -USER_PATTERN = BASE_PATTERN + r"/user/([^/?#]+)" +USER_PATTERN = rf"{BASE_PATTERN}/user/([^/?#]+)" class CivitaiExtractor(Extractor): @@ -61,13 +61,14 @@ class CivitaiExtractor(Extractor): if isinstance(metadata, str): metadata = metadata.split(",") elif not isinstance(metadata, (list, tuple)): - metadata = ("generation", "version", "post") + metadata = {"generation", "version", "post", "tags"} self._meta_generation = ("generation" in metadata) self._meta_version = ("version" in metadata) self._meta_post = ("post" in metadata) + self._meta_tags = ("tags" in metadata) else: self._meta_generation = self._meta_version = self._meta_post = \ - False + self._meta_tags = False def items(self): if models := self.models(): @@ -86,8 +87,7 @@ class CivitaiExtractor(Extractor): images = self.api.images_post(post["id"]) post = self.api.post(post["id"]) - post["date"] = text.parse_datetime( - post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + post["date"] = self.parse_datetime_iso(post["publishedAt"]) data = { "post": post, "user": post.pop("user"), @@ -96,7 +96,7 @@ class CivitaiExtractor(Extractor): data["model"], data["version"] = \ self._extract_meta_version(post) - yield Message.Directory, data + yield Message.Directory, "", data for file in self._image_results(images): file.update(data) yield Message.Url, file["url"], file @@ -111,8 +111,9 @@ class CivitaiExtractor(Extractor): } if self._meta_generation: - data["generation"] = \ - self._extract_meta_generation(file) + data["generation"] = self._extract_meta_generation(file) + if self._meta_tags: + data["tags"] = self._extract_meta_tags(file) if self._meta_version: data["model"], data["version"] = \ self._extract_meta_version(file, False) @@ -122,8 +123,7 @@ class CivitaiExtractor(Extractor): data["post"] = post = self._extract_meta_post(file) if post: post.pop("user", None) - file["date"] = text.parse_datetime( - file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + file["date"] = self.parse_datetime_iso(file["createdAt"]) data["url"] = url = self._url(file) text.nameext_from_url(url, data) @@ -131,7 +131,7 @@ class CivitaiExtractor(Extractor): data["extension"] = ( self._video_ext if file.get("type") == "video" else self._image_ext) - yield Message.Directory, data + yield Message.Directory, "", data yield Message.Url, url, data return @@ -180,10 +180,11 @@ class CivitaiExtractor(Extractor): if "id" not in file and data["filename"].isdecimal(): file["id"] = text.parse_int(data["filename"]) if "date" not in file: - file["date"] = text.parse_datetime( - file["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + file["date"] = self.parse_datetime_iso(file["createdAt"]) if self._meta_generation: file["generation"] = self._extract_meta_generation(file) + if self._meta_tags: + file["tags"] = self._extract_meta_tags(file) yield data def _image_reactions(self): @@ -211,16 +212,21 @@ class CivitaiExtractor(Extractor): try: return self.api.image_generationdata(image["id"]) except Exception as exc: - return self.log.debug("", exc_info=exc) + return self.log.traceback(exc) def _extract_meta_post(self, image): try: post = self.api.post(image["postId"]) - post["date"] = text.parse_datetime( - post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + post["date"] = self.parse_datetime_iso(post["publishedAt"]) return post except Exception as exc: - return self.log.debug("", exc_info=exc) + return self.log.traceback(exc) + + def _extract_meta_tags(self, image): + try: + return self.api.tag_getvotabletags(image["id"]) + except Exception as exc: + return self.log.traceback(exc) def _extract_meta_version(self, item, is_post=True): try: @@ -228,7 +234,7 @@ class CivitaiExtractor(Extractor): version = self.api.model_version(version_id).copy() return version.pop("model", None), version except Exception as exc: - self.log.debug("", exc_info=exc) + self.log.traceback(exc) return None, None def _extract_version_id(self, item, is_post=True): @@ -252,7 +258,7 @@ class CivitaiModelExtractor(CivitaiExtractor): directory_fmt = ("{category}", "{user[username]}", "{model[id]}{model[name]:? //}", "{version[id]}{version[name]:? //}") - pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?" + pattern = rf"{BASE_PATTERN}/models/(\d+)(?:/?\?modelVersionId=(\d+))?" example = "https://civitai.com/models/12345/TITLE" def items(self): @@ -278,8 +284,7 @@ class CivitaiModelExtractor(CivitaiExtractor): versions = (version,) for version in versions: - version["date"] = text.parse_datetime( - version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") + version["date"] = self.parse_datetime_iso(version["createdAt"]) data = { "model" : model, @@ -287,7 +292,7 @@ class CivitaiModelExtractor(CivitaiExtractor): "user" : user, } - yield Message.Directory, data + yield Message.Directory, "", data for file in self._extract_files(model, version, user): file.update(data) yield Message.Url, file["url"], file @@ -342,9 +347,9 @@ class CivitaiModelExtractor(CivitaiExtractor): params = { "modelVersionId": version["id"], "prioritizedUserIds": (user["id"],), - "period": "AllTime", - "sort": "Most Reactions", - "limit": 20, + "period" : self.api._param_period(), + "sort" : self.api._param_sort(), + "limit" : 20, "pending": True, } images = self.api.images(params, defaults=False) @@ -370,7 +375,7 @@ class CivitaiModelExtractor(CivitaiExtractor): class CivitaiImageExtractor(CivitaiExtractor): subcategory = "image" - pattern = BASE_PATTERN + r"/images/(\d+)" + pattern = rf"{BASE_PATTERN}/images/(\d+)" example = "https://civitai.com/images/12345" def images(self): @@ -381,7 +386,7 @@ class CivitaiCollectionExtractor(CivitaiExtractor): subcategory = "collection" directory_fmt = ("{category}", "{user_collection[username]}", "collections", "{collection[id]}{collection[name]:? //}") - pattern = BASE_PATTERN + r"/collections/(\d+)" + pattern = rf"{BASE_PATTERN}/collections/(\d+)" example = "https://civitai.com/collections/12345" def images(self): @@ -391,8 +396,8 @@ class CivitaiCollectionExtractor(CivitaiExtractor): params = { "collectionId" : cid, - "period" : "AllTime", - "sort" : "Newest", + "period" : self.api._param_period(), + "sort" : self.api._param_sort(), "browsingLevel" : self.api.nsfw, "include" : ("cosmetics",), } @@ -403,7 +408,7 @@ class CivitaiPostExtractor(CivitaiExtractor): subcategory = "post" directory_fmt = ("{category}", "{username|user[username]}", "posts", "{post[id]}{post[title]:? //}") - pattern = BASE_PATTERN + r"/posts/(\d+)" + pattern = rf"{BASE_PATTERN}/posts/(\d+)" example = "https://civitai.com/posts/12345" def posts(self): @@ -412,7 +417,7 @@ class CivitaiPostExtractor(CivitaiExtractor): class CivitaiTagExtractor(CivitaiExtractor): subcategory = "tag" - pattern = BASE_PATTERN + r"/tag/([^/?&#]+)" + pattern = rf"{BASE_PATTERN}/tag/([^/?&#]+)" example = "https://civitai.com/tag/TAG" def models(self): @@ -422,7 +427,7 @@ class CivitaiTagExtractor(CivitaiExtractor): class CivitaiSearchModelsExtractor(CivitaiExtractor): subcategory = "search-models" - pattern = BASE_PATTERN + r"/search/models\?([^#]+)" + pattern = rf"{BASE_PATTERN}/search/models\?([^#]+)" example = "https://civitai.com/search/models?query=QUERY" def models(self): @@ -433,7 +438,7 @@ class CivitaiSearchModelsExtractor(CivitaiExtractor): class CivitaiSearchImagesExtractor(CivitaiExtractor): subcategory = "search-images" - pattern = BASE_PATTERN + r"/search/images\?([^#]+)" + pattern = rf"{BASE_PATTERN}/search/images\?([^#]+)" example = "https://civitai.com/search/images?query=QUERY" def images(self): @@ -444,7 +449,7 @@ class CivitaiSearchImagesExtractor(CivitaiExtractor): class CivitaiModelsExtractor(CivitaiExtractor): subcategory = "models" - pattern = BASE_PATTERN + r"/models(?:/?\?([^#]+))?(?:$|#)" + pattern = rf"{BASE_PATTERN}/models(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/models" def models(self): @@ -454,7 +459,7 @@ class CivitaiModelsExtractor(CivitaiExtractor): class CivitaiImagesExtractor(CivitaiExtractor): subcategory = "images" - pattern = BASE_PATTERN + r"/images(?:/?\?([^#]+))?(?:$|#)" + pattern = rf"{BASE_PATTERN}/images(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/images" def images(self): @@ -465,7 +470,7 @@ class CivitaiImagesExtractor(CivitaiExtractor): class CivitaiVideosExtractor(CivitaiExtractor): subcategory = "videos" - pattern = BASE_PATTERN + r"/videos(?:/?\?([^#]+))?(?:$|#)" + pattern = rf"{BASE_PATTERN}/videos(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/videos" def images(self): @@ -476,7 +481,7 @@ class CivitaiVideosExtractor(CivitaiExtractor): class CivitaiPostsExtractor(CivitaiExtractor): subcategory = "posts" - pattern = BASE_PATTERN + r"/posts(?:/?\?([^#]+))?(?:$|#)" + pattern = rf"{BASE_PATTERN}/posts(?:/?\?([^#]+))?(?:$|#)" example = "https://civitai.com/posts" def posts(self): @@ -485,7 +490,7 @@ class CivitaiPostsExtractor(CivitaiExtractor): class CivitaiUserExtractor(Dispatch, CivitaiExtractor): - pattern = USER_PATTERN + r"/?(?:$|\?|#)" + pattern = rf"{USER_PATTERN}/?(?:$|\?|#)" example = "https://civitai.com/user/USER" def items(self): @@ -501,7 +506,7 @@ class CivitaiUserExtractor(Dispatch, CivitaiExtractor): class CivitaiUserModelsExtractor(CivitaiExtractor): subcategory = "user-models" - pattern = USER_PATTERN + r"/models/?(?:\?([^#]+))?" + pattern = rf"{USER_PATTERN}/models/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/models" def models(self): @@ -515,7 +520,7 @@ class CivitaiUserPostsExtractor(CivitaiExtractor): subcategory = "user-posts" directory_fmt = ("{category}", "{username|user[username]}", "posts", "{post[id]}{post[title]:? //}") - pattern = USER_PATTERN + r"/posts/?(?:\?([^#]+))?" + pattern = rf"{USER_PATTERN}/posts/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/posts" def posts(self): @@ -527,7 +532,7 @@ class CivitaiUserPostsExtractor(CivitaiExtractor): class CivitaiUserImagesExtractor(CivitaiExtractor): subcategory = "user-images" - pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?" + pattern = rf"{USER_PATTERN}/images/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/images" def __init__(self, match): @@ -548,7 +553,7 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): class CivitaiUserVideosExtractor(CivitaiExtractor): subcategory = "user-videos" directory_fmt = ("{category}", "{username|user[username]}", "videos") - pattern = USER_PATTERN + r"/videos/?(?:\?([^#]+))?" + pattern = rf"{USER_PATTERN}/videos/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/videos" def __init__(self, match): @@ -567,7 +572,7 @@ class CivitaiUserVideosExtractor(CivitaiExtractor): class CivitaiUserCollectionsExtractor(CivitaiExtractor): subcategory = "user-collections" - pattern = USER_PATTERN + r"/collections/?(?:\?([^#]+))?" + pattern = rf"{USER_PATTERN}/collections/?(?:\?([^#]+))?" example = "https://civitai.com/user/USER/collections" def items(self): @@ -586,16 +591,15 @@ class CivitaiGeneratedExtractor(CivitaiExtractor): subcategory = "generated" filename_fmt = "{filename}.{extension}" directory_fmt = ("{category}", "generated") - pattern = f"{BASE_PATTERN}/generate" + pattern = rf"{BASE_PATTERN}/generate" example = "https://civitai.com/generate" def items(self): self._require_auth() for gen in self.api.orchestrator_queryGeneratedImages(): - gen["date"] = text.parse_datetime( - gen["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") - yield Message.Directory, gen + gen["date"] = self.parse_datetime_iso(gen["createdAt"]) + yield Message.Directory, "", gen for step in gen.pop("steps", ()): for image in step.pop("images", ()): data = {"file": image, **step, **gen} @@ -719,8 +723,8 @@ class CivitaiTrpcAPI(): if defaults: params = self._merge_params(params, { "useIndex" : True, - "period" : "AllTime", - "sort" : "Newest", + "period" : self._param_period(), + "sort" : self._param_sort(), "withMeta" : False, # Metadata Only "fromPlatform" : False, # Made On-Site "browsingLevel": self.nsfw, @@ -733,8 +737,8 @@ class CivitaiTrpcAPI(): def images_gallery(self, model, version, user): endpoint = "image.getImagesAsPostsInfinite" params = { - "period" : "AllTime", - "sort" : "Newest", + "period" : self._param_period(), + "sort" : self._param_sort(), "modelVersionId": version["id"], "modelId" : model["id"], "hidden" : False, @@ -768,9 +772,9 @@ class CivitaiTrpcAPI(): if defaults: params = self._merge_params(params, { - "period" : "AllTime", + "period" : self._param_period(), "periodMode" : "published", - "sort" : "Newest", + "sort" : self._param_sort(), "pending" : False, "hidden" : False, "followed" : False, @@ -797,9 +801,9 @@ class CivitaiTrpcAPI(): if defaults: params = self._merge_params(params, { "browsingLevel": self.nsfw, - "period" : "AllTime", + "period" : self._param_period(), "periodMode" : "published", - "sort" : "Newest", + "sort" : self._param_sort(), "followed" : False, "draftOnly" : False, "pending" : True, @@ -821,12 +825,17 @@ class CivitaiTrpcAPI(): if defaults: params = self._merge_params(params, { "browsingLevel": self.nsfw, - "sort" : "Newest", + "sort" : self._param_sort(), }) params = self._type_params(params) return self._pagination(endpoint, params) + def tag_getvotabletags(self, image_id): + endpoint = "tag.getVotableTags" + params = {"id": int(image_id), "type": "image"} + return self._call(endpoint, params) + def user(self, username): endpoint = "user.getCreator" params = {"username": username} @@ -835,7 +844,7 @@ class CivitaiTrpcAPI(): def orchestrator_queryGeneratedImages(self): endpoint = "orchestrator.queryGeneratedImages" params = { - "ascending": False, + "ascending": True if self._param_sort() == "Oldest" else False, "tags" : ("gen",), "authed" : True, } @@ -908,6 +917,21 @@ class CivitaiTrpcAPI(): params[name] = [type(item) for item in value] return params + def _param_period(self): + if period := self.extractor.config("period"): + return period + return "AllTime" + + def _param_sort(self): + if sort := self.extractor.config("sort"): + s = sort[0].lower() + if s in "drn": + return "Newest" + if s in "ao": + return "Oldest" + return sort + return "Newest" + def _bool(value): return value == "true" |
