diff options
| author | 2025-03-15 18:05:15 -0400 | |
|---|---|---|
| committer | 2025-03-15 18:05:15 -0400 | |
| commit | 8026a3c45446030d7af524bfc487d3462c8114ef (patch) | |
| tree | 0818c682a06f620c08a8b6b4c07f4935bd79493a /gallery_dl | |
| parent | 243d1f1beb4e4eb75a524f1aff948c47761a4f1d (diff) | |
New upstream version 1.29.2. (ref: upstream/1.29.2)
Diffstat (limited to 'gallery_dl')
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | gallery_dl/extractor/arcalive.py | 186 | ||||
| -rw-r--r-- | gallery_dl/extractor/batoto.py | 38 | ||||
| -rw-r--r-- | gallery_dl/extractor/civitai.py | 67 | ||||
| -rw-r--r-- | gallery_dl/extractor/facebook.py | 7 | ||||
| -rw-r--r-- | gallery_dl/extractor/furaffinity.py | 30 | ||||
| -rw-r--r-- | gallery_dl/extractor/itaku.py | 11 | ||||
| -rw-r--r-- | gallery_dl/extractor/sankaku.py | 54 | ||||
| -rw-r--r-- | gallery_dl/extractor/tiktok.py | 27 | ||||
| -rw-r--r-- | gallery_dl/extractor/twitter.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/wikimedia.py | 14 | ||||
| -rw-r--r-- | gallery_dl/text.py | 17 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 |
13 files changed, 382 insertions, 74 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8208241..8198619 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ modules = [ "adultempire", "agnph", "ao3", + "arcalive", "architizer", "artstation", "aryion", diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py new file mode 100644 index 0000000..8e832fe --- /dev/null +++ b/gallery_dl/extractor/arcalive.py @@ -0,0 +1,186 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://arca.live/""" + +from .common import Extractor, Message +from .. import text, util, exception +import re + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live" + + +class ArcaliveExtractor(Extractor): + """Base class for Arca.live extractors""" + category = "arcalive" + root = "https://arca.live" + request_interval = (0.5, 1.5) + + def _init(self): + self.api = ArcaliveAPI(self) + + def items(self): + for article in self.articles(): + article["_extractor"] = ArcalivePostExtractor + board = self.board or article.get("boardSlug") or "breaking" + url = "{}/b/{}/{}".format(self.root, board, article["id"]) + yield Message.Queue, url, article + + +class ArcalivePostExtractor(ArcaliveExtractor): + """Extractor for an arca.live post""" + subcategory = "post" + directory_fmt = ("{category}", "{boardSlug}") + filename_fmt = "{id}_{num}{title:? 
//[b:230]}.{extension}" + archive_fmt = "{id}_{num}" + pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)" + example = "https://arca.live/b/breaking/123456789" + + def items(self): + self.emoticons = self.config("emoticons", False) + self.gifs = self.config("gifs", True) + + post = self.api.post(self.groups[0]) + files = self._extract_files(post) + + post["count"] = len(files) + post["date"] = text.parse_datetime( + post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") + post["post_url"] = post_url = "{}/b/{}/{}".format( + self.root, post["boardSlug"], post["id"]) + post["_http_headers"] = {"Referer": post_url + "?p=1"} + + yield Message.Directory, post + for post["num"], file in enumerate(files, 1): + post.update(file) + url = file["url"] + yield Message.Url, url, text.nameext_from_url(url, post) + + def _extract_files(self, post): + files = [] + + for video, media in self._extract_media(post["content"]): + + if not self.emoticons and 'class="arca-emoticon"' in media: + continue + + src = (text.extr(media, 'data-originalurl="', '"') or + text.extr(media, 'src="', '"')) + if not src: + continue + + src = text.unescape(src.partition("?")[0]) + if src[0] == "/": + if src[1] == "/": + url = "https:" + src + else: + url = self.root + src + else: + url = src + + fallback = () + orig = text.extr(media, 'data-orig="', '"') + if orig: + path, _, ext = url.rpartition(".") + if ext != orig: + fallback = (url + "?type=orig",) + url = path + "." 
+ orig + elif video and self.gifs: + url_gif = url.rpartition(".")[0] + ".gif" + response = self.request( + url_gif + "?type=orig", method="HEAD", fatal=False) + if response.status_code < 400: + fallback = (url + "?type=orig",) + url = url_gif + + files.append({ + "url" : url + "?type=orig", + "width" : text.parse_int(text.extr(media, 'width="', '"')), + "height": text.parse_int(text.extr(media, 'height="', '"')), + "_fallback": fallback, + }) + + return files + + def _extract_media(self, content): + ArcalivePostExtractor._extract_media = extr = re.compile( + r"<(?:img|vide(o)) ([^>]+)").findall + return extr(content) + + +class ArcaliveBoardExtractor(ArcaliveExtractor): + """Extractor for an arca.live board's posts""" + subcategory = "board" + pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$" + example = "https://arca.live/b/breaking" + + def articles(self): + self.board, query = self.groups + params = text.parse_query(query) + return self.api.board(self.board, params) + + +class ArcaliveUserExtractor(ArcaliveExtractor): + """Extractor for an arca.live users's posts""" + subcategory = "user" + pattern = BASE_PATTERN + r"/u/@([^/?#]+)/?(?:\?([^#]+))?$" + example = "https://arca.live/u/@USER" + + def articles(self): + self.board = None + user, query = self.groups + params = text.parse_query(query) + return self.api.user_posts(text.unquote(user), params) + + +class ArcaliveAPI(): + + def __init__(self, extractor): + self.extractor = extractor + self.log = extractor.log + self.root = extractor.root + "/api/app" + + headers = extractor.session.headers + headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75" + headers["X-Device-Token"] = util.generate_token(64) + + def board(self, board_slug, params): + endpoint = "/list/channel/" + board_slug + return self._pagination(endpoint, params, "articles") + + def post(self, post_id): + endpoint = "/view/article/breaking/" + str(post_id) + return self._call(endpoint) + + def user_posts(self, username, 
params): + endpoint = "/list/channel/breaking" + params["target"] = "nickname" + params["keyword"] = username + return self._pagination(endpoint, params, "articles") + + def _call(self, endpoint, params=None): + url = self.root + endpoint + response = self.extractor.request(url, params=params) + + data = response.json() + if response.status_code == 200: + return data + + self.log.debug("Server response: %s", data) + msg = data.get("message") + raise exception.StopExtraction( + "API request failed%s", ": " + msg if msg else "") + + def _pagination(self, endpoint, params, key): + while True: + data = self._call(endpoint, params) + + posts = data.get(key) + if not posts: + break + yield from posts + + params.update(data["next"]) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 4d192a4..a1ad3ae 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -54,11 +54,23 @@ class BatotoBase(): """Base class for batoto extractors""" category = "batoto" root = "https://xbato.org" - - def _init_root(self, match): - domain = match.group(1) - if domain not in LEGACY_DOMAINS: - self.root = "https://" + domain + _warn_legacy = True + + def _init_root(self): + domain = self.config("domain") + if domain is None or domain in {"auto", "url"}: + domain = self.groups[0] + if domain in LEGACY_DOMAINS: + if self._warn_legacy: + BatotoBase._warn_legacy = False + self.log.warning("Legacy domain '%s'", domain) + elif domain == "nolegacy": + domain = self.groups[0] + if domain in LEGACY_DOMAINS: + domain = "xbato.org" + elif domain == "nowarn": + domain = self.groups[0] + self.root = "https://" + domain def request(self, url, **kwargs): kwargs["encoding"] = "utf-8" @@ -72,10 +84,10 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor): example = "https://xbato.org/title/12345-MANGA/54321" def __init__(self, match): - self._init_root(match) - self.chapter_id = match.group(2) - url = "{}/title/0/{}".format(self.root, 
self.chapter_id) - ChapterExtractor.__init__(self, match, url) + ChapterExtractor.__init__(self, match, False) + self._init_root() + self.chapter_id = self.groups[1] + self.gallery_url = "{}/title/0/{}".format(self.root, self.chapter_id) def metadata(self, page): extr = text.extract_from(page) @@ -133,10 +145,10 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor): example = "https://xbato.org/title/12345-MANGA/" def __init__(self, match): - self._init_root(match) - self.manga_id = match.group(2) or match.group(3) - url = "{}/title/{}".format(self.root, self.manga_id) - MangaExtractor.__init__(self, match, url) + MangaExtractor.__init__(self, match, False) + self._init_root() + self.manga_id = self.groups[1] or self.groups[2] + self.manga_url = "{}/title/{}".format(self.root, self.manga_id) def chapters(self, page): extr = text.extract_from(page) diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py index 36efcfe..034a3c2 100644 --- a/gallery_dl/extractor/civitai.py +++ b/gallery_dl/extractor/civitai.py @@ -144,6 +144,11 @@ class CivitaiExtractor(Extractor): file["generation"] = self.api.image_generationdata(file["id"]) yield data + def _parse_query(self, value): + return text.parse_query_list( + value, {"tags", "reactions", "baseModels", "tools", "techniques", + "types", "fileFormats"}) + class CivitaiModelExtractor(CivitaiExtractor): subcategory = "model" @@ -348,8 +353,9 @@ class CivitaiUserModelsExtractor(CivitaiExtractor): example = "https://civitai.com/user/USER/models" def models(self): - params = text.parse_query(self.groups[1]) - params["username"] = text.unquote(self.groups[0]) + user, query = self.groups + params = self._parse_query(query) + params["username"] = text.unquote(user) return self.api.models(params) @@ -361,8 +367,9 @@ class CivitaiUserPostsExtractor(CivitaiExtractor): example = "https://civitai.com/user/USER/posts" def posts(self): - params = text.parse_query(self.groups[1]) - params["username"] = 
text.unquote(self.groups[0]) + user, query = self.groups + params = self._parse_query(query) + params["username"] = text.unquote(user) return self.api.posts(params) @@ -372,7 +379,7 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): example = "https://civitai.com/user/USER/images" def __init__(self, match): - self.params = text.parse_query_list(match.group(2)) + self.params = self._parse_query(match.group(2)) if self.params.get("section") == "reactions": self.subcategory = "reactions" self.images = self.images_reactions @@ -392,12 +399,8 @@ class CivitaiUserImagesExtractor(CivitaiExtractor): params = self.params params["authed"] = True params["useIndex"] = False - if "reactions" in params: - if isinstance(params["reactions"], str): - params["reactions"] = (params["reactions"],) - else: - params["reactions"] = ( - "Like", "Dislike", "Heart", "Laugh", "Cry") + if "reactions" not in params: + params["reactions"] = ("Like", "Dislike", "Heart", "Laugh", "Cry") return self.api.images(params) @@ -409,9 +412,11 @@ class CivitaiUserVideosExtractor(CivitaiExtractor): def images(self): self._image_ext = "mp4" - params = text.parse_query(self.groups[1]) + + user, query = self.groups + params = self._parse_query(query) params["types"] = ["video"] - params["username"] = text.unquote(self.groups[0]) + params["username"] = text.unquote(user) return self.api.images(params) @@ -499,7 +504,7 @@ class CivitaiTrpcAPI(): self.root = extractor.root + "/api/trpc/" self.headers = { "content-type" : "application/json", - "x-client-version": "5.0.394", + "x-client-version": "5.0.542", "x-client-date" : "", "x-client" : "web", "x-fingerprint" : "undefined", @@ -660,15 +665,35 @@ class CivitaiTrpcAPI(): meta_ = meta def _merge_params(self, params_user, params_default): + """Combine 'params_user' with 'params_default'""" params_default.update(params_user) return params_default def _type_params(self, params): - for key, type in ( - ("tags" , int), - ("modelId" , int), - ("modelVersionId", 
int), - ): - if key in params: - params[key] = type(params[key]) + """Convert 'params' values to expected types""" + types = { + "tags" : int, + "tools" : int, + "techniques" : int, + "modelId" : int, + "modelVersionId": int, + "remixesOnly" : _bool, + "nonRemixesOnly": _bool, + "withMeta" : _bool, + "fromPlatform" : _bool, + "supportsGeneration": _bool, + } + + for name, value in params.items(): + if name not in types: + continue + elif isinstance(value, str): + params[name] = types[name](value) + elif isinstance(value, list): + type = types[name] + params[name] = [type(item) for item in value] return params + + +def _bool(value): + return True if value == "true" else False diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py index 1ec6adc..b284ee8 100644 --- a/gallery_dl/extractor/facebook.py +++ b/gallery_dl/extractor/facebook.py @@ -99,9 +99,10 @@ class FacebookExtractor(Extractor): '"message":{"delight_ranges"', '"},"message_preferred_body"' ).rsplit('],"text":"', 1)[-1]), - "date": text.parse_timestamp(text.extr( - photo_page, '\\"publish_time\\":', ',' - )), + "date": text.parse_timestamp( + text.extr(photo_page, '\\"publish_time\\":', ',') or + text.extr(photo_page, '"created_time":', ',') + ), "url": FacebookExtractor.decode_all(text.extr( photo_page, ',"image":{"uri":"', '","' )), diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py index 216aeb1..565fd71 100644 --- a/gallery_dl/extractor/furaffinity.py +++ b/gallery_dl/extractor/furaffinity.py @@ -97,6 +97,7 @@ class FuraffinityExtractor(Extractor): if self._new_layout: data["tags"] = text.split_html(extr( 'class="tags-row">', '</section>')) + data["scraps"] = (extr(' submissions">', "<") == "Scraps") data["title"] = text.unescape(extr("<h2><p>", "</p></h2>")) data["artist_url"] = extr('title="', '"').strip() data["artist"] = extr(">", "<") @@ -121,6 +122,8 @@ class FuraffinityExtractor(Extractor): folders.append(folder) else: # old site 
layout + data["scraps"] = ( + "/scraps/" in extr('class="minigallery-title', "</a>")) data["title"] = text.unescape(extr("<h2>", "</h2>")) data["artist_url"] = extr('title="', '"').strip() data["artist"] = extr(">", "<") @@ -153,12 +156,13 @@ class FuraffinityExtractor(Extractor): def _process_description(description): return text.unescape(text.remove_html(description, "", "")) - def _pagination(self, path): + def _pagination(self, path, folder=None): num = 1 + folder = "" if folder is None else "/folder/{}/a".format(folder) while True: - url = "{}/{}/{}/{}/".format( - self.root, path, self.user, num) + url = "{}/{}/{}{}/{}/".format( + self.root, path, self.user, folder, num) page = self.request(url).text post_id = None @@ -232,13 +236,31 @@ class FuraffinityExtractor(Extractor): class FuraffinityGalleryExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's gallery""" subcategory = "gallery" - pattern = BASE_PATTERN + r"/gallery/([^/?#]+)" + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)(?:$|/(?!folder/))" example = "https://www.furaffinity.net/gallery/USER/" def posts(self): return self._pagination("gallery") +class FuraffinityFolderExtractor(FuraffinityExtractor): + """Extractor for a FurAffinity folder""" + subcategory = "folder" + directory_fmt = ("{category}", "{user!l}", + "Folders", "{folder_id}{folder_name:? //}") + pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?" 
+ example = "https://www.furaffinity.net/gallery/USER/folder/12345/FOLDER" + + def metadata(self): + return { + "folder_id" : self.groups[1], + "folder_name": self.groups[2] or "", + } + + def posts(self): + return self._pagination("gallery", self.groups[1]) + + class FuraffinityScrapsExtractor(FuraffinityExtractor): """Extractor for a furaffinity user's scraps""" subcategory = "scraps" diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index 5c91eb9..2974b59 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -80,7 +80,8 @@ class ItakuSearchExtractor(ItakuExtractor): example = "https://itaku.ee/home/images?tags=SEARCH" def posts(self): - params = text.parse_query_list(self.groups[0]) + params = text.parse_query_list( + self.groups[0], {"tags", "maturity_rating"}) return self.api.search_images(params) @@ -99,13 +100,7 @@ class ItakuAPI(): negative_tags = [] optional_tags = [] - tags = params.pop("tags", None) - if not tags: - tags = () - elif isinstance(tags, str): - tags = (tags,) - - for tag in tags: + for tag in params.pop("tags", None) or (): if not tag: pass elif tag[0] == "-": diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index b2f31dd..c7303f2 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -66,8 +66,7 @@ class SankakuExtractor(BooruExtractor): def _prepare(self, post): post["created_at"] = post["created_at"]["s"] post["date"] = text.parse_timestamp(post["created_at"]) - post["tags"] = [tag["name"].lower().replace(" ", "_") - for tag in post["tags"] if tag["name"]] + post["tags"] = post.pop("tag_names", ()) post["tag_string"] = " ".join(post["tags"]) post["_http_validate"] = self._check_expired @@ -76,7 +75,7 @@ class SankakuExtractor(BooruExtractor): def _tags(self, post, page): tags = collections.defaultdict(list) - for tag in post["tags"]: + for tag in self.api.tags(post["id"]): name = tag["name"] if name: 
tags[tag["type"]].append(name.lower().replace(" ", "_")) @@ -112,11 +111,11 @@ class SankakuTagExtractor(SankakuExtractor): if "date:" in self.tags: # rewrite 'date:' tags (#1790) self.tags = re.sub( - r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)", - r"date:\3.\2.\1", self.tags) + r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)", + r"date:\3-\2-\1T00:00", self.tags) self.tags = re.sub( - r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)", - r"date:\1.\2.\3", self.tags) + r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)", + r"date:\1-\2-\3T00:00", self.tags) def metadata(self): return {"search_tags": self.tags} @@ -209,6 +208,30 @@ class SankakuAPI(): params = {"lang": "en"} return self._call("/posts/{}/notes".format(post_id), params) + def tags(self, post_id): + endpoint = "/posts/{}/tags".format(post_id) + params = { + "lang" : "en", + "page" : 1, + "limit": 100, + } + + tags = None + while True: + data = self._call(endpoint, params) + + tags_new = data["data"] + if not tags_new: + return tags or [] + elif tags is None: + tags = tags_new + else: + tags.extend(tags_new) + + if len(tags_new) < 80 or len(tags) >= data["total"]: + return tags + params["page"] += 1 + def pools(self, pool_id): params = {"lang": "en"} return self._call("/pools/" + pool_id, params) @@ -216,6 +239,15 @@ class SankakuAPI(): def pools_keyset(self, params): return self._pagination("/pools/keyset", params) + def pools_series(self, params): + params_ = { + "lang" : "en", + "filledPools": "true", + "includes[]" : "pools", + } + params_.update(params) + return self._pagination("/poolseriesv2", params) + def posts(self, post_id): params = { "lang" : "en", @@ -223,17 +255,17 @@ class SankakuAPI(): "limit": "1", "tags" : ("md5:" if len(post_id) == 32 else "id_range:") + post_id, } - return self._call("/posts", params) + return self._call("/v2/posts", params) def posts_keyset(self, params): - return self._pagination("/posts/keyset", params) + return self._pagination("/v2/posts/keyset", params) def authenticate(self): 
self.headers["Authorization"] = \ _authenticate_impl(self.extractor, self.username, self.password) def _call(self, endpoint, params=None): - url = "https://capi-v2.sankakucomplex.com" + endpoint + url = "https://sankakuapi.com" + endpoint for _ in range(5): self.authenticate() response = self.extractor.request( @@ -311,7 +343,7 @@ class SankakuAPI(): def _authenticate_impl(extr, username, password): extr.log.info("Logging in as %s", username) - url = "https://capi-v2.sankakucomplex.com/auth/token" + url = "https://sankakuapi.com/auth/token" headers = {"Accept": "application/vnd.sankaku.api+json;v=2"} data = {"login": username, "password": password} diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py index 203b1ac..30f310d 100644 --- a/gallery_dl/extractor/tiktok.py +++ b/gallery_dl/extractor/tiktok.py @@ -17,7 +17,7 @@ class TiktokExtractor(Extractor): category = "tiktok" directory_fmt = ("{category}", "{user}") filename_fmt = ( - "{id}{num:?_//>02} {title[b:150]}{img_id:? [/]/}.{extension}") + "{id}{num:?_//>02} {title[b:150]}{img_id|audio_id:? 
[/]/}.{extension}") archive_fmt = "{id}_{num}_{img_id}" root = "https://www.tiktok.com" cookies_domain = ".tiktok.com" @@ -83,7 +83,11 @@ class TiktokExtractor(Extractor): yield Message.Url, url, post if self.audio and "music" in post: - ytdl_media = "audio" + if self.audio == "ytdl": + ytdl_media = "audio" + else: + url = self._extract_audio(post) + yield Message.Url, url, post elif self.video and "video" in post: ytdl_media = "video" @@ -146,6 +150,25 @@ class TiktokExtractor(Extractor): 'type="application/json">', '</script>') return util.json_loads(data)["__DEFAULT_SCOPE__"] + def _extract_audio(self, post): + audio = post["music"] + url = audio["playUrl"] + text.nameext_from_url(url, post) + post.update({ + "type" : "audio", + "image" : None, + "title" : post["desc"] or "TikTok audio #{}".format(post["id"]), + "duration" : audio.get("duration"), + "num" : 0, + "img_id" : "", + "audio_id" : audio.get("id"), + "width" : 0, + "height" : 0, + }) + if not post["extension"]: + post["extension"] = "mp3" + return url + def _check_status_code(self, detail, url): status = detail.get("statusCode") if not status: diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index c391bad..8d90bc5 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -237,7 +237,7 @@ class TwitterExtractor(Extractor): def _extract_components(self, tweet, data, files): for component_id in data["components"]: com = data["component_objects"][component_id] - for conv in com["data"]["conversation_preview"]: + for conv in com["data"].get("conversation_preview") or (): for url in conv.get("mediaUrls") or (): files.append({"url": url}) diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index 4eae537..3b23f3a 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -54,7 +54,7 @@ class WikimediaExtractor(BaseExtractor): @staticmethod def prepare(image): - """Adjust the content of a 
image object""" + """Adjust the content of an image object""" image["metadata"] = { m["name"]: m["value"] for m in image["metadata"] or ()} @@ -80,6 +80,14 @@ class WikimediaExtractor(BaseExtractor): yield Message.Directory, image yield Message.Url, image["url"], image + if self.subcategories: + base = self.root + "/wiki/" + self.params["gcmtype"] = "subcat" + for subcat in self._pagination(self.params): + url = base + subcat["title"].replace(" ", "_") + subcat["_extractor"] = WikimediaArticleExtractor + yield Message.Queue, url, subcat + def _pagination(self, params): """ https://www.mediawiki.org/wiki/API:Query @@ -208,6 +216,8 @@ class WikimediaArticleExtractor(WikimediaExtractor): self.subcategory = prefix if prefix == "category": + self.subcategories = \ + True if self.config("subcategories", True) else False self.params = { "generator": "categorymembers", "gcmtitle" : path, @@ -215,10 +225,12 @@ class WikimediaArticleExtractor(WikimediaExtractor): "gcmlimit" : self.per_page, } elif prefix == "file": + self.subcategories = False self.params = { "titles" : path, } else: + self.subcategories = False self.params = { "generator": "images", "gimlimit" : self.per_page, diff --git a/gallery_dl/text.py b/gallery_dl/text.py index f117c92..c1dde94 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -258,10 +258,10 @@ def parse_query(qs): return result -def parse_query_list(qs): +def parse_query_list(qs, as_list=()): """Parse a query string into name-value pairs - Combine values of duplicate names into lists + Combine values of names in 'as_list' into lists """ if not qs: return {} @@ -273,14 +273,13 @@ def parse_query_list(qs): if eq: name = unquote(name.replace("+", " ")) value = unquote(value.replace("+", " ")) - if name in result: - rvalue = result[name] - if isinstance(rvalue, list): - rvalue.append(value) + if name in as_list: + if name in result: + result[name].append(value) else: - result[name] = [rvalue, value] - else: - result[name] = value + 
result[name] = [value] + elif name not in result: + result[name] = unquote(value.replace("+", " ")) except Exception: pass return result diff --git a/gallery_dl/version.py b/gallery_dl/version.py index ad98770..558b02e 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.29.1" +__version__ = "1.29.2" __variant__ = None |
