aboutsummaryrefslogtreecommitdiffstats
path: root/gallery_dl
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2025-03-15 18:05:15 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2025-03-15 18:05:15 -0400
commit8026a3c45446030d7af524bfc487d3462c8114ef (patch)
tree0818c682a06f620c08a8b6b4c07f4935bd79493a /gallery_dl
parent243d1f1beb4e4eb75a524f1aff948c47761a4f1d (diff)
New upstream version 1.29.2.upstream/1.29.2
Diffstat (limited to 'gallery_dl')
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/arcalive.py186
-rw-r--r--gallery_dl/extractor/batoto.py38
-rw-r--r--gallery_dl/extractor/civitai.py67
-rw-r--r--gallery_dl/extractor/facebook.py7
-rw-r--r--gallery_dl/extractor/furaffinity.py30
-rw-r--r--gallery_dl/extractor/itaku.py11
-rw-r--r--gallery_dl/extractor/sankaku.py54
-rw-r--r--gallery_dl/extractor/tiktok.py27
-rw-r--r--gallery_dl/extractor/twitter.py2
-rw-r--r--gallery_dl/extractor/wikimedia.py14
-rw-r--r--gallery_dl/text.py17
-rw-r--r--gallery_dl/version.py2
13 files changed, 382 insertions, 74 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 8208241..8198619 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
"adultempire",
"agnph",
"ao3",
+ "arcalive",
"architizer",
"artstation",
"aryion",
diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py
new file mode 100644
index 0000000..8e832fe
--- /dev/null
+++ b/gallery_dl/extractor/arcalive.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://arca.live/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+import re
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
+
+
+class ArcaliveExtractor(Extractor):
+ """Base class for Arca.live extractors"""
+ category = "arcalive"
+ root = "https://arca.live"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ self.api = ArcaliveAPI(self)
+
+ def items(self):
+ for article in self.articles():
+ article["_extractor"] = ArcalivePostExtractor
+ board = self.board or article.get("boardSlug") or "breaking"
+ url = "{}/b/{}/{}".format(self.root, board, article["id"])
+ yield Message.Queue, url, article
+
+
+class ArcalivePostExtractor(ArcaliveExtractor):
+ """Extractor for an arca.live post"""
+ subcategory = "post"
+ directory_fmt = ("{category}", "{boardSlug}")
+ filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}"
+ archive_fmt = "{id}_{num}"
+ pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)"
+ example = "https://arca.live/b/breaking/123456789"
+
+ def items(self):
+ self.emoticons = self.config("emoticons", False)
+ self.gifs = self.config("gifs", True)
+
+ post = self.api.post(self.groups[0])
+ files = self._extract_files(post)
+
+ post["count"] = len(files)
+ post["date"] = text.parse_datetime(
+ post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+ post["post_url"] = post_url = "{}/b/{}/{}".format(
+ self.root, post["boardSlug"], post["id"])
+ post["_http_headers"] = {"Referer": post_url + "?p=1"}
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ url = file["url"]
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ def _extract_files(self, post):
+ files = []
+
+ for video, media in self._extract_media(post["content"]):
+
+ if not self.emoticons and 'class="arca-emoticon"' in media:
+ continue
+
+ src = (text.extr(media, 'data-originalurl="', '"') or
+ text.extr(media, 'src="', '"'))
+ if not src:
+ continue
+
+ src = text.unescape(src.partition("?")[0])
+ if src[0] == "/":
+ if src[1] == "/":
+ url = "https:" + src
+ else:
+ url = self.root + src
+ else:
+ url = src
+
+ fallback = ()
+ orig = text.extr(media, 'data-orig="', '"')
+ if orig:
+ path, _, ext = url.rpartition(".")
+ if ext != orig:
+ fallback = (url + "?type=orig",)
+ url = path + "." + orig
+ elif video and self.gifs:
+ url_gif = url.rpartition(".")[0] + ".gif"
+ response = self.request(
+ url_gif + "?type=orig", method="HEAD", fatal=False)
+ if response.status_code < 400:
+ fallback = (url + "?type=orig",)
+ url = url_gif
+
+ files.append({
+ "url" : url + "?type=orig",
+ "width" : text.parse_int(text.extr(media, 'width="', '"')),
+ "height": text.parse_int(text.extr(media, 'height="', '"')),
+ "_fallback": fallback,
+ })
+
+ return files
+
+ def _extract_media(self, content):
+ ArcalivePostExtractor._extract_media = extr = re.compile(
+ r"<(?:img|vide(o)) ([^>]+)").findall
+ return extr(content)
+
+
+class ArcaliveBoardExtractor(ArcaliveExtractor):
+ """Extractor for an arca.live board's posts"""
+ subcategory = "board"
+ pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$"
+ example = "https://arca.live/b/breaking"
+
+ def articles(self):
+ self.board, query = self.groups
+ params = text.parse_query(query)
+ return self.api.board(self.board, params)
+
+
+class ArcaliveUserExtractor(ArcaliveExtractor):
+ """Extractor for an arca.live user's posts"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/u/@([^/?#]+)/?(?:\?([^#]+))?$"
+ example = "https://arca.live/u/@USER"
+
+ def articles(self):
+ self.board = None
+ user, query = self.groups
+ params = text.parse_query(query)
+ return self.api.user_posts(text.unquote(user), params)
+
+
+class ArcaliveAPI():
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.log = extractor.log
+ self.root = extractor.root + "/api/app"
+
+ headers = extractor.session.headers
+ headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75"
+ headers["X-Device-Token"] = util.generate_token(64)
+
+ def board(self, board_slug, params):
+ endpoint = "/list/channel/" + board_slug
+ return self._pagination(endpoint, params, "articles")
+
+ def post(self, post_id):
+ endpoint = "/view/article/breaking/" + str(post_id)
+ return self._call(endpoint)
+
+ def user_posts(self, username, params):
+ endpoint = "/list/channel/breaking"
+ params["target"] = "nickname"
+ params["keyword"] = username
+ return self._pagination(endpoint, params, "articles")
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+ response = self.extractor.request(url, params=params)
+
+ data = response.json()
+ if response.status_code == 200:
+ return data
+
+ self.log.debug("Server response: %s", data)
+ msg = data.get("message")
+ raise exception.StopExtraction(
+ "API request failed%s", ": " + msg if msg else "")
+
+ def _pagination(self, endpoint, params, key):
+ while True:
+ data = self._call(endpoint, params)
+
+ posts = data.get(key)
+ if not posts:
+ break
+ yield from posts
+
+ params.update(data["next"])
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 4d192a4..a1ad3ae 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -54,11 +54,23 @@ class BatotoBase():
"""Base class for batoto extractors"""
category = "batoto"
root = "https://xbato.org"
-
- def _init_root(self, match):
- domain = match.group(1)
- if domain not in LEGACY_DOMAINS:
- self.root = "https://" + domain
+ _warn_legacy = True
+
+ def _init_root(self):
+ domain = self.config("domain")
+ if domain is None or domain in {"auto", "url"}:
+ domain = self.groups[0]
+ if domain in LEGACY_DOMAINS:
+ if self._warn_legacy:
+ BatotoBase._warn_legacy = False
+ self.log.warning("Legacy domain '%s'", domain)
+ elif domain == "nolegacy":
+ domain = self.groups[0]
+ if domain in LEGACY_DOMAINS:
+ domain = "xbato.org"
+ elif domain == "nowarn":
+ domain = self.groups[0]
+ self.root = "https://" + domain
def request(self, url, **kwargs):
kwargs["encoding"] = "utf-8"
@@ -72,10 +84,10 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
example = "https://xbato.org/title/12345-MANGA/54321"
def __init__(self, match):
- self._init_root(match)
- self.chapter_id = match.group(2)
- url = "{}/title/0/{}".format(self.root, self.chapter_id)
- ChapterExtractor.__init__(self, match, url)
+ ChapterExtractor.__init__(self, match, False)
+ self._init_root()
+ self.chapter_id = self.groups[1]
+ self.gallery_url = "{}/title/0/{}".format(self.root, self.chapter_id)
def metadata(self, page):
extr = text.extract_from(page)
@@ -133,10 +145,10 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
example = "https://xbato.org/title/12345-MANGA/"
def __init__(self, match):
- self._init_root(match)
- self.manga_id = match.group(2) or match.group(3)
- url = "{}/title/{}".format(self.root, self.manga_id)
- MangaExtractor.__init__(self, match, url)
+ MangaExtractor.__init__(self, match, False)
+ self._init_root()
+ self.manga_id = self.groups[1] or self.groups[2]
+ self.manga_url = "{}/title/{}".format(self.root, self.manga_id)
def chapters(self, page):
extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 36efcfe..034a3c2 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -144,6 +144,11 @@ class CivitaiExtractor(Extractor):
file["generation"] = self.api.image_generationdata(file["id"])
yield data
+ def _parse_query(self, value):
+ return text.parse_query_list(
+ value, {"tags", "reactions", "baseModels", "tools", "techniques",
+ "types", "fileFormats"})
+
class CivitaiModelExtractor(CivitaiExtractor):
subcategory = "model"
@@ -348,8 +353,9 @@ class CivitaiUserModelsExtractor(CivitaiExtractor):
example = "https://civitai.com/user/USER/models"
def models(self):
- params = text.parse_query(self.groups[1])
- params["username"] = text.unquote(self.groups[0])
+ user, query = self.groups
+ params = self._parse_query(query)
+ params["username"] = text.unquote(user)
return self.api.models(params)
@@ -361,8 +367,9 @@ class CivitaiUserPostsExtractor(CivitaiExtractor):
example = "https://civitai.com/user/USER/posts"
def posts(self):
- params = text.parse_query(self.groups[1])
- params["username"] = text.unquote(self.groups[0])
+ user, query = self.groups
+ params = self._parse_query(query)
+ params["username"] = text.unquote(user)
return self.api.posts(params)
@@ -372,7 +379,7 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
example = "https://civitai.com/user/USER/images"
def __init__(self, match):
- self.params = text.parse_query_list(match.group(2))
+ self.params = self._parse_query(match.group(2))
if self.params.get("section") == "reactions":
self.subcategory = "reactions"
self.images = self.images_reactions
@@ -392,12 +399,8 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
params = self.params
params["authed"] = True
params["useIndex"] = False
- if "reactions" in params:
- if isinstance(params["reactions"], str):
- params["reactions"] = (params["reactions"],)
- else:
- params["reactions"] = (
- "Like", "Dislike", "Heart", "Laugh", "Cry")
+ if "reactions" not in params:
+ params["reactions"] = ("Like", "Dislike", "Heart", "Laugh", "Cry")
return self.api.images(params)
@@ -409,9 +412,11 @@ class CivitaiUserVideosExtractor(CivitaiExtractor):
def images(self):
self._image_ext = "mp4"
- params = text.parse_query(self.groups[1])
+
+ user, query = self.groups
+ params = self._parse_query(query)
params["types"] = ["video"]
- params["username"] = text.unquote(self.groups[0])
+ params["username"] = text.unquote(user)
return self.api.images(params)
@@ -499,7 +504,7 @@ class CivitaiTrpcAPI():
self.root = extractor.root + "/api/trpc/"
self.headers = {
"content-type" : "application/json",
- "x-client-version": "5.0.394",
+ "x-client-version": "5.0.542",
"x-client-date" : "",
"x-client" : "web",
"x-fingerprint" : "undefined",
@@ -660,15 +665,35 @@ class CivitaiTrpcAPI():
meta_ = meta
def _merge_params(self, params_user, params_default):
+ """Combine 'params_user' with 'params_default'"""
params_default.update(params_user)
return params_default
def _type_params(self, params):
- for key, type in (
- ("tags" , int),
- ("modelId" , int),
- ("modelVersionId", int),
- ):
- if key in params:
- params[key] = type(params[key])
+ """Convert 'params' values to expected types"""
+ types = {
+ "tags" : int,
+ "tools" : int,
+ "techniques" : int,
+ "modelId" : int,
+ "modelVersionId": int,
+ "remixesOnly" : _bool,
+ "nonRemixesOnly": _bool,
+ "withMeta" : _bool,
+ "fromPlatform" : _bool,
+ "supportsGeneration": _bool,
+ }
+
+ for name, value in params.items():
+ if name not in types:
+ continue
+ elif isinstance(value, str):
+ params[name] = types[name](value)
+ elif isinstance(value, list):
+ type = types[name]
+ params[name] = [type(item) for item in value]
return params
+
+
+def _bool(value):
+ return True if value == "true" else False
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index 1ec6adc..b284ee8 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -99,9 +99,10 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"',
'"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]),
- "date": text.parse_timestamp(text.extr(
- photo_page, '\\"publish_time\\":', ','
- )),
+ "date": text.parse_timestamp(
+ text.extr(photo_page, '\\"publish_time\\":', ',') or
+ text.extr(photo_page, '"created_time":', ',')
+ ),
"url": FacebookExtractor.decode_all(text.extr(
photo_page, ',"image":{"uri":"', '","'
)),
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 216aeb1..565fd71 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -97,6 +97,7 @@ class FuraffinityExtractor(Extractor):
if self._new_layout:
data["tags"] = text.split_html(extr(
'class="tags-row">', '</section>'))
+ data["scraps"] = (extr(' submissions">', "<") == "Scraps")
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
data["artist_url"] = extr('title="', '"').strip()
data["artist"] = extr(">", "<")
@@ -121,6 +122,8 @@ class FuraffinityExtractor(Extractor):
folders.append(folder)
else:
# old site layout
+ data["scraps"] = (
+ "/scraps/" in extr('class="minigallery-title', "</a>"))
data["title"] = text.unescape(extr("<h2>", "</h2>"))
data["artist_url"] = extr('title="', '"').strip()
data["artist"] = extr(">", "<")
@@ -153,12 +156,13 @@ class FuraffinityExtractor(Extractor):
def _process_description(description):
return text.unescape(text.remove_html(description, "", ""))
- def _pagination(self, path):
+ def _pagination(self, path, folder=None):
num = 1
+ folder = "" if folder is None else "/folder/{}/a".format(folder)
while True:
- url = "{}/{}/{}/{}/".format(
- self.root, path, self.user, num)
+ url = "{}/{}/{}{}/{}/".format(
+ self.root, path, self.user, folder, num)
page = self.request(url).text
post_id = None
@@ -232,13 +236,31 @@ class FuraffinityExtractor(Extractor):
class FuraffinityGalleryExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's gallery"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)(?:$|/(?!folder/))"
example = "https://www.furaffinity.net/gallery/USER/"
def posts(self):
return self._pagination("gallery")
+class FuraffinityFolderExtractor(FuraffinityExtractor):
+ """Extractor for a FurAffinity folder"""
+ subcategory = "folder"
+ directory_fmt = ("{category}", "{user!l}",
+ "Folders", "{folder_id}{folder_name:? //}")
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?"
+ example = "https://www.furaffinity.net/gallery/USER/folder/12345/FOLDER"
+
+ def metadata(self):
+ return {
+ "folder_id" : self.groups[1],
+ "folder_name": self.groups[2] or "",
+ }
+
+ def posts(self):
+ return self._pagination("gallery", self.groups[1])
+
+
class FuraffinityScrapsExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's scraps"""
subcategory = "scraps"
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 5c91eb9..2974b59 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -80,7 +80,8 @@ class ItakuSearchExtractor(ItakuExtractor):
example = "https://itaku.ee/home/images?tags=SEARCH"
def posts(self):
- params = text.parse_query_list(self.groups[0])
+ params = text.parse_query_list(
+ self.groups[0], {"tags", "maturity_rating"})
return self.api.search_images(params)
@@ -99,13 +100,7 @@ class ItakuAPI():
negative_tags = []
optional_tags = []
- tags = params.pop("tags", None)
- if not tags:
- tags = ()
- elif isinstance(tags, str):
- tags = (tags,)
-
- for tag in tags:
+ for tag in params.pop("tags", None) or ():
if not tag:
pass
elif tag[0] == "-":
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b2f31dd..c7303f2 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -66,8 +66,7 @@ class SankakuExtractor(BooruExtractor):
def _prepare(self, post):
post["created_at"] = post["created_at"]["s"]
post["date"] = text.parse_timestamp(post["created_at"])
- post["tags"] = [tag["name"].lower().replace(" ", "_")
- for tag in post["tags"] if tag["name"]]
+ post["tags"] = post.pop("tag_names", ())
post["tag_string"] = " ".join(post["tags"])
post["_http_validate"] = self._check_expired
@@ -76,7 +75,7 @@ class SankakuExtractor(BooruExtractor):
def _tags(self, post, page):
tags = collections.defaultdict(list)
- for tag in post["tags"]:
+ for tag in self.api.tags(post["id"]):
name = tag["name"]
if name:
tags[tag["type"]].append(name.lower().replace(" ", "_"))
@@ -112,11 +111,11 @@ class SankakuTagExtractor(SankakuExtractor):
if "date:" in self.tags:
# rewrite 'date:' tags (#1790)
self.tags = re.sub(
- r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)",
- r"date:\3.\2.\1", self.tags)
+ r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)",
+ r"date:\3-\2-\1T00:00", self.tags)
self.tags = re.sub(
- r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)",
- r"date:\1.\2.\3", self.tags)
+ r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)",
+ r"date:\1-\2-\3T00:00", self.tags)
def metadata(self):
return {"search_tags": self.tags}
@@ -209,6 +208,30 @@ class SankakuAPI():
params = {"lang": "en"}
return self._call("/posts/{}/notes".format(post_id), params)
+ def tags(self, post_id):
+ endpoint = "/posts/{}/tags".format(post_id)
+ params = {
+ "lang" : "en",
+ "page" : 1,
+ "limit": 100,
+ }
+
+ tags = None
+ while True:
+ data = self._call(endpoint, params)
+
+ tags_new = data["data"]
+ if not tags_new:
+ return tags or []
+ elif tags is None:
+ tags = tags_new
+ else:
+ tags.extend(tags_new)
+
+ if len(tags_new) < 80 or len(tags) >= data["total"]:
+ return tags
+ params["page"] += 1
+
def pools(self, pool_id):
params = {"lang": "en"}
return self._call("/pools/" + pool_id, params)
@@ -216,6 +239,15 @@ class SankakuAPI():
def pools_keyset(self, params):
return self._pagination("/pools/keyset", params)
+ def pools_series(self, params):
+ params_ = {
+ "lang" : "en",
+ "filledPools": "true",
+ "includes[]" : "pools",
+ }
+ params_.update(params)
+ return self._pagination("/poolseriesv2", params)
+
def posts(self, post_id):
params = {
"lang" : "en",
@@ -223,17 +255,17 @@ class SankakuAPI():
"limit": "1",
"tags" : ("md5:" if len(post_id) == 32 else "id_range:") + post_id,
}
- return self._call("/posts", params)
+ return self._call("/v2/posts", params)
def posts_keyset(self, params):
- return self._pagination("/posts/keyset", params)
+ return self._pagination("/v2/posts/keyset", params)
def authenticate(self):
self.headers["Authorization"] = \
_authenticate_impl(self.extractor, self.username, self.password)
def _call(self, endpoint, params=None):
- url = "https://capi-v2.sankakucomplex.com" + endpoint
+ url = "https://sankakuapi.com" + endpoint
for _ in range(5):
self.authenticate()
response = self.extractor.request(
@@ -311,7 +343,7 @@ class SankakuAPI():
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
- url = "https://capi-v2.sankakucomplex.com/auth/token"
+ url = "https://sankakuapi.com/auth/token"
headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
data = {"login": username, "password": password}
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index 203b1ac..30f310d 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -17,7 +17,7 @@ class TiktokExtractor(Extractor):
category = "tiktok"
directory_fmt = ("{category}", "{user}")
filename_fmt = (
- "{id}{num:?_//>02} {title[b:150]}{img_id:? [/]/}.{extension}")
+ "{id}{num:?_//>02} {title[b:150]}{img_id|audio_id:? [/]/}.{extension}")
archive_fmt = "{id}_{num}_{img_id}"
root = "https://www.tiktok.com"
cookies_domain = ".tiktok.com"
@@ -83,7 +83,11 @@ class TiktokExtractor(Extractor):
yield Message.Url, url, post
if self.audio and "music" in post:
- ytdl_media = "audio"
+ if self.audio == "ytdl":
+ ytdl_media = "audio"
+ else:
+ url = self._extract_audio(post)
+ yield Message.Url, url, post
elif self.video and "video" in post:
ytdl_media = "video"
@@ -146,6 +150,25 @@ class TiktokExtractor(Extractor):
'type="application/json">', '</script>')
return util.json_loads(data)["__DEFAULT_SCOPE__"]
+ def _extract_audio(self, post):
+ audio = post["music"]
+ url = audio["playUrl"]
+ text.nameext_from_url(url, post)
+ post.update({
+ "type" : "audio",
+ "image" : None,
+ "title" : post["desc"] or "TikTok audio #{}".format(post["id"]),
+ "duration" : audio.get("duration"),
+ "num" : 0,
+ "img_id" : "",
+ "audio_id" : audio.get("id"),
+ "width" : 0,
+ "height" : 0,
+ })
+ if not post["extension"]:
+ post["extension"] = "mp3"
+ return url
+
def _check_status_code(self, detail, url):
status = detail.get("statusCode")
if not status:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c391bad..8d90bc5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -237,7 +237,7 @@ class TwitterExtractor(Extractor):
def _extract_components(self, tweet, data, files):
for component_id in data["components"]:
com = data["component_objects"][component_id]
- for conv in com["data"]["conversation_preview"]:
+ for conv in com["data"].get("conversation_preview") or ():
for url in conv.get("mediaUrls") or ():
files.append({"url": url})
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 4eae537..3b23f3a 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -54,7 +54,7 @@ class WikimediaExtractor(BaseExtractor):
@staticmethod
def prepare(image):
- """Adjust the content of a image object"""
+ """Adjust the content of an image object"""
image["metadata"] = {
m["name"]: m["value"]
for m in image["metadata"] or ()}
@@ -80,6 +80,14 @@ class WikimediaExtractor(BaseExtractor):
yield Message.Directory, image
yield Message.Url, image["url"], image
+ if self.subcategories:
+ base = self.root + "/wiki/"
+ self.params["gcmtype"] = "subcat"
+ for subcat in self._pagination(self.params):
+ url = base + subcat["title"].replace(" ", "_")
+ subcat["_extractor"] = WikimediaArticleExtractor
+ yield Message.Queue, url, subcat
+
def _pagination(self, params):
"""
https://www.mediawiki.org/wiki/API:Query
@@ -208,6 +216,8 @@ class WikimediaArticleExtractor(WikimediaExtractor):
self.subcategory = prefix
if prefix == "category":
+ self.subcategories = \
+ True if self.config("subcategories", True) else False
self.params = {
"generator": "categorymembers",
"gcmtitle" : path,
@@ -215,10 +225,12 @@ class WikimediaArticleExtractor(WikimediaExtractor):
"gcmlimit" : self.per_page,
}
elif prefix == "file":
+ self.subcategories = False
self.params = {
"titles" : path,
}
else:
+ self.subcategories = False
self.params = {
"generator": "images",
"gimlimit" : self.per_page,
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index f117c92..c1dde94 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -258,10 +258,10 @@ def parse_query(qs):
return result
-def parse_query_list(qs):
+def parse_query_list(qs, as_list=()):
"""Parse a query string into name-value pairs
- Combine values of duplicate names into lists
+ Combine values of names in 'as_list' into lists
"""
if not qs:
return {}
@@ -273,14 +273,13 @@ def parse_query_list(qs):
if eq:
name = unquote(name.replace("+", " "))
value = unquote(value.replace("+", " "))
- if name in result:
- rvalue = result[name]
- if isinstance(rvalue, list):
- rvalue.append(value)
+ if name in as_list:
+ if name in result:
+ result[name].append(value)
else:
- result[name] = [rvalue, value]
- else:
- result[name] = value
+ result[name] = [value]
+ elif name not in result:
+ result[name] = unquote(value.replace("+", " "))
except Exception:
pass
return result
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index ad98770..558b02e 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.29.1"
+__version__ = "1.29.2"
__variant__ = None