summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatarUnit 193 <unit193@unit193.net>2025-03-15 18:05:15 -0400
committerLibravatarUnit 193 <unit193@unit193.net>2025-03-15 18:05:15 -0400
commit8026a3c45446030d7af524bfc487d3462c8114ef (patch)
tree0818c682a06f620c08a8b6b4c07f4935bd79493a
parent243d1f1beb4e4eb75a524f1aff948c47761a4f1d (diff)
New upstream version 1.29.2.upstream/1.29.2
-rw-r--r--CHANGELOG.md30
-rw-r--r--PKG-INFO6
-rw-r--r--README.rst4
-rw-r--r--data/man/gallery-dl.12
-rw-r--r--data/man/gallery-dl.conf.578
-rw-r--r--docs/gallery-dl.conf14
-rw-r--r--gallery_dl.egg-info/PKG-INFO6
-rw-r--r--gallery_dl.egg-info/SOURCES.txt1
-rw-r--r--gallery_dl/extractor/__init__.py1
-rw-r--r--gallery_dl/extractor/arcalive.py186
-rw-r--r--gallery_dl/extractor/batoto.py38
-rw-r--r--gallery_dl/extractor/civitai.py67
-rw-r--r--gallery_dl/extractor/facebook.py7
-rw-r--r--gallery_dl/extractor/furaffinity.py30
-rw-r--r--gallery_dl/extractor/itaku.py11
-rw-r--r--gallery_dl/extractor/sankaku.py54
-rw-r--r--gallery_dl/extractor/tiktok.py27
-rw-r--r--gallery_dl/extractor/twitter.py2
-rw-r--r--gallery_dl/extractor/wikimedia.py14
-rw-r--r--gallery_dl/text.py17
-rw-r--r--gallery_dl/version.py2
-rw-r--r--test/test_results.py9
-rw-r--r--test/test_text.py6
23 files changed, 502 insertions, 110 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d7c67a..484ddeb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,23 +1,15 @@
-## 1.29.1 - 2025-03-08
+## 1.29.2 - 2025-03-15
### Extractors
#### Additions
-- [tenor] add support ([#6075](https://github.com/mikf/gallery-dl/issues/6075))
+- [arcalive] add support ([#5657](https://github.com/mikf/gallery-dl/issues/5657) [#7100](https://github.com/mikf/gallery-dl/issues/7100))
+- [furaffinity] add `folder` extractor ([#1817](https://github.com/mikf/gallery-dl/issues/1817) [#7159](https://github.com/mikf/gallery-dl/issues/7159))
#### Fixes
-- [bunkr] update API endpoint ([#7097](https://github.com/mikf/gallery-dl/issues/7097))
-- [erome] fix `AttributeError` for albums without tags ([#7076](https://github.com/mikf/gallery-dl/issues/7076))
-- [furaffinity] fix `artist` metadata ([#6582](https://github.com/mikf/gallery-dl/issues/6582) [#7115](https://github.com/mikf/gallery-dl/issues/7115) [#7123](https://github.com/mikf/gallery-dl/issues/7123) [#7130](https://github.com/mikf/gallery-dl/issues/7130))
-- [jpgfish] decrypt file URLs ([#7073](https://github.com/mikf/gallery-dl/issues/7073) [#7079](https://github.com/mikf/gallery-dl/issues/7079) [#7109](https://github.com/mikf/gallery-dl/issues/7109))
-- [sankaku] fix search tag limit check
-- [vsco] fix `video` extractor ([#7113](https://github.com/mikf/gallery-dl/issues/7113))
-- [vsco] fix extracting videos from `/gallery` results ([#7113](https://github.com/mikf/gallery-dl/issues/7113))
+- [civitai] fix/improve query parameter handling ([#7138](https://github.com/mikf/gallery-dl/issues/7138))
+- [facebook] improve `date` extraction ([#7151](https://github.com/mikf/gallery-dl/issues/7151))
+- [sankaku] update API URLs ([#7154](https://github.com/mikf/gallery-dl/issues/7154) [#7155](https://github.com/mikf/gallery-dl/issues/7155) [#7163](https://github.com/mikf/gallery-dl/issues/7163))
+- [twitter] prevent exception in `_extract_components()` ([#7139](https://github.com/mikf/gallery-dl/issues/7139))
#### Improvements
-- [bunkr] add `endpoint` option ([#7097](https://github.com/mikf/gallery-dl/issues/7097))
-- [danbooru:pool] download posts in pool order, add `order-posts` option ([#7091](https://github.com/mikf/gallery-dl/issues/7091))
-- [erome:search] recognize all URL query parameters ([#7125](https://github.com/mikf/gallery-dl/issues/7125))
-- [reddit] add `selftext` option ([#7111](https://github.com/mikf/gallery-dl/issues/7111))
-- [redgifs:search] support `/search?query=...` URLs ([#7118](https://github.com/mikf/gallery-dl/issues/7118))
-- [sankaku] increase wait time on 429 errors ([#7129](https://github.com/mikf/gallery-dl/issues/7129))
-- [tiktok] improve `tiktok-range` parsing ([#7098](https://github.com/mikf/gallery-dl/issues/7098))
-### Downloaders
-- [http] detect Cloudflare/DDoS-Guard challenges ([#7066](https://github.com/mikf/gallery-dl/issues/7066) [#7121](https://github.com/mikf/gallery-dl/issues/7121))
-- warn about invalid `subcategory` values ([#7103](https://github.com/mikf/gallery-dl/issues/7103) [#7119](https://github.com/mikf/gallery-dl/issues/7119))
+- [batoto] add `domain` option ([#7174](https://github.com/mikf/gallery-dl/issues/7174))
+- [furaffinity] extract `scraps` metadata ([#7015](https://github.com/mikf/gallery-dl/issues/7015))
+- [tiktok] implement audio extraction without `yt-dlp`
+- [wikimedia] add `subcategories` option ([#2340](https://github.com/mikf/gallery-dl/issues/2340))
diff --git a/PKG-INFO b/PKG-INFO
index c74f013..1d71036 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.29.1
+Version: 1.29.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -132,9 +132,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.bin>`__
Nightly Builds
diff --git a/README.rst b/README.rst
index 4be6c57..ae51968 100644
--- a/README.rst
+++ b/README.rst
@@ -77,9 +77,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.bin>`__
Nightly Builds
diff --git a/data/man/gallery-dl.1 b/data/man/gallery-dl.1
index 761c413..8c34ff3 100644
--- a/data/man/gallery-dl.1
+++ b/data/man/gallery-dl.1
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL" "1" "2025-03-08" "1.29.1" "gallery-dl Manual"
+.TH "GALLERY-DL" "1" "2025-03-15" "1.29.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
diff --git a/data/man/gallery-dl.conf.5 b/data/man/gallery-dl.conf.5
index c0b64df..aaf94b3 100644
--- a/data/man/gallery-dl.conf.5
+++ b/data/man/gallery-dl.conf.5
@@ -1,4 +1,4 @@
-.TH "GALLERY-DL.CONF" "5" "2025-03-08" "1.29.1" "gallery-dl Manual"
+.TH "GALLERY-DL.CONF" "5" "2025-03-15" "1.29.2" "gallery-dl Manual"
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
@@ -455,6 +455,7 @@ response before \f[I]retrying\f[] the request.
.br
* \f[I]"0.5-1.5"\f[]
\f[I]ao3\f[],
+\f[I]arcalive\f[],
\f[I]civitai\f[],
\f[I][Danbooru]\f[],
\f[I][E621]\f[],
@@ -1637,6 +1638,29 @@ Special values:
Format(s) to download.
+.SS extractor.arcalive.emoticons
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]false\f[]
+
+.IP "Description:" 4
+Download emoticon images.
+
+
+.SS extractor.arcalive.gifs
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+Check if \f[I].mp4\f[] videos have a \f[I].gif\f[] version
+and download those instead.
+
+
.SS extractor.artstation.external
.IP "Type:" 6
\f[I]bool\f[]
@@ -1710,6 +1734,33 @@ descend into subfolders
* \f[I]false\f[]: Get posts from "Latest Updates" pages
+.SS extractor.batoto.domain
+.IP "Type:" 6
+\f[I]string\f[]
+
+.IP "Default:" 9
+\f[I]"auto"\f[]
+
+.IP "Example:" 4
+"mangatoto.org"
+
+.IP "Description:" 4
+Specifies the domain used by \f[I]batoto\f[] extractors.
+
+\f[I]"auto"\f[] | \f[I]"url"\f[]
+Use the input URL's domain
+\f[I]"nolegacy"\f[]
+Use the input URL's domain
+.br
+- replace legacy domains with \f[I]"xbato.org"\f[]
+\f[I]"nowarn"\f[]
+Use the input URL's domain
+.br
+- do not warn about legacy domains
+any \f[I]string\f[]
+Use this domain
+
+
.SS extractor.bbc.width
.IP "Type:" 6
\f[I]integer\f[]
@@ -5165,13 +5216,23 @@ Possible formats include
.SS extractor.tiktok.audio
.IP "Type:" 6
-\f[I]bool\f[]
+.br
+* \f[I]bool\f[]
+.br
+* \f[I]string\f[]
.IP "Default:" 9
\f[I]true\f[]
.IP "Description:" 4
-Download audio tracks using \f[I]ytdl\f[].
+Controls audio download behavior.
+
+.br
+* \f[I]true\f[]: Download audio tracks
+.br
+* \f[I]"ytdl"\f[]: Download audio tracks using \f[I]ytdl\f[]
+.br
+* \f[I]false\f[]: Ignore audio tracks
.SS extractor.tiktok.videos
@@ -6208,6 +6269,17 @@ Number of results to return in a single API query.
The value must be between 10 and 500.
+.SS extractor.wikimedia.subcategories
+.IP "Type:" 6
+\f[I]bool\f[]
+
+.IP "Default:" 9
+\f[I]true\f[]
+
+.IP "Description:" 4
+For \f[I]Category:\f[] pages, recursively descend into subcategories.
+
+
.SS extractor.ytdl.cmdline-args
.IP "Type:" 6
.br
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 0ad87c0..7887fd5 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -99,6 +99,13 @@
"formats": ["pdf"]
},
+ "arcalive":
+ {
+ "sleep-request": "0.5-1.5",
+
+ "emoticons": false,
+ "gifs" : true
+ },
"artstation":
{
"external" : false,
@@ -117,6 +124,10 @@
"recursive": true
},
+ "batoto":
+ {
+ "domain": "auto"
+ },
"bbc":
{
"width": 1920
@@ -905,7 +916,8 @@
"wikimedia":
{
"sleep-request": "1.0-2.0",
- "limit": 50
+ "limit": 50,
+ "subcategories": true
},
"booru":
diff --git a/gallery_dl.egg-info/PKG-INFO b/gallery_dl.egg-info/PKG-INFO
index c74f013..1d71036 100644
--- a/gallery_dl.egg-info/PKG-INFO
+++ b/gallery_dl.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.2
Name: gallery_dl
-Version: 1.29.1
+Version: 1.29.2
Summary: Command-line program to download image galleries and collections from several image hosting sites
Home-page: https://github.com/mikf/gallery-dl
Download-URL: https://github.com/mikf/gallery-dl/releases/latest
@@ -132,9 +132,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
-- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.exe>`__
+- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
-- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.1/gallery-dl.bin>`__
+- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.29.2/gallery-dl.bin>`__
Nightly Builds
diff --git a/gallery_dl.egg-info/SOURCES.txt b/gallery_dl.egg-info/SOURCES.txt
index 0609d8d..3e8f365 100644
--- a/gallery_dl.egg-info/SOURCES.txt
+++ b/gallery_dl.egg-info/SOURCES.txt
@@ -58,6 +58,7 @@ gallery_dl/extractor/__init__.py
gallery_dl/extractor/adultempire.py
gallery_dl/extractor/agnph.py
gallery_dl/extractor/ao3.py
+gallery_dl/extractor/arcalive.py
gallery_dl/extractor/architizer.py
gallery_dl/extractor/artstation.py
gallery_dl/extractor/aryion.py
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 8208241..8198619 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ modules = [
"adultempire",
"agnph",
"ao3",
+ "arcalive",
"architizer",
"artstation",
"aryion",
diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py
new file mode 100644
index 0000000..8e832fe
--- /dev/null
+++ b/gallery_dl/extractor/arcalive.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://arca.live/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+import re
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?arca\.live"
+
+
+class ArcaliveExtractor(Extractor):
+ """Base class for Arca.live extractors"""
+ category = "arcalive"
+ root = "https://arca.live"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ self.api = ArcaliveAPI(self)
+
+ def items(self):
+ for article in self.articles():
+ article["_extractor"] = ArcalivePostExtractor
+ board = self.board or article.get("boardSlug") or "breaking"
+ url = "{}/b/{}/{}".format(self.root, board, article["id"])
+ yield Message.Queue, url, article
+
+
+class ArcalivePostExtractor(ArcaliveExtractor):
+ """Extractor for an arca.live post"""
+ subcategory = "post"
+ directory_fmt = ("{category}", "{boardSlug}")
+ filename_fmt = "{id}_{num}{title:? //[b:230]}.{extension}"
+ archive_fmt = "{id}_{num}"
+ pattern = BASE_PATTERN + r"/b/(?:\w+)/(\d+)"
+ example = "https://arca.live/b/breaking/123456789"
+
+ def items(self):
+ self.emoticons = self.config("emoticons", False)
+ self.gifs = self.config("gifs", True)
+
+ post = self.api.post(self.groups[0])
+ files = self._extract_files(post)
+
+ post["count"] = len(files)
+ post["date"] = text.parse_datetime(
+ post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
+ post["post_url"] = post_url = "{}/b/{}/{}".format(
+ self.root, post["boardSlug"], post["id"])
+ post["_http_headers"] = {"Referer": post_url + "?p=1"}
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ url = file["url"]
+ yield Message.Url, url, text.nameext_from_url(url, post)
+
+ def _extract_files(self, post):
+ files = []
+
+ for video, media in self._extract_media(post["content"]):
+
+ if not self.emoticons and 'class="arca-emoticon"' in media:
+ continue
+
+ src = (text.extr(media, 'data-originalurl="', '"') or
+ text.extr(media, 'src="', '"'))
+ if not src:
+ continue
+
+ src = text.unescape(src.partition("?")[0])
+ if src[0] == "/":
+ if src[1] == "/":
+ url = "https:" + src
+ else:
+ url = self.root + src
+ else:
+ url = src
+
+ fallback = ()
+ orig = text.extr(media, 'data-orig="', '"')
+ if orig:
+ path, _, ext = url.rpartition(".")
+ if ext != orig:
+ fallback = (url + "?type=orig",)
+ url = path + "." + orig
+ elif video and self.gifs:
+ url_gif = url.rpartition(".")[0] + ".gif"
+ response = self.request(
+ url_gif + "?type=orig", method="HEAD", fatal=False)
+ if response.status_code < 400:
+ fallback = (url + "?type=orig",)
+ url = url_gif
+
+ files.append({
+ "url" : url + "?type=orig",
+ "width" : text.parse_int(text.extr(media, 'width="', '"')),
+ "height": text.parse_int(text.extr(media, 'height="', '"')),
+ "_fallback": fallback,
+ })
+
+ return files
+
+ def _extract_media(self, content):
+ ArcalivePostExtractor._extract_media = extr = re.compile(
+ r"<(?:img|vide(o)) ([^>]+)").findall
+ return extr(content)
+
+
+class ArcaliveBoardExtractor(ArcaliveExtractor):
+ """Extractor for an arca.live board's posts"""
+ subcategory = "board"
+ pattern = BASE_PATTERN + r"/b/([^/?#]+)/?(?:\?([^#]+))?$"
+ example = "https://arca.live/b/breaking"
+
+ def articles(self):
+ self.board, query = self.groups
+ params = text.parse_query(query)
+ return self.api.board(self.board, params)
+
+
+class ArcaliveUserExtractor(ArcaliveExtractor):
+ """Extractor for an arca.live users's posts"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/u/@([^/?#]+)/?(?:\?([^#]+))?$"
+ example = "https://arca.live/u/@USER"
+
+ def articles(self):
+ self.board = None
+ user, query = self.groups
+ params = text.parse_query(query)
+ return self.api.user_posts(text.unquote(user), params)
+
+
+class ArcaliveAPI():
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.log = extractor.log
+ self.root = extractor.root + "/api/app"
+
+ headers = extractor.session.headers
+ headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75"
+ headers["X-Device-Token"] = util.generate_token(64)
+
+ def board(self, board_slug, params):
+ endpoint = "/list/channel/" + board_slug
+ return self._pagination(endpoint, params, "articles")
+
+ def post(self, post_id):
+ endpoint = "/view/article/breaking/" + str(post_id)
+ return self._call(endpoint)
+
+ def user_posts(self, username, params):
+ endpoint = "/list/channel/breaking"
+ params["target"] = "nickname"
+ params["keyword"] = username
+ return self._pagination(endpoint, params, "articles")
+
+ def _call(self, endpoint, params=None):
+ url = self.root + endpoint
+ response = self.extractor.request(url, params=params)
+
+ data = response.json()
+ if response.status_code == 200:
+ return data
+
+ self.log.debug("Server response: %s", data)
+ msg = data.get("message")
+ raise exception.StopExtraction(
+ "API request failed%s", ": " + msg if msg else "")
+
+ def _pagination(self, endpoint, params, key):
+ while True:
+ data = self._call(endpoint, params)
+
+ posts = data.get(key)
+ if not posts:
+ break
+ yield from posts
+
+ params.update(data["next"])
diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py
index 4d192a4..a1ad3ae 100644
--- a/gallery_dl/extractor/batoto.py
+++ b/gallery_dl/extractor/batoto.py
@@ -54,11 +54,23 @@ class BatotoBase():
"""Base class for batoto extractors"""
category = "batoto"
root = "https://xbato.org"
-
- def _init_root(self, match):
- domain = match.group(1)
- if domain not in LEGACY_DOMAINS:
- self.root = "https://" + domain
+ _warn_legacy = True
+
+ def _init_root(self):
+ domain = self.config("domain")
+ if domain is None or domain in {"auto", "url"}:
+ domain = self.groups[0]
+ if domain in LEGACY_DOMAINS:
+ if self._warn_legacy:
+ BatotoBase._warn_legacy = False
+ self.log.warning("Legacy domain '%s'", domain)
+ elif domain == "nolegacy":
+ domain = self.groups[0]
+ if domain in LEGACY_DOMAINS:
+ domain = "xbato.org"
+ elif domain == "nowarn":
+ domain = self.groups[0]
+ self.root = "https://" + domain
def request(self, url, **kwargs):
kwargs["encoding"] = "utf-8"
@@ -72,10 +84,10 @@ class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
example = "https://xbato.org/title/12345-MANGA/54321"
def __init__(self, match):
- self._init_root(match)
- self.chapter_id = match.group(2)
- url = "{}/title/0/{}".format(self.root, self.chapter_id)
- ChapterExtractor.__init__(self, match, url)
+ ChapterExtractor.__init__(self, match, False)
+ self._init_root()
+ self.chapter_id = self.groups[1]
+ self.gallery_url = "{}/title/0/{}".format(self.root, self.chapter_id)
def metadata(self, page):
extr = text.extract_from(page)
@@ -133,10 +145,10 @@ class BatotoMangaExtractor(BatotoBase, MangaExtractor):
example = "https://xbato.org/title/12345-MANGA/"
def __init__(self, match):
- self._init_root(match)
- self.manga_id = match.group(2) or match.group(3)
- url = "{}/title/{}".format(self.root, self.manga_id)
- MangaExtractor.__init__(self, match, url)
+ MangaExtractor.__init__(self, match, False)
+ self._init_root()
+ self.manga_id = self.groups[1] or self.groups[2]
+ self.manga_url = "{}/title/{}".format(self.root, self.manga_id)
def chapters(self, page):
extr = text.extract_from(page)
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index 36efcfe..034a3c2 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -144,6 +144,11 @@ class CivitaiExtractor(Extractor):
file["generation"] = self.api.image_generationdata(file["id"])
yield data
+ def _parse_query(self, value):
+ return text.parse_query_list(
+ value, {"tags", "reactions", "baseModels", "tools", "techniques",
+ "types", "fileFormats"})
+
class CivitaiModelExtractor(CivitaiExtractor):
subcategory = "model"
@@ -348,8 +353,9 @@ class CivitaiUserModelsExtractor(CivitaiExtractor):
example = "https://civitai.com/user/USER/models"
def models(self):
- params = text.parse_query(self.groups[1])
- params["username"] = text.unquote(self.groups[0])
+ user, query = self.groups
+ params = self._parse_query(query)
+ params["username"] = text.unquote(user)
return self.api.models(params)
@@ -361,8 +367,9 @@ class CivitaiUserPostsExtractor(CivitaiExtractor):
example = "https://civitai.com/user/USER/posts"
def posts(self):
- params = text.parse_query(self.groups[1])
- params["username"] = text.unquote(self.groups[0])
+ user, query = self.groups
+ params = self._parse_query(query)
+ params["username"] = text.unquote(user)
return self.api.posts(params)
@@ -372,7 +379,7 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
example = "https://civitai.com/user/USER/images"
def __init__(self, match):
- self.params = text.parse_query_list(match.group(2))
+ self.params = self._parse_query(match.group(2))
if self.params.get("section") == "reactions":
self.subcategory = "reactions"
self.images = self.images_reactions
@@ -392,12 +399,8 @@ class CivitaiUserImagesExtractor(CivitaiExtractor):
params = self.params
params["authed"] = True
params["useIndex"] = False
- if "reactions" in params:
- if isinstance(params["reactions"], str):
- params["reactions"] = (params["reactions"],)
- else:
- params["reactions"] = (
- "Like", "Dislike", "Heart", "Laugh", "Cry")
+ if "reactions" not in params:
+ params["reactions"] = ("Like", "Dislike", "Heart", "Laugh", "Cry")
return self.api.images(params)
@@ -409,9 +412,11 @@ class CivitaiUserVideosExtractor(CivitaiExtractor):
def images(self):
self._image_ext = "mp4"
- params = text.parse_query(self.groups[1])
+
+ user, query = self.groups
+ params = self._parse_query(query)
params["types"] = ["video"]
- params["username"] = text.unquote(self.groups[0])
+ params["username"] = text.unquote(user)
return self.api.images(params)
@@ -499,7 +504,7 @@ class CivitaiTrpcAPI():
self.root = extractor.root + "/api/trpc/"
self.headers = {
"content-type" : "application/json",
- "x-client-version": "5.0.394",
+ "x-client-version": "5.0.542",
"x-client-date" : "",
"x-client" : "web",
"x-fingerprint" : "undefined",
@@ -660,15 +665,35 @@ class CivitaiTrpcAPI():
meta_ = meta
def _merge_params(self, params_user, params_default):
+ """Combine 'params_user' with 'params_default'"""
params_default.update(params_user)
return params_default
def _type_params(self, params):
- for key, type in (
- ("tags" , int),
- ("modelId" , int),
- ("modelVersionId", int),
- ):
- if key in params:
- params[key] = type(params[key])
+ """Convert 'params' values to expected types"""
+ types = {
+ "tags" : int,
+ "tools" : int,
+ "techniques" : int,
+ "modelId" : int,
+ "modelVersionId": int,
+ "remixesOnly" : _bool,
+ "nonRemixesOnly": _bool,
+ "withMeta" : _bool,
+ "fromPlatform" : _bool,
+ "supportsGeneration": _bool,
+ }
+
+ for name, value in params.items():
+ if name not in types:
+ continue
+ elif isinstance(value, str):
+ params[name] = types[name](value)
+ elif isinstance(value, list):
+ type = types[name]
+ params[name] = [type(item) for item in value]
return params
+
+
+def _bool(value):
+ return True if value == "true" else False
diff --git a/gallery_dl/extractor/facebook.py b/gallery_dl/extractor/facebook.py
index 1ec6adc..b284ee8 100644
--- a/gallery_dl/extractor/facebook.py
+++ b/gallery_dl/extractor/facebook.py
@@ -99,9 +99,10 @@ class FacebookExtractor(Extractor):
'"message":{"delight_ranges"',
'"},"message_preferred_body"'
).rsplit('],"text":"', 1)[-1]),
- "date": text.parse_timestamp(text.extr(
- photo_page, '\\"publish_time\\":', ','
- )),
+ "date": text.parse_timestamp(
+ text.extr(photo_page, '\\"publish_time\\":', ',') or
+ text.extr(photo_page, '"created_time":', ',')
+ ),
"url": FacebookExtractor.decode_all(text.extr(
photo_page, ',"image":{"uri":"', '","'
)),
diff --git a/gallery_dl/extractor/furaffinity.py b/gallery_dl/extractor/furaffinity.py
index 216aeb1..565fd71 100644
--- a/gallery_dl/extractor/furaffinity.py
+++ b/gallery_dl/extractor/furaffinity.py
@@ -97,6 +97,7 @@ class FuraffinityExtractor(Extractor):
if self._new_layout:
data["tags"] = text.split_html(extr(
'class="tags-row">', '</section>'))
+ data["scraps"] = (extr(' submissions">', "<") == "Scraps")
data["title"] = text.unescape(extr("<h2><p>", "</p></h2>"))
data["artist_url"] = extr('title="', '"').strip()
data["artist"] = extr(">", "<")
@@ -121,6 +122,8 @@ class FuraffinityExtractor(Extractor):
folders.append(folder)
else:
# old site layout
+ data["scraps"] = (
+ "/scraps/" in extr('class="minigallery-title', "</a>"))
data["title"] = text.unescape(extr("<h2>", "</h2>"))
data["artist_url"] = extr('title="', '"').strip()
data["artist"] = extr(">", "<")
@@ -153,12 +156,13 @@ class FuraffinityExtractor(Extractor):
def _process_description(description):
return text.unescape(text.remove_html(description, "", ""))
- def _pagination(self, path):
+ def _pagination(self, path, folder=None):
num = 1
+ folder = "" if folder is None else "/folder/{}/a".format(folder)
while True:
- url = "{}/{}/{}/{}/".format(
- self.root, path, self.user, num)
+ url = "{}/{}/{}{}/{}/".format(
+ self.root, path, self.user, folder, num)
page = self.request(url).text
post_id = None
@@ -232,13 +236,31 @@ class FuraffinityExtractor(Extractor):
class FuraffinityGalleryExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's gallery"""
subcategory = "gallery"
- pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)(?:$|/(?!folder/))"
example = "https://www.furaffinity.net/gallery/USER/"
def posts(self):
return self._pagination("gallery")
+class FuraffinityFolderExtractor(FuraffinityExtractor):
+ """Extractor for a FurAffinity folder"""
+ subcategory = "folder"
+ directory_fmt = ("{category}", "{user!l}",
+ "Folders", "{folder_id}{folder_name:? //}")
+ pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/folder/(\d+)(?:/([^/?#]+))?"
+ example = "https://www.furaffinity.net/gallery/USER/folder/12345/FOLDER"
+
+ def metadata(self):
+ return {
+ "folder_id" : self.groups[1],
+ "folder_name": self.groups[2] or "",
+ }
+
+ def posts(self):
+ return self._pagination("gallery", self.groups[1])
+
+
class FuraffinityScrapsExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's scraps"""
subcategory = "scraps"
diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py
index 5c91eb9..2974b59 100644
--- a/gallery_dl/extractor/itaku.py
+++ b/gallery_dl/extractor/itaku.py
@@ -80,7 +80,8 @@ class ItakuSearchExtractor(ItakuExtractor):
example = "https://itaku.ee/home/images?tags=SEARCH"
def posts(self):
- params = text.parse_query_list(self.groups[0])
+ params = text.parse_query_list(
+ self.groups[0], {"tags", "maturity_rating"})
return self.api.search_images(params)
@@ -99,13 +100,7 @@ class ItakuAPI():
negative_tags = []
optional_tags = []
- tags = params.pop("tags", None)
- if not tags:
- tags = ()
- elif isinstance(tags, str):
- tags = (tags,)
-
- for tag in tags:
+ for tag in params.pop("tags", None) or ():
if not tag:
pass
elif tag[0] == "-":
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index b2f31dd..c7303f2 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -66,8 +66,7 @@ class SankakuExtractor(BooruExtractor):
def _prepare(self, post):
post["created_at"] = post["created_at"]["s"]
post["date"] = text.parse_timestamp(post["created_at"])
- post["tags"] = [tag["name"].lower().replace(" ", "_")
- for tag in post["tags"] if tag["name"]]
+ post["tags"] = post.pop("tag_names", ())
post["tag_string"] = " ".join(post["tags"])
post["_http_validate"] = self._check_expired
@@ -76,7 +75,7 @@ class SankakuExtractor(BooruExtractor):
def _tags(self, post, page):
tags = collections.defaultdict(list)
- for tag in post["tags"]:
+ for tag in self.api.tags(post["id"]):
name = tag["name"]
if name:
tags[tag["type"]].append(name.lower().replace(" ", "_"))
@@ -112,11 +111,11 @@ class SankakuTagExtractor(SankakuExtractor):
if "date:" in self.tags:
# rewrite 'date:' tags (#1790)
self.tags = re.sub(
- r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)",
- r"date:\3.\2.\1", self.tags)
+ r"date:(\d\d)[.-](\d\d)[.-](\d\d\d\d)(?!T)",
+ r"date:\3-\2-\1T00:00", self.tags)
self.tags = re.sub(
- r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)",
- r"date:\1.\2.\3", self.tags)
+ r"date:(\d\d\d\d)[.-](\d\d)[.-](\d\d)(?!T)",
+ r"date:\1-\2-\3T00:00", self.tags)
def metadata(self):
return {"search_tags": self.tags}
@@ -209,6 +208,30 @@ class SankakuAPI():
params = {"lang": "en"}
return self._call("/posts/{}/notes".format(post_id), params)
+ def tags(self, post_id):
+ endpoint = "/posts/{}/tags".format(post_id)
+ params = {
+ "lang" : "en",
+ "page" : 1,
+ "limit": 100,
+ }
+
+ tags = None
+ while True:
+ data = self._call(endpoint, params)
+
+ tags_new = data["data"]
+ if not tags_new:
+ return tags or []
+ elif tags is None:
+ tags = tags_new
+ else:
+ tags.extend(tags_new)
+
+ if len(tags_new) < 80 or len(tags) >= data["total"]:
+ return tags
+ params["page"] += 1
+
def pools(self, pool_id):
params = {"lang": "en"}
return self._call("/pools/" + pool_id, params)
@@ -216,6 +239,15 @@ class SankakuAPI():
def pools_keyset(self, params):
return self._pagination("/pools/keyset", params)
+ def pools_series(self, params):
+ params_ = {
+ "lang" : "en",
+ "filledPools": "true",
+ "includes[]" : "pools",
+ }
+ params_.update(params)
+ return self._pagination("/poolseriesv2", params)
+
def posts(self, post_id):
params = {
"lang" : "en",
@@ -223,17 +255,17 @@ class SankakuAPI():
"limit": "1",
"tags" : ("md5:" if len(post_id) == 32 else "id_range:") + post_id,
}
- return self._call("/posts", params)
+ return self._call("/v2/posts", params)
def posts_keyset(self, params):
- return self._pagination("/posts/keyset", params)
+ return self._pagination("/v2/posts/keyset", params)
def authenticate(self):
self.headers["Authorization"] = \
_authenticate_impl(self.extractor, self.username, self.password)
def _call(self, endpoint, params=None):
- url = "https://capi-v2.sankakucomplex.com" + endpoint
+ url = "https://sankakuapi.com" + endpoint
for _ in range(5):
self.authenticate()
response = self.extractor.request(
@@ -311,7 +343,7 @@ class SankakuAPI():
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
- url = "https://capi-v2.sankakucomplex.com/auth/token"
+ url = "https://sankakuapi.com/auth/token"
headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
data = {"login": username, "password": password}
diff --git a/gallery_dl/extractor/tiktok.py b/gallery_dl/extractor/tiktok.py
index 203b1ac..30f310d 100644
--- a/gallery_dl/extractor/tiktok.py
+++ b/gallery_dl/extractor/tiktok.py
@@ -17,7 +17,7 @@ class TiktokExtractor(Extractor):
category = "tiktok"
directory_fmt = ("{category}", "{user}")
filename_fmt = (
- "{id}{num:?_//>02} {title[b:150]}{img_id:? [/]/}.{extension}")
+ "{id}{num:?_//>02} {title[b:150]}{img_id|audio_id:? [/]/}.{extension}")
archive_fmt = "{id}_{num}_{img_id}"
root = "https://www.tiktok.com"
cookies_domain = ".tiktok.com"
@@ -83,7 +83,11 @@ class TiktokExtractor(Extractor):
yield Message.Url, url, post
if self.audio and "music" in post:
- ytdl_media = "audio"
+ if self.audio == "ytdl":
+ ytdl_media = "audio"
+ else:
+ url = self._extract_audio(post)
+ yield Message.Url, url, post
elif self.video and "video" in post:
ytdl_media = "video"
@@ -146,6 +150,25 @@ class TiktokExtractor(Extractor):
'type="application/json">', '</script>')
return util.json_loads(data)["__DEFAULT_SCOPE__"]
+ def _extract_audio(self, post):
+ audio = post["music"]
+ url = audio["playUrl"]
+ text.nameext_from_url(url, post)
+ post.update({
+ "type" : "audio",
+ "image" : None,
+ "title" : post["desc"] or "TikTok audio #{}".format(post["id"]),
+ "duration" : audio.get("duration"),
+ "num" : 0,
+ "img_id" : "",
+ "audio_id" : audio.get("id"),
+ "width" : 0,
+ "height" : 0,
+ })
+ if not post["extension"]:
+ post["extension"] = "mp3"
+ return url
+
def _check_status_code(self, detail, url):
status = detail.get("statusCode")
if not status:
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index c391bad..8d90bc5 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -237,7 +237,7 @@ class TwitterExtractor(Extractor):
def _extract_components(self, tweet, data, files):
for component_id in data["components"]:
com = data["component_objects"][component_id]
- for conv in com["data"]["conversation_preview"]:
+ for conv in com["data"].get("conversation_preview") or ():
for url in conv.get("mediaUrls") or ():
files.append({"url": url})
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 4eae537..3b23f3a 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -54,7 +54,7 @@ class WikimediaExtractor(BaseExtractor):
@staticmethod
def prepare(image):
- """Adjust the content of a image object"""
+ """Adjust the content of an image object"""
image["metadata"] = {
m["name"]: m["value"]
for m in image["metadata"] or ()}
@@ -80,6 +80,14 @@ class WikimediaExtractor(BaseExtractor):
yield Message.Directory, image
yield Message.Url, image["url"], image
+ if self.subcategories:
+ base = self.root + "/wiki/"
+ self.params["gcmtype"] = "subcat"
+ for subcat in self._pagination(self.params):
+ url = base + subcat["title"].replace(" ", "_")
+ subcat["_extractor"] = WikimediaArticleExtractor
+ yield Message.Queue, url, subcat
+
def _pagination(self, params):
"""
https://www.mediawiki.org/wiki/API:Query
@@ -208,6 +216,8 @@ class WikimediaArticleExtractor(WikimediaExtractor):
self.subcategory = prefix
if prefix == "category":
+ self.subcategories = \
+ True if self.config("subcategories", True) else False
self.params = {
"generator": "categorymembers",
"gcmtitle" : path,
@@ -215,10 +225,12 @@ class WikimediaArticleExtractor(WikimediaExtractor):
"gcmlimit" : self.per_page,
}
elif prefix == "file":
+ self.subcategories = False
self.params = {
"titles" : path,
}
else:
+ self.subcategories = False
self.params = {
"generator": "images",
"gimlimit" : self.per_page,
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index f117c92..c1dde94 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -258,10 +258,10 @@ def parse_query(qs):
return result
-def parse_query_list(qs):
+def parse_query_list(qs, as_list=()):
"""Parse a query string into name-value pairs
- Combine values of duplicate names into lists
+ Combine values of names in 'as_list' into lists
"""
if not qs:
return {}
@@ -273,14 +273,13 @@ def parse_query_list(qs):
if eq:
name = unquote(name.replace("+", " "))
value = unquote(value.replace("+", " "))
- if name in result:
- rvalue = result[name]
- if isinstance(rvalue, list):
- rvalue.append(value)
+ if name in as_list:
+ if name in result:
+ result[name].append(value)
else:
- result[name] = [rvalue, value]
- else:
- result[name] = value
+ result[name] = [value]
+ elif name not in result:
+ result[name] = unquote(value.replace("+", " "))
except Exception:
pass
return result
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index ad98770..558b02e 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.29.1"
+__version__ = "1.29.2"
__variant__ = None
diff --git a/test/test_results.py b/test/test_results.py
index c3b9b2d..3136743 100644
--- a/test/test_results.py
+++ b/test/test_results.py
@@ -106,6 +106,10 @@ class TestExtractorResults(unittest.TestCase):
if len(result) <= 2:
return # only matching
+ skip = result.pop("#skip", False)
+ if skip:
+ return self._skipped.append((result["#url"], skip))
+
if auth is None:
auth = (cat in AUTH_REQUIRED)
elif not auth:
@@ -237,7 +241,10 @@ class TestExtractorResults(unittest.TestCase):
elif isinstance(test, range):
self.assertRange(value, test, msg=path)
elif isinstance(test, set):
- self.assertTrue(value in test or type(value) in test, msg=path)
+ try:
+ self.assertIn(value, test, msg=path)
+ except AssertionError:
+ self.assertIn(type(value), test, msg=path)
elif isinstance(test, list):
subtest = False
for idx, item in enumerate(test):
diff --git a/test/test_text.py b/test/test_text.py
index 30feefc..d42507c 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -431,10 +431,10 @@ class TestText(unittest.TestCase):
self.assertEqual(f("foo=1&bar&baz=3"), {"foo": "1", "baz": "3"})
# keys with identical names
- self.assertEqual(f("foo=1&foo=2"), {"foo": ["1", "2"]})
+ self.assertEqual(f("foo=1&foo=2", ("foo",)), {"foo": ["1", "2"]})
self.assertEqual(
- f("foo=1&bar=2&foo=3&bar=4&foo=5"),
- {"foo": ["1", "3", "5"], "bar": ["2", "4"]},
+ f("foo=1&bar=2&foo=3&bar=4&foo=5", {"foo", "baz"}),
+ {"foo": ["1", "3", "5"], "bar": "2"},
)
# invalid arguments