author     Unit 193 <unit193@unit193.net>  2025-05-26 06:46:00 -0400
committer  Unit 193 <unit193@unit193.net>  2025-05-26 06:46:00 -0400
commit     6424318a059207759b9055cf8a8df91c0ddac7c8 (patch)
tree       3fb8adec807ad1ffeba4889a506b05e680ca8051 /gallery_dl
parent     2bef55427baa34bf0f78d52590bbf27b2c5f3a56 (diff)
parent     7672a750cb74bf31e21d76aad2776367fd476155 (diff)
Update upstream source from tag 'upstream/1.29.7'
Update to upstream version '1.29.7' with Debian dir 264267cd1ebd5c7205fe1f137a394d0ae1a2fb3b
Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/cookies.py                  |  51
-rw-r--r--  gallery_dl/downloader/http.py          |   5
-rw-r--r--  gallery_dl/downloader/ytdl.py          |  19
-rw-r--r--  gallery_dl/extractor/arcalive.py       |   5
-rw-r--r--  gallery_dl/extractor/aryion.py         |   2
-rw-r--r--  gallery_dl/extractor/bluesky.py        |   6
-rw-r--r--  gallery_dl/extractor/civitai.py        | 106
-rw-r--r--  gallery_dl/extractor/fanbox.py         |  10
-rw-r--r--  gallery_dl/extractor/flickr.py         | 112
-rw-r--r--  gallery_dl/extractor/idolcomplex.py    |  46
-rw-r--r--  gallery_dl/extractor/instagram.py      |  20
-rw-r--r--  gallery_dl/extractor/mangadex.py       | 151
-rw-r--r--  gallery_dl/extractor/mastodon.py       |   8
-rw-r--r--  gallery_dl/extractor/motherless.py     |  42
-rw-r--r--  gallery_dl/extractor/pinterest.py      |   3
-rw-r--r--  gallery_dl/extractor/pixeldrain.py     |  70
-rw-r--r--  gallery_dl/extractor/pixiv.py          |  28
-rw-r--r--  gallery_dl/extractor/sankaku.py        |  21
-rw-r--r--  gallery_dl/extractor/subscribestar.py  |  24
-rw-r--r--  gallery_dl/extractor/vipergirls.py     |  29
-rw-r--r--  gallery_dl/transaction_id.py           |   6
-rw-r--r--  gallery_dl/version.py                  |   2
22 files changed, 570 insertions(+), 196 deletions(-)
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index 71b0b6b..f03ad58 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -26,7 +26,9 @@ from . import aes, text, util
SUPPORTED_BROWSERS_CHROMIUM = {
"brave", "chrome", "chromium", "edge", "opera", "thorium", "vivaldi"}
-SUPPORTED_BROWSERS = SUPPORTED_BROWSERS_CHROMIUM | {"firefox", "safari"}
+SUPPORTED_BROWSERS_FIREFOX = {"firefox", "zen"}
+SUPPORTED_BROWSERS = \
+ SUPPORTED_BROWSERS_CHROMIUM | SUPPORTED_BROWSERS_FIREFOX | {"safari"}
logger = logging.getLogger("cookies")
@@ -34,8 +36,8 @@ logger = logging.getLogger("cookies")
def load_cookies(browser_specification):
browser_name, profile, keyring, container, domain = \
_parse_browser_specification(*browser_specification)
- if browser_name == "firefox":
- return load_cookies_firefox(profile, container, domain)
+ if browser_name in SUPPORTED_BROWSERS_FIREFOX:
+ return load_cookies_firefox(browser_name, profile, container, domain)
elif browser_name == "safari":
return load_cookies_safari(profile, domain)
elif browser_name in SUPPORTED_BROWSERS_CHROMIUM:
@@ -44,8 +46,10 @@ def load_cookies(browser_specification):
raise ValueError("unknown browser '{}'".format(browser_name))
-def load_cookies_firefox(profile=None, container=None, domain=None):
- path, container_id = _firefox_cookies_database(profile, container)
+def load_cookies_firefox(browser_name, profile=None,
+ container=None, domain=None):
+ path, container_id = _firefox_cookies_database(browser_name,
+ profile, container)
sql = ("SELECT name, value, host, path, isSecure, expiry "
"FROM moz_cookies")
@@ -83,7 +87,8 @@ def load_cookies_firefox(profile=None, container=None, domain=None):
sql, parameters)
]
- _log_info("Extracted %s cookies from Firefox", len(cookies))
+ _log_info("Extracted %s cookies from %s",
+ len(cookies), browser_name.capitalize())
return cookies
@@ -196,13 +201,14 @@ def load_cookies_chromium(browser_name, profile=None,
# --------------------------------------------------------------------
# firefox
-def _firefox_cookies_database(profile=None, container=None):
+def _firefox_cookies_database(browser_name, profile=None, container=None):
if not profile:
- search_root = _firefox_browser_directory()
+ search_root = _firefox_browser_directory(browser_name)
elif _is_path(profile):
search_root = profile
else:
- search_root = os.path.join(_firefox_browser_directory(), profile)
+ search_root = os.path.join(
+ _firefox_browser_directory(browser_name), profile)
path = _find_most_recently_used_file(search_root, "cookies.sqlite")
if path is None:
@@ -245,14 +251,27 @@ def _firefox_cookies_database(profile=None, container=None):
return path, container_id
-def _firefox_browser_directory():
+def _firefox_browser_directory(browser_name):
+ join = os.path.join
+
if sys.platform in ("win32", "cygwin"):
- return os.path.expandvars(
- r"%APPDATA%\Mozilla\Firefox\Profiles")
- if sys.platform == "darwin":
- return os.path.expanduser(
- "~/Library/Application Support/Firefox/Profiles")
- return os.path.expanduser("~/.mozilla/firefox")
+ appdata = os.path.expandvars("%APPDATA%")
+ return {
+ "firefox": join(appdata, R"Mozilla\Firefox\Profiles"),
+ "zen" : join(appdata, R"zen\Profiles")
+ }[browser_name]
+ elif sys.platform == "darwin":
+ appdata = os.path.expanduser("~/Library/Application Support")
+ return {
+ "firefox": join(appdata, R"Firefox/Profiles"),
+ "zen" : join(appdata, R"zen/Profiles")
+ }[browser_name]
+ else:
+ home = os.path.expanduser("~")
+ return {
+ "firefox": join(home, R".mozilla/firefox"),
+ "zen" : join(home, R".zen")
+ }[browser_name]
# --------------------------------------------------------------------
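
Usage note: with the hunks above, Zen's cookie store resolves through the same
SQLite lookup as Firefox's. A minimal sketch of both entry points, assuming
gallery-dl's existing --cookies-from-browser option; the profile, keyring,
container, and domain fields of the specification tuple are placeholders here:

    # CLI: gallery-dl --cookies-from-browser zen URL
    from gallery_dl import cookies

    # load_cookies() unpacks a 5-field specification:
    # (browser_name, profile, keyring, container, domain)
    cookie_list = cookies.load_cookies(("zen", None, None, None, "example.org"))
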
diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py
index faea9e5..c58e2fb 100644
--- a/gallery_dl/downloader/http.py
+++ b/gallery_dl/downloader/http.py
@@ -322,7 +322,10 @@ class HttpDownloader(DownloaderBase):
self.downloading = False
if self.mtime:
- kwdict.setdefault("_mtime", response.headers.get("Last-Modified"))
+ if "_http_lastmodified" in kwdict:
+ kwdict["_mtime"] = kwdict["_http_lastmodified"]
+ else:
+ kwdict["_mtime"] = response.headers.get("Last-Modified")
else:
kwdict["_mtime"] = None
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 7a20dc2..1fc2f82 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -130,18 +130,27 @@ class YoutubeDLDownloader(DownloaderBase):
if pathfmt.exists():
pathfmt.temppath = ""
return True
- if self.part and self.partdir:
- pathfmt.temppath = os.path.join(
- self.partdir, pathfmt.filename)
-
- self._set_outtmpl(ytdl_instance, pathfmt.temppath.replace("%", "%%"))
self.out.start(pathfmt.path)
+ if self.part:
+ pathfmt.kwdict["extension"] = pathfmt.prefix + "part"
+ filename = pathfmt.build_filename(pathfmt.kwdict)
+ pathfmt.kwdict["extension"] = info_dict["ext"]
+ if self.partdir:
+ path = os.path.join(self.partdir, filename)
+ else:
+ path = pathfmt.realdirectory + filename
+ else:
+ path = pathfmt.realpath
+
+ self._set_outtmpl(ytdl_instance, path.replace("%", "%%"))
try:
ytdl_instance.process_info(info_dict)
except Exception as exc:
self.log.debug("", exc_info=exc)
return False
+
+ pathfmt.temppath = info_dict["filepath"]
return True
def _download_playlist(self, ytdl_instance, pathfmt, info_dict):
diff --git a/gallery_dl/extractor/arcalive.py b/gallery_dl/extractor/arcalive.py
index 8c44256..3c39a1a 100644
--- a/gallery_dl/extractor/arcalive.py
+++ b/gallery_dl/extractor/arcalive.py
@@ -17,6 +17,7 @@ class ArcaliveExtractor(Extractor):
"""Base class for Arca.live extractors"""
category = "arcalive"
root = "https://arca.live"
+ useragent = "net.umanle.arca.android.playstore/0.9.75"
request_interval = (0.5, 1.5)
def _init(self):
@@ -149,9 +150,7 @@ class ArcaliveAPI():
self.log = extractor.log
self.root = extractor.root + "/api/app"
- headers = extractor.session.headers
- headers["User-Agent"] = "net.umanle.arca.android.playstore/0.9.75"
- headers["X-Device-Token"] = util.generate_token(64)
+ extractor.session.headers["X-Device-Token"] = util.generate_token(64)
def board(self, board_slug, params):
endpoint = "/list/channel/" + board_slug
diff --git a/gallery_dl/extractor/aryion.py b/gallery_dl/extractor/aryion.py
index 17b780e..ca88187 100644
--- a/gallery_dl/extractor/aryion.py
+++ b/gallery_dl/extractor/aryion.py
@@ -169,7 +169,7 @@ class AryionExtractor(Extractor):
"<p>", "</p>"), "", "")),
"filename" : fname,
"extension": ext,
- "_mtime" : lmod,
+ "_http_lastmodified": lmod,
}
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index ec274b8..6f4abd5 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -49,7 +49,11 @@ class BlueskyExtractor(Extractor):
self.log.debug("Skipping %s (repost)", self._pid(post))
continue
embed = post.get("embed")
- post.update(post.pop("record"))
+ try:
+ post.update(post.pop("record"))
+ except Exception:
+ self.log.debug("Skipping %s (no 'record')", self._pid(post))
+ continue
while True:
self._prepare(post)
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
index de8f86c..56fe851 100644
--- a/gallery_dl/extractor/civitai.py
+++ b/gallery_dl/extractor/civitai.py
@@ -45,6 +45,20 @@ class CivitaiExtractor(Extractor):
self._image_quality = "original=true"
self._image_ext = "png"
+ quality_video = self.config("quality-videos")
+ if quality_video:
+ if not isinstance(quality_video, str):
+ quality_video = ",".join(quality_video)
+ if quality_video[0] == "+":
+ quality_video = (self._image_quality + "," +
+ quality_video.lstrip("+,"))
+ self._video_quality = quality_video
+ elif quality_video is not None and quality:
+ self._video_quality = self._image_quality
+ else:
+ self._video_quality = "quality=100"
+ self._video_ext = "webm"
+
metadata = self.config("metadata")
if metadata:
if isinstance(metadata, str):
@@ -82,9 +96,8 @@ class CivitaiExtractor(Extractor):
"user": post.pop("user"),
}
if self._meta_version:
- data["version"] = version = self.api.model_version(
- post["modelVersionId"]).copy()
- data["model"] = version.pop("model")
+ data["model"], data["version"] = \
+ self._extract_meta_version(post)
yield Message.Directory, data
for file in self._image_results(images):
@@ -95,26 +108,22 @@ class CivitaiExtractor(Extractor):
images = self.images()
if images:
for image in images:
- url = self._url(image)
+
if self._meta_generation:
- image["generation"] = self.api.image_generationdata(
- image["id"])
+ image["generation"] = \
+ self._extract_meta_generation(image)
if self._meta_version:
- if "modelVersionId" in image:
- version_id = image["modelVersionId"]
- else:
- post = image["post"] = self.api.post(
- image["postId"])
- post.pop("user", None)
- version_id = post["modelVersionId"]
- image["version"] = version = self.api.model_version(
- version_id).copy()
- image["model"] = version.pop("model")
-
+ image["model"], image["version"] = \
+ self._extract_meta_version(image, False)
image["date"] = text.parse_datetime(
image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ url = self._url(image)
text.nameext_from_url(url, image)
- image["extension"] = self._image_ext
+ if not image["extension"]:
+ image["extension"] = (
+ self._video_ext if image.get("type") == "video" else
+ self._image_ext)
yield Message.Directory, image
yield Message.Url, url, image
return
@@ -130,20 +139,23 @@ class CivitaiExtractor(Extractor):
def _url(self, image):
url = image["url"]
+ video = image.get("type") == "video"
+ quality = self._video_quality if video else self._image_quality
+
if "/" in url:
parts = url.rsplit("/", 3)
image["uuid"] = parts[1]
- parts[2] = self._image_quality
+ parts[2] = quality
return "/".join(parts)
- image["uuid"] = url
+ image["uuid"] = url
name = image.get("name")
if not name:
mime = image.get("mimeType") or self._image_ext
name = "{}.{}".format(image.get("id"), mime.rpartition("/")[2])
return (
"https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/{}/{}/{}".format(
- url, self._image_quality, name)
+ url, quality, name)
)
def _image_results(self, images):
@@ -154,11 +166,13 @@ class CivitaiExtractor(Extractor):
"url" : self._url(file),
})
if not data["extension"]:
- data["extension"] = self._image_ext
+ data["extension"] = (
+ self._video_ext if file.get("type") == "video" else
+ self._image_ext)
if "id" not in file and data["filename"].isdecimal():
file["id"] = text.parse_int(data["filename"])
if self._meta_generation:
- file["generation"] = self.api.image_generationdata(file["id"])
+ file["generation"] = self._extract_meta_generation(file)
yield data
def _parse_query(self, value):
@@ -166,6 +180,38 @@ class CivitaiExtractor(Extractor):
value, {"tags", "reactions", "baseModels", "tools", "techniques",
"types", "fileFormats"})
+ def _extract_meta_generation(self, image):
+ try:
+ return self.api.image_generationdata(image["id"])
+ except Exception as exc:
+ return self.log.debug("", exc_info=exc)
+
+ def _extract_meta_version(self, item, is_post=True):
+ try:
+ version_id = self._extract_version_id(item, is_post)
+ if version_id:
+ version = self.api.model_version(version_id).copy()
+ return version.pop("model", None), version
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ return None, None
+
+ def _extract_version_id(self, item, is_post=True):
+ version_id = item.get("modelVersionId")
+ if version_id:
+ return version_id
+
+ version_ids = item.get("modelVersionIds")
+ if version_ids:
+ return version_ids[0]
+
+ if is_post:
+ return None
+
+ item["post"] = post = self.api.post(item["postId"])
+ post.pop("user", None)
+ return self._extract_version_id(post)
+
class CivitaiModelExtractor(CivitaiExtractor):
subcategory = "model"
@@ -235,16 +281,20 @@ class CivitaiModelExtractor(CivitaiExtractor):
files = []
for num, file in enumerate(version["files"], 1):
+ name, sep, ext = file["name"].rpartition(".")
+ if not sep:
+ name = ext
+ ext = "bin"
file["uuid"] = "model-{}-{}-{}".format(
model["id"], version["id"], file["id"])
files.append({
"num" : num,
"file" : file,
- "filename" : file["name"],
- "extension": "bin",
- "url" : file.get("downloadUrl") or
- "{}/api/download/models/{}".format(
- self.root, version["id"]),
+ "filename" : name,
+ "extension": ext,
+ "url" : (file.get("downloadUrl") or
+ "{}/api/download/models/{}".format(
+ self.root, version["id"])),
"_http_headers" : {
"Authorization": self.api.headers.get("Authorization")},
"_http_validate": self._validate_file_model,
diff --git a/gallery_dl/extractor/fanbox.py b/gallery_dl/extractor/fanbox.py
index 3b43134..8981c29 100644
--- a/gallery_dl/extractor/fanbox.py
+++ b/gallery_dl/extractor/fanbox.py
@@ -26,12 +26,18 @@ class FanboxExtractor(Extractor):
directory_fmt = ("{category}", "{creatorId}")
filename_fmt = "{id}_{num}.{extension}"
archive_fmt = "{id}_{num}"
+ browser = "firefox"
_warning = True
def _init(self):
self.headers = {
- "Accept": "application/json, text/plain, */*",
- "Origin": self.root,
+ "Accept" : "application/json, text/plain, */*",
+ "Origin" : "https://www.fanbox.cc",
+ "Referer": "https://www.fanbox.cc/",
+ "Cookie" : None,
+ "Sec-Fetch-Dest": "empty",
+ "Sec-Fetch-Mode": "cors",
+ "Sec-Fetch-Site": "same-site",
}
self.embeds = self.config("embeds", True)
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index e85a375..eb68c3e 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -23,13 +23,10 @@ class FlickrExtractor(Extractor):
request_interval = (1.0, 2.0)
request_interval_min = 0.5
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.item_id = match.group(1)
-
def _init(self):
self.api = FlickrAPI(self)
self.user = None
+ self.item_id = self.groups[0]
def items(self):
data = self.metadata()
@@ -51,6 +48,8 @@ class FlickrExtractor(Extractor):
def metadata(self):
"""Return general metadata"""
self.user = self.api.urls_lookupUser(self.item_id)
+ if self.config("profile", False):
+ self.user.update(self.api.people_getInfo(self.user["nsid"]))
return {"user": self.user}
def photos(self):
@@ -75,23 +74,26 @@ class FlickrImageExtractor(FlickrExtractor):
r"|flic\.kr/p/([A-Za-z1-9]+))")
example = "https://www.flickr.com/photos/USER/12345"
- def __init__(self, match):
- FlickrExtractor.__init__(self, match)
- if not self.item_id:
+ def items(self):
+ item_id, enc_id = self.groups
+ if enc_id is not None:
alphabet = ("123456789abcdefghijkmnopqrstu"
"vwxyzABCDEFGHJKLMNPQRSTUVWXYZ")
- self.item_id = util.bdecode(match.group(2), alphabet)
+ item_id = util.bdecode(enc_id, alphabet)
- def items(self):
- photo = self.api.photos_getInfo(self.item_id)
+ photo = self.api.photos_getInfo(item_id)
- self.api._extract_metadata(photo)
+ self.api._extract_metadata(photo, False)
if photo["media"] == "video" and self.api.videos:
self.api._extract_video(photo)
else:
self.api._extract_photo(photo)
- photo["user"] = photo["owner"]
+ if self.config("profile", False):
+ photo["user"] = self.api.people_getInfo(photo["owner"]["nsid"])
+ else:
+ photo["user"] = photo["owner"]
+
photo["title"] = photo["title"]["_content"]
photo["comments"] = text.parse_int(photo["comments"]["_content"])
photo["description"] = photo["description"]["_content"]
@@ -120,11 +122,8 @@ class FlickrAlbumExtractor(FlickrExtractor):
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
example = "https://www.flickr.com/photos/USER/albums/12345"
- def __init__(self, match):
- FlickrExtractor.__init__(self, match)
- self.album_id = match.group(2)
-
def items(self):
+ self.album_id = self.groups[1]
if self.album_id:
return FlickrExtractor.items(self)
return self._album_items()
@@ -163,12 +162,9 @@ class FlickrGalleryExtractor(FlickrExtractor):
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)"
example = "https://www.flickr.com/photos/USER/galleries/12345/"
- def __init__(self, match):
- FlickrExtractor.__init__(self, match)
- self.gallery_id = match.group(2)
-
def metadata(self):
data = FlickrExtractor.metadata(self)
+ self.gallery_id = self.groups[1]
data["gallery"] = self.api.galleries_getInfo(self.gallery_id)
return data
@@ -223,13 +219,10 @@ class FlickrSearchExtractor(FlickrExtractor):
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
example = "https://flickr.com/search/?text=QUERY"
- def __init__(self, match):
- FlickrExtractor.__init__(self, match)
- self.search = text.parse_query(match.group(1))
+ def metadata(self):
+ self.search = text.parse_query(self.groups[0])
if "text" not in self.search:
self.search["text"] = ""
-
- def metadata(self):
return {"search": self.search}
def photos(self):
@@ -275,13 +268,27 @@ class FlickrAPI(oauth.OAuth1API):
"appletv" : 1,
"iphone_wifi": 0,
}
+ LICENSES = {
+ "0": "All Rights Reserved",
+ "1": "Attribution-NonCommercial-ShareAlike License",
+ "2": "Attribution-NonCommercial License",
+ "3": "Attribution-NonCommercial-NoDerivs License",
+ "4": "Attribution License",
+ "5": "Attribution-ShareAlike License",
+ "6": "Attribution-NoDerivs License",
+ "7": "No known copyright restrictions",
+ "8": "United States Government Work",
+ "9": "Public Domain Dedication (CC0)",
+ "10": "Public Domain Mark",
+ }
def __init__(self, extractor):
oauth.OAuth1API.__init__(self, extractor)
- self.exif = extractor.config("exif", False)
self.videos = extractor.config("videos", True)
- self.contexts = extractor.config("contexts", False)
+ self.meta_exif = extractor.config("exif", False)
+ self.meta_info = extractor.config("info", False)
+ self.meta_contexts = extractor.config("contexts", False)
self.maxsize = extractor.config("size-max")
if isinstance(self.maxsize, str):
@@ -321,6 +328,26 @@ class FlickrAPI(oauth.OAuth1API):
params = {"group_id": group_id}
return self._pagination("groups.pools.getPhotos", params)
+ def people_getInfo(self, user_id):
+ """Get information about a user."""
+ params = {"user_id": user_id}
+ user = self._call("people.getInfo", params)
+
+ try:
+ user = user["person"]
+ for key in ("description", "username", "realname", "location",
+ "profileurl", "photosurl", "mobileurl"):
+ if isinstance(user.get(key), dict):
+ user[key] = user[key]["_content"]
+ photos = user["photos"]
+ for key in ("count", "firstdate", "firstdatetaken"):
+ if isinstance(photos.get(key), dict):
+ photos[key] = photos[key]["_content"]
+ except Exception:
+ pass
+
+ return user
+
def people_getPhotos(self, user_id):
"""Return photos from the given user's photostream."""
params = {"user_id": user_id}
@@ -469,14 +496,15 @@ class FlickrAPI(oauth.OAuth1API):
self._extract_metadata(photo)
photo["id"] = text.parse_int(photo["id"])
- if "owner" in photo:
+ if "owner" not in photo:
+ photo["owner"] = self.extractor.user
+ elif not self.meta_info:
photo["owner"] = {
"nsid" : photo["owner"],
"username" : photo["ownername"],
"path_alias": photo["pathalias"],
}
- else:
- photo["owner"] = self.extractor.user
+
del photo["pathalias"]
del photo["ownername"]
@@ -522,8 +550,23 @@ class FlickrAPI(oauth.OAuth1API):
photo["width"] = photo["height"] = 0
return photo
- def _extract_metadata(self, photo):
- if self.exif:
+ def _extract_metadata(self, photo, info=True):
+ if info and self.meta_info:
+ try:
+ photo.update(self.photos_getInfo(photo["id"]))
+ photo["title"] = photo["title"]["_content"]
+ photo["comments"] = text.parse_int(
+ photo["comments"]["_content"])
+ photo["description"] = photo["description"]["_content"]
+ photo["tags"] = [t["raw"] for t in photo["tags"]["tag"]]
+ photo["views"] = text.parse_int(photo["views"])
+ photo["id"] = text.parse_int(photo["id"])
+ except Exception as exc:
+ self.log.warning(
+ "Unable to retrieve 'info' data for %s (%s: %s)",
+ photo["id"], exc.__class__.__name__, exc)
+
+ if self.meta_exif:
try:
photo.update(self.photos_getExif(photo["id"]))
except Exception as exc:
@@ -531,7 +574,7 @@ class FlickrAPI(oauth.OAuth1API):
"Unable to retrieve 'exif' data for %s (%s: %s)",
photo["id"], exc.__class__.__name__, exc)
- if self.contexts:
+ if self.meta_contexts:
try:
photo.update(self.photos_getAllContexts(photo["id"]))
except Exception as exc:
@@ -539,6 +582,9 @@ class FlickrAPI(oauth.OAuth1API):
"Unable to retrieve 'contexts' data for %s (%s: %s)",
photo["id"], exc.__class__.__name__, exc)
+ if "license" in photo:
+ photo["license_name"] = self.LICENSES.get(photo["license"])
+
@staticmethod
def _clean_info(info):
info["title"] = info["title"]["_content"]
diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py
index dfd9a31..8f4a10c 100644
--- a/gallery_dl/extractor/idolcomplex.py
+++ b/gallery_dl/extractor/idolcomplex.py
@@ -90,9 +90,11 @@ class IdolcomplexExtractor(SankakuExtractor):
"user[password]": password,
"commit" : "Login",
}
+ self.sleep(10, "login")
response = self.request(url, method="POST", headers=headers, data=data)
- if not response.history or response.url.endswith("/user/home"):
+ if not response.history or response.url.endswith(
+ ("/users/login", "/user/home")):
raise exception.AuthenticationError()
return {c.name: c.value for c in response.history[0].cookies}
@@ -187,32 +189,39 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
return {"search_tags": " ".join(tags)}
def post_ids(self):
- params = {"tags": self.tags}
+ url = self.root + "/en/posts"
+ params = {"auto_page": "t"}
if self.next:
params["next"] = self.next
else:
params["page"] = self.start_page
+ params["tags"] = self.tags
while True:
- page = self.request(self.root, params=params, retries=10).text
- pos = ((page.find('id="more-popular-posts-link"') + 1) or
- (page.find('<span class="thumb') + 1))
+ response = self.request(url, params=params, retries=10)
+ if response.history and "/posts/premium" in response.url:
+ self.log.warning("HTTP redirect to %s", response.url)
+ page = response.text
- yield from self.find_pids(page, pos)
+ yield from text.extract_iter(page, '"id":"', '"')
- next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
- if not next_url:
+ next_page_url = text.extr(page, 'next-page-url="', '"')
+ if not next_page_url:
return
- next_params = text.parse_query(text.unquote(text.unescape(
- text.unescape(next_url).lstrip("?/"))))
+ url, _, next_params = text.unquote(
+ text.unescape(text.unescape(next_page_url))).partition("?")
+ next_params = text.parse_query(next_params)
if "next" in next_params:
# stop if the same "next" value occurs twice in a row (#265)
if "next" in params and params["next"] == next_params["next"]:
return
next_params["page"] = "2"
+
+ if url[0] == "/":
+ url = self.root + url
params = next_params
@@ -225,10 +234,6 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
per_page = 24
- def __init__(self, match):
- IdolcomplexExtractor.__init__(self, match)
- self.pool_id = match.group(1)
-
def skip(self, num):
pages, posts = divmod(num, self.per_page)
self.start_page += pages
@@ -236,10 +241,13 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
return num
def metadata(self):
- return {"pool": self.pool_id}
+ return {"pool": self.groups[0]}
def post_ids(self):
- url = self.root + "/pools/show/" + self.pool_id
+ if not self.logged_in:
+ self.log.warning("Login required")
+
+ url = self.root + "/pools/show/" + self.groups[0]
params = {"page": self.start_page}
while True:
@@ -260,9 +268,5 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor):
pattern = BASE_PATTERN + r"/posts?/(?:show/)?(\w+)"
example = "https://idol.sankakucomplex.com/posts/0123456789abcdef"
- def __init__(self, match):
- IdolcomplexExtractor.__init__(self, match)
- self.post_id = match.group(1)
-
def post_ids(self):
- return (self.post_id,)
+ return (self.groups[0],)
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index 0f88cac..624bba2 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -165,13 +165,16 @@ class InstagramExtractor(Extractor):
if "items" in post: # story or highlight
items = post["items"]
reel_id = str(post["id"]).rpartition(":")[2]
+ expires = post.get("expiring_at")
data = {
- "expires": text.parse_timestamp(post.get("expiring_at")),
+ "expires": text.parse_timestamp(expires),
"post_id": reel_id,
"post_shortcode": shortcode_from_id(reel_id),
}
if "title" in post:
data["highlight_title"] = post["title"]
+ if expires and not post.get("seen"):
+ post["seen"] = expires - 86400
else: # regular image/video post
data = {
@@ -583,7 +586,10 @@ class InstagramStoriesExtractor(InstagramExtractor):
reel_id = self.highlight_id or self.api.user_id(self.user)
reels = self.api.reels_media(reel_id)
- if self.media_id and reels:
+ if not reels:
+ return ()
+
+ if self.media_id:
reel = reels[0]
for item in reel["items"]:
if item["pk"] == self.media_id:
@@ -592,6 +598,16 @@ class InstagramStoriesExtractor(InstagramExtractor):
else:
raise exception.NotFoundError("story")
+ elif self.config("split"):
+ reel = reels[0]
+ reels = []
+ for item in reel["items"]:
+ item.pop("user", None)
+ copy = reel.copy()
+ copy.update(item)
+ copy["items"] = (item,)
+ reels.append(copy)
+
return reels
diff --git a/gallery_dl/extractor/mangadex.py b/gallery_dl/extractor/mangadex.py
index 7f87cff..42a508d 100644
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -29,11 +29,8 @@ class MangadexExtractor(Extractor):
useragent = util.USERAGENT
_cache = {}
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.uuid = match.group(1)
-
def _init(self):
+ self.uuid = self.groups[0]
self.api = MangadexAPI(self)
def items(self):
@@ -44,6 +41,12 @@ class MangadexExtractor(Extractor):
self._cache[uuid] = data
yield Message.Queue, self.root + "/chapter/" + uuid, data
+ def _items_manga(self):
+ data = {"_extractor": MangadexMangaExtractor}
+ for manga in self.manga():
+ url = "{}/title/{}".format(self.root, manga["id"])
+ yield Message.Queue, url, data
+
def _transform(self, chapter):
relationships = defaultdict(list)
for item in chapter["relationships"]:
@@ -130,7 +133,7 @@ class MangadexChapterExtractor(MangadexExtractor):
class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org"""
subcategory = "manga"
- pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
+ pattern = BASE_PATTERN + r"/(?:title|manga)/(?!follows|feed$)([0-9a-f-]+)"
example = ("https://mangadex.org/title"
"/01234567-89ab-cdef-0123-456789abcdef")
@@ -139,17 +142,29 @@ class MangadexMangaExtractor(MangadexExtractor):
class MangadexFeedExtractor(MangadexExtractor):
- """Extractor for chapters from your Followed Feed"""
+ """Extractor for chapters from your Updates Feed"""
subcategory = "feed"
- pattern = BASE_PATTERN + r"/title/feed$()"
+ pattern = BASE_PATTERN + r"/titles?/feed$()"
example = "https://mangadex.org/title/feed"
def chapters(self):
return self.api.user_follows_manga_feed()
+class MangadexFollowingExtractor(MangadexExtractor):
+ """Extractor for followed manga from your Library"""
+ subcategory = "following"
+ pattern = BASE_PATTERN + r"/titles?/follows(?:\?([^#]+))?$"
+ example = "https://mangadex.org/title/follows"
+
+ items = MangadexExtractor._items_manga
+
+ def manga(self):
+ return self.api.user_follows_manga()
+
+
class MangadexListExtractor(MangadexExtractor):
- """Extractor for mangadex lists"""
+ """Extractor for mangadex MDLists"""
subcategory = "list"
pattern = (BASE_PATTERN +
r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
@@ -161,17 +176,17 @@ class MangadexListExtractor(MangadexExtractor):
if match.group(2) == "feed":
self.subcategory = "list-feed"
else:
- self.items = self._items_titles
+ self.items = self._items_manga
def chapters(self):
return self.api.list_feed(self.uuid)
- def _items_titles(self):
- data = {"_extractor": MangadexMangaExtractor}
- for item in self.api.list(self.uuid)["relationships"]:
- if item["type"] == "manga":
- url = "{}/title/{}".format(self.root, item["id"])
- yield Message.Queue, url, data
+ def manga(self):
+ return [
+ item
+ for item in self.api.list(self.uuid)["relationships"]
+ if item["type"] == "manga"
+ ]
class MangadexAuthorExtractor(MangadexExtractor):
@@ -196,10 +211,18 @@ class MangadexAPI():
def __init__(self, extr):
self.extractor = extr
- self.headers = {}
+ self.headers = None
+ self.headers_auth = {}
self.username, self.password = extr._get_auth_info()
- if not self.username:
+ if self.username:
+ self.client_id = cid = extr.config("client-id")
+ self.client_secret = extr.config("client-secret")
+ if cid:
+ self._authenticate_impl = self._authenticate_impl_client
+ else:
+ self._authenticate_impl = self._authenticate_impl_legacy
+ else:
self.authenticate = util.noop
server = extr.config("api-server")
@@ -218,10 +241,10 @@ class MangadexAPI():
return self._call("/chapter/" + uuid, params)["data"]
def list(self, uuid):
- return self._call("/list/" + uuid)["data"]
+ return self._call("/list/" + uuid, None, True)["data"]
def list_feed(self, uuid):
- return self._pagination_chapters("/list/" + uuid + "/feed")
+ return self._pagination_chapters("/list/" + uuid + "/feed", None, True)
@memcache(keyarg=1)
def manga(self, uuid):
@@ -240,28 +263,73 @@ class MangadexAPI():
}
return self._pagination_chapters("/manga/" + uuid + "/feed", params)
+ def user_follows_manga(self):
+ params = {"contentRating": None}
+ return self._pagination_manga(
+ "/user/follows/manga", params, True)
+
def user_follows_manga_feed(self):
params = {"order[publishAt]": "desc"}
- return self._pagination_chapters("/user/follows/manga/feed", params)
+ return self._pagination_chapters(
+ "/user/follows/manga/feed", params, True)
def authenticate(self):
- self.headers["Authorization"] = \
+ self.headers_auth["Authorization"] = \
self._authenticate_impl(self.username, self.password)
@cache(maxage=900, keyarg=1)
- def _authenticate_impl(self, username, password):
+ def _authenticate_impl_client(self, username, password):
+ refresh_token = _refresh_token_cache((username, "personal"))
+ if refresh_token:
+ self.extractor.log.info("Refreshing access token")
+ data = {
+ "grant_type" : "refresh_token",
+ "refresh_token": refresh_token,
+ "client_id" : self.client_id,
+ "client_secret": self.client_secret,
+ }
+ else:
+ self.extractor.log.info("Logging in as %s", username)
+ data = {
+ "grant_type" : "password",
+ "username" : self.username,
+ "password" : self.password,
+ "client_id" : self.client_id,
+ "client_secret": self.client_secret,
+ }
+
+ self.extractor.log.debug("Using client-id '%s…'", self.client_id[:24])
+ url = ("https://auth.mangadex.org/realms/mangadex"
+ "/protocol/openid-connect/token")
+ data = self.extractor.request(
+ url, method="POST", data=data, fatal=None).json()
+
+ try:
+ access_token = data["access_token"]
+ except Exception:
+ raise exception.AuthenticationError(data.get("error_description"))
+
+ if refresh_token != data.get("refresh_token"):
+ _refresh_token_cache.update(
+ (username, "personal"), data["refresh_token"])
+
+ return "Bearer " + access_token
+
+ @cache(maxage=900, keyarg=1)
+ def _authenticate_impl_legacy(self, username, password):
refresh_token = _refresh_token_cache(username)
if refresh_token:
self.extractor.log.info("Refreshing access token")
url = self.root + "/auth/refresh"
- data = {"token": refresh_token}
+ json = {"token": refresh_token}
else:
self.extractor.log.info("Logging in as %s", username)
url = self.root + "/auth/login"
- data = {"username": username, "password": password}
+ json = {"username": username, "password": password}
+ self.extractor.log.debug("Using legacy login method")
data = self.extractor.request(
- url, method="POST", json=data, fatal=None).json()
+ url, method="POST", json=json, fatal=None).json()
if data.get("result") != "ok":
raise exception.AuthenticationError()
@@ -269,13 +337,15 @@ class MangadexAPI():
_refresh_token_cache.update(username, data["token"]["refresh"])
return "Bearer " + data["token"]["session"]
- def _call(self, endpoint, params=None):
+ def _call(self, endpoint, params=None, auth=False):
url = self.root + endpoint
+ headers = self.headers_auth if auth else self.headers
while True:
- self.authenticate()
+ if auth:
+ self.authenticate()
response = self.extractor.request(
- url, params=params, headers=self.headers, fatal=None)
+ url, params=params, headers=headers, fatal=None)
if response.status_code < 400:
return response.json()
@@ -284,12 +354,12 @@ class MangadexAPI():
self.extractor.wait(until=until)
continue
- msg = ", ".join('{title}: {detail}'.format_map(error)
+ msg = ", ".join('{title}: "{detail}"'.format_map(error)
for error in response.json()["errors"])
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, msg)
- def _pagination_chapters(self, endpoint, params=None):
+ def _pagination_chapters(self, endpoint, params=None, auth=False):
if params is None:
params = {}
@@ -299,21 +369,22 @@ class MangadexAPI():
params["translatedLanguage[]"] = lang
params["includes[]"] = ("scanlation_group",)
- return self._pagination(endpoint, params)
+ return self._pagination(endpoint, params, auth)
- def _pagination_manga(self, endpoint, params=None):
+ def _pagination_manga(self, endpoint, params=None, auth=False):
if params is None:
params = {}
- return self._pagination(endpoint, params)
+ return self._pagination(endpoint, params, auth)
- def _pagination(self, endpoint, params):
+ def _pagination(self, endpoint, params, auth=False):
config = self.extractor.config
- ratings = config("ratings")
- if ratings is None:
- ratings = ("safe", "suggestive", "erotica", "pornographic")
- params["contentRating[]"] = ratings
+ if "contentRating" not in params:
+ ratings = config("ratings")
+ if ratings is None:
+ ratings = ("safe", "suggestive", "erotica", "pornographic")
+ params["contentRating[]"] = ratings
params["offset"] = 0
api_params = config("api-parameters")
@@ -321,7 +392,7 @@ class MangadexAPI():
params.update(api_params)
while True:
- data = self._call(endpoint, params)
+ data = self._call(endpoint, params, auth)
yield from data["data"]
params["offset"] = data["offset"] + data["limit"]
@@ -329,6 +400,6 @@ class MangadexAPI():
return
-@cache(maxage=28*86400, keyarg=0)
+@cache(maxage=90*86400, keyarg=0)
def _refresh_token_cache(username):
return None
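
For the new client-credentials flow, a sketch of the relevant config block;
the option names come from the code above, while the "extractor.mangadex"
location and the placeholder values are assumptions:

    {
        "extractor": {
            "mangadex": {
                "username"     : "USERNAME",
                "password"     : "PASSWORD",
                "client-id"    : "personal-client-...",
                "client-secret": "..."
            }
        }
    }

With "client-id" set, _authenticate_impl_client() sends a password grant (or
a refresh_token grant on renewal) to auth.mangadex.org and caches the refresh
token for 90 days; without it, the legacy /auth/login endpoint is used.
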
diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py
index 5e78ad4..8b38474 100644
--- a/gallery_dl/extractor/mastodon.py
+++ b/gallery_dl/extractor/mastodon.py
@@ -196,11 +196,15 @@ class MastodonFollowingExtractor(MastodonExtractor):
class MastodonStatusExtractor(MastodonExtractor):
"""Extractor for images from a status"""
subcategory = "status"
- pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?statuses)"
- r"/(?!following)([^/?#]+)")
+ pattern = (BASE_PATTERN + r"/(?:@[^/?#]+|(?:users/[^/?#]+/)?"
+ r"(?:statuses|notice|objects()))/(?!following)([^/?#]+)")
example = "https://mastodon.social/@USER/12345"
def statuses(self):
+ if self.groups[-2] is not None:
+ url = "{}/objects/{}".format(self.root, self.item)
+ location = self.request_location(url)
+ self.item = location.rpartition("/")[2]
return (MastodonAPI(self).status(self.item),)
diff --git a/gallery_dl/extractor/motherless.py b/gallery_dl/extractor/motherless.py
index c5b9322..ce83ded 100644
--- a/gallery_dl/extractor/motherless.py
+++ b/gallery_dl/extractor/motherless.py
@@ -23,21 +23,6 @@ class MotherlessExtractor(Extractor):
filename_fmt = "{id} {title}.{extension}"
archive_fmt = "{id}"
-
-class MotherlessMediaExtractor(MotherlessExtractor):
- """Extractor for a single image/video from motherless.com"""
- subcategory = "media"
- pattern = (BASE_PATTERN +
- r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?"
- r"(?!G)[A-Z0-9]+)")
- example = "https://motherless.com/ABC123"
-
- def items(self):
- file = self._extract_media(self.groups[0])
- url = file["url"]
- yield Message.Directory, file
- yield Message.Url, url, text.nameext_from_url(url, file)
-
def _extract_media(self, path):
url = self.root + "/" + path
page = self.request(url).text
@@ -95,6 +80,21 @@ class MotherlessMediaExtractor(MotherlessExtractor):
return ""
+class MotherlessMediaExtractor(MotherlessExtractor):
+ """Extractor for a single image/video from motherless.com"""
+ subcategory = "media"
+ pattern = (BASE_PATTERN +
+ r"/((?:g/[^/?#]+/|G[IV]?[A-Z0-9]+/)?"
+ r"(?!G)[A-Z0-9]+)")
+ example = "https://motherless.com/ABC123"
+
+ def items(self):
+ file = self._extract_media(self.groups[0])
+ url = file["url"]
+ yield Message.Directory, file
+ yield Message.Url, url, text.nameext_from_url(url, file)
+
+
class MotherlessGalleryExtractor(MotherlessExtractor):
"""Extractor for a motherless.com gallery"""
subcategory = "gallery"
@@ -119,6 +119,10 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
for num, thumb in enumerate(self._pagination(page), 1):
file = self._parse_thumb_data(thumb)
+
+ if file["type"] == "video":
+ file = self._extract_media(file["id"])
+
file.update(data)
file["num"] = num
url = file["url"]
@@ -151,17 +155,13 @@ class MotherlessGalleryExtractor(MotherlessExtractor):
def _parse_thumb_data(self, thumb):
extr = text.extract_from(thumb)
+
data = {
"id" : extr('data-codename="', '"'),
"type" : extr('data-mediatype="', '"'),
"thumbnail": extr('class="static" src="', '"'),
"title" : extr(' alt="', '"'),
}
-
- type = data["type"]
- url = data["thumbnail"].replace("thumb", type)
- if type == "video":
- url = "{}/{}.mp4".format(url.rpartition("/")[0], data["id"])
- data["url"] = url
+ data["url"] = data["thumbnail"].replace("thumb", data["type"])
return data
diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py
index ad8c681..62fa9be 100644
--- a/gallery_dl/extractor/pinterest.py
+++ b/gallery_dl/extractor/pinterest.py
@@ -132,6 +132,9 @@ class PinterestExtractor(Extractor):
"extension": "txt",
"media_id": block.get("id")}
+ elif type == "story_pin_product_sticker_block":
+ continue
+
elif type == "story_pin_static_sticker_block":
continue
diff --git a/gallery_dl/extractor/pixeldrain.py b/gallery_dl/extractor/pixeldrain.py
index 83f3577..7a4d1a5 100644
--- a/gallery_dl/extractor/pixeldrain.py
+++ b/gallery_dl/extractor/pixeldrain.py
@@ -96,3 +96,73 @@ class PixeldrainAlbumExtractor(PixeldrainExtractor):
file["date"] = self.parse_datetime(file["date_upload"])
text.nameext_from_url(file["name"], file)
yield Message.Url, url, file
+
+
+class PixeldrainFolderExtractor(PixeldrainExtractor):
+ """Extractor for pixeldrain filesystem files and directories"""
+ subcategory = "folder"
+ filename_fmt = "{filename[:230]}.{extension}"
+ archive_fmt = "{path}_{num}"
+ pattern = BASE_PATTERN + r"/(?:d|api/filesystem)/([^?]+)"
+ example = "https://pixeldrain.com/d/abcdefgh"
+
+ def metadata(self, data):
+ return {
+ "type" : data["type"],
+ "path" : data["path"],
+ "name" : data["name"],
+ "mime_type" : data["file_type"],
+ "size" : data["file_size"],
+ "hash_sha256": data["sha256_sum"],
+ "date" : self.parse_datetime(data["created"]),
+ }
+
+ def items(self):
+ recursive = self.config("recursive")
+
+ url = "{}/api/filesystem/{}".format(self.root, self.groups[0])
+ stat = self.request(url + "?stat").json()
+
+ paths = stat["path"]
+ path = paths[stat["base_index"]]
+ if path["type"] == "dir":
+ children = [
+ child
+ for child in stat["children"]
+ if child["name"] != ".search_index.gz"
+ ]
+ else:
+ children = (path,)
+
+ folder = self.metadata(path)
+ folder["id"] = paths[0]["id"]
+
+ yield Message.Directory, folder
+
+ num = 0
+ for child in children:
+ if child["type"] == "file":
+ num += 1
+ url = "{}/api/filesystem{}?attach".format(
+ self.root, child["path"])
+ share_url = "{}/d{}".format(self.root, child["path"])
+ data = self.metadata(child)
+ data.update({
+ "id" : folder["id"],
+ "num" : num,
+ "url" : url,
+ "share_url": share_url,
+ })
+ data["filename"], _, data["extension"] = \
+ child["name"].rpartition(".")
+ yield Message.Url, url, data
+
+ elif child["type"] == "dir":
+ if recursive:
+ url = "{}/d{}".format(self.root, child["path"])
+ child["_extractor"] = PixeldrainFolderExtractor
+ yield Message.Queue, url, child
+
+ else:
+ self.log.debug("'%s' is of unknown type (%s)",
+ child.get("name"), child["type"])
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index c063216..73c5c1c 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -136,7 +136,21 @@ class PixivExtractor(Extractor):
self.log.warning("%s: 'limit_sanity_level' warning", work_id)
if self.sanity_workaround:
body = self._request_ajax("/illust/" + str(work_id))
- return self._extract_ajax(work, body)
+ if work["type"] == "ugoira":
+ if not self.load_ugoira:
+ return ()
+ self.log.info("%s: Retrieving Ugoira AJAX metadata",
+ work["id"])
+ try:
+ self._extract_ajax(work, body)
+ return self._extract_ugoira(work, url)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
+ self.log.warning(
+ "%s: Unable to extract Ugoira URL. Provide "
+ "logged-in cookies to access it", work["id"])
+ else:
+ return self._extract_ajax(work, body)
elif limit_type == "limit_mypixiv_360.png":
work["_mypixiv"] = True
@@ -161,7 +175,12 @@ class PixivExtractor(Extractor):
return ()
def _extract_ugoira(self, work, img_url):
- ugoira = self.api.ugoira_metadata(work["id"])
+ if work.get("_ajax"):
+ ugoira = self._request_ajax(
+ "/illust/" + str(work["id"]) + "/ugoira_meta")
+ img_url = ugoira["src"]
+ else:
+ ugoira = self.api.ugoira_metadata(work["id"])
work["_ugoira_frame_data"] = work["frames"] = frames = ugoira["frames"]
work["_ugoira_original"] = self.load_ugoira_original
work["_http_adjust_extension"] = False
@@ -198,7 +217,10 @@ class PixivExtractor(Extractor):
]
else:
- zip_url = ugoira["zip_urls"]["medium"]
+ if work.get("_ajax"):
+ zip_url = ugoira["originalSrc"]
+ else:
+ zip_url = ugoira["zip_urls"]["medium"]
work["date_url"] = self._date_from_url(zip_url)
url = zip_url.replace("_ugoira600x600", "_ugoira1920x1080", 1)
return ({"url": url},)
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index c7303f2..3485db9 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -47,6 +47,10 @@ class SankakuExtractor(BooruExtractor):
def _init(self):
self.api = SankakuAPI(self)
+ if self.config("tags") == "extended":
+ self._tags = self._tags_extended
+ self._tags_findall = re.compile(
+ r"tag-type-([^\"' ]+).*?\?tags=([^\"'&]+)").findall
def _file_url(self, post):
url = post["file_url"]
@@ -85,6 +89,23 @@ class SankakuExtractor(BooruExtractor):
post["tags_" + name] = values
post["tag_string_" + name] = " ".join(values)
+ def _tags_extended(self, post, page):
+ try:
+ url = "https://chan.sankakucomplex.com/posts/" + post["id"]
+ page = self.request(url).text
+ except Exception as exc:
+ return self.log.warning(
+ "%s: Failed to extract extended tag categories (%s: %s)",
+ post["id"], exc.__class__.__name__, exc)
+
+ tags = collections.defaultdict(list)
+ tag_sidebar = text.extr(page, '<ul id="tag-sidebar"', "</ul>")
+ for tag_type, tag_name in self._tags_findall(tag_sidebar):
+ tags[tag_type].append(text.unescape(text.unquote(tag_name)))
+ for type, values in tags.items():
+ post["tags_" + type] = values
+ post["tag_string_" + type] = " ".join(values)
+
def _notes(self, post, page):
if post.get("has_notes"):
post["notes"] = self.api.notes(post["id"])
diff --git a/gallery_dl/extractor/subscribestar.py b/gallery_dl/extractor/subscribestar.py
index 1054a63..a83f2da 100644
--- a/gallery_dl/extractor/subscribestar.py
+++ b/gallery_dl/extractor/subscribestar.py
@@ -40,8 +40,14 @@ class SubscribestarExtractor(Extractor):
for post_html in self.posts():
media = self._media_from_post(post_html)
data = self._data_from_post(post_html)
- data["title"] = text.unescape(text.extr(
- data["content"], "<h1>", "</h1>"))
+
+ content = data["content"]
+ if "<html><body>" in content:
+ data["content"] = content = text.extr(
+ content, "<body>", "</body>")
+ data["title"] = text.unescape(
+ text.rextract(content, "<h1>", "</h1>")[0] or "")
+
yield Message.Directory, data
for num, item in enumerate(media, 1):
item.update(data)
@@ -189,7 +195,12 @@ class SubscribestarExtractor(Extractor):
"author_nick": text.unescape(extr('>', '<')),
"date" : self._parse_datetime(extr(
'class="post-date">', '</').rpartition(">")[2]),
- "content" : extr('<body>', '</body>').strip(),
+ "content" : extr(
+ '<div class="post-content" data-role="post_content-text">',
+ '</div><div class="post-uploads for-youtube"').strip(),
+ "tags" : list(text.extract_iter(extr(
+ '<div class="post_tags for-post">',
+ '<div class="post-actions">'), '?tag=', '"')),
}
def _parse_datetime(self, dt):
@@ -243,7 +254,12 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"post_id" : text.parse_int(extr('data-id="', '"')),
"date" : self._parse_datetime(extr(
'<div class="section-title_date">', '<')),
- "content" : extr('<body>', '</body>').strip(),
+ "content" : extr(
+ '<div class="post-content" data-role="post_content-text">',
+ '</div><div class="post-uploads for-youtube"').strip(),
+ "tags" : list(text.extract_iter(extr(
+ '<div class="post_tags for-post">',
+ '<div class="post-actions">'), '?tag=', '"')),
"author_name": text.unescape(extr(
'class="star_link" href="/', '"')),
"author_id" : text.parse_int(extr('data-user-id="', '"')),
diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py
index af3f32d..1dd3482 100644
--- a/gallery_dl/extractor/vipergirls.py
+++ b/gallery_dl/extractor/vipergirls.py
@@ -43,31 +43,40 @@ class VipergirlsExtractor(Extractor):
def items(self):
self.login()
- posts = self.posts()
+ root = self.posts()
+ forum_title = root[1].attrib["title"]
+ thread_title = root[2].attrib["title"]
like = self.config("like")
if like:
- user_hash = posts[0].get("hash")
+ user_hash = root[0].get("hash")
if len(user_hash) < 16:
self.log.warning("Login required to like posts")
like = False
- posts = posts.iter("post")
+ posts = root.iter("post")
if self.page:
util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
for post in posts:
+ images = list(post)
+
data = post.attrib
+ data["forum_title"] = forum_title
data["thread_id"] = self.thread_id
+ data["thread_title"] = thread_title
+ data["post_id"] = data.pop("id")
+ data["post_num"] = data.pop("number")
+ data["post_title"] = data.pop("title")
+ data["count"] = len(images)
+ del data["imagecount"]
yield Message.Directory, data
-
- image = None
- for image in post:
- yield Message.Queue, image.attrib["main_url"], data
-
- if image is not None and like:
- self.like(post, user_hash)
+ if images:
+ for data["num"], image in enumerate(images, 1):
+ yield Message.Queue, image.attrib["main_url"], data
+ if like:
+ self.like(post, user_hash)
def login(self):
if self.cookies_check(self.cookies_names):
diff --git a/gallery_dl/transaction_id.py b/gallery_dl/transaction_id.py
index 25f1775..89e3d5b 100644
--- a/gallery_dl/transaction_id.py
+++ b/gallery_dl/transaction_id.py
@@ -129,7 +129,9 @@ class ClientTransaction():
keyword="obfiowerehiring", rndnum=3):
bytes_key = self.key_bytes
- now = int(time.time()) - 1682924400
+ nowf = time.time()
+ nowi = int(nowf)
+ now = nowi - 1682924400
bytes_time = (
(now ) & 0xFF, # noqa: E202
(now >> 8) & 0xFF, # noqa: E222
@@ -141,7 +143,7 @@ class ClientTransaction():
method, path, now, keyword, self.animation_key)
bytes_hash = hashlib.sha256(payload.encode()).digest()[:16]
- num = random.randrange(256)
+ num = (random.randrange(16) << 4) + int((nowf - nowi) * 16.0)
result = bytes(
byte ^ num
for byte in itertools.chain(
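
The replacement XOR key packs the sub-second part of the timestamp into its
low nibble, leaving only the high nibble random. A worked example with an
illustrative timestamp:

    nowf = 1716720000.8125              # hypothetical time.time()
    nowi = int(nowf)                    # 1716720000
    low  = int((nowf - nowi) * 16.0)    # int(0.8125 * 16) == 13 (0x0D)
    # high nibble stays random: random.randrange(16) << 4, e.g. 0xA0
    num  = (0xA << 4) + low             # 0xAD
    # every payload byte is then XORed with num, as in the hunk above
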
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index d40dacd..e543a31 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.29.6"
+__version__ = "1.29.7"
__variant__ = None