Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/__init__.py                10
-rw-r--r--  gallery_dl/cookies.py                 24
-rw-r--r--  gallery_dl/downloader/ytdl.py         10
-rw-r--r--  gallery_dl/extractor/8chan.py         16
-rw-r--r--  gallery_dl/extractor/__init__.py       3
-rw-r--r--  gallery_dl/extractor/ao3.py          302
-rw-r--r--  gallery_dl/extractor/bluesky.py       78
-rw-r--r--  gallery_dl/extractor/chevereto.py      2
-rw-r--r--  gallery_dl/extractor/civitai.py      490
-rw-r--r--  gallery_dl/extractor/cohost.py       223
-rw-r--r--  gallery_dl/extractor/common.py        80
-rw-r--r--  gallery_dl/extractor/deviantart.py    44
-rw-r--r--  gallery_dl/extractor/flickr.py         6
-rw-r--r--  gallery_dl/extractor/inkbunny.py      13
-rw-r--r--  gallery_dl/extractor/newgrounds.py     4
-rw-r--r--  gallery_dl/extractor/pixiv.py          5
-rw-r--r--  gallery_dl/extractor/skeb.py           6
-rw-r--r--  gallery_dl/extractor/weasyl.py        28
-rw-r--r--  gallery_dl/extractor/wikimedia.py     21
-rw-r--r--  gallery_dl/extractor/zzup.py          30
-rw-r--r--  gallery_dl/formatter.py                2
-rw-r--r--  gallery_dl/job.py                      7
-rw-r--r--  gallery_dl/option.py                  11
-rw-r--r--  gallery_dl/postprocessor/ugoira.py   128
-rw-r--r--  gallery_dl/text.py                    25
-rw-r--r--  gallery_dl/util.py                    34
-rw-r--r--  gallery_dl/version.py                  2
27 files changed, 1438 insertions, 166 deletions
diff --git a/gallery_dl/__init__.py b/gallery_dl/__init__.py
index 663fe99..7a9e0be 100644
--- a/gallery_dl/__init__.py
+++ b/gallery_dl/__init__.py
@@ -202,12 +202,18 @@ def main():
extractor.modules.append("")
sys.stdout.write("\n".join(extractor.modules))
- elif args.list_extractors:
+ elif args.list_extractors is not None:
write = sys.stdout.write
fmt = ("{}{}\nCategory: {} - Subcategory: {}"
"\nExample : {}\n\n").format
- for extr in extractor.extractors():
+ extractors = extractor.extractors()
+ if args.list_extractors:
+ fltr = util.build_extractor_filter(
+ args.list_extractors, negate=False)
+ extractors = filter(fltr, extractors)
+
+ for extr in extractors:
write(fmt(
extr.__name__,
"\n" + extr.__doc__ if extr.__doc__ else "",
diff --git a/gallery_dl/cookies.py b/gallery_dl/cookies.py
index deb7c7b..0ffd29a 100644
--- a/gallery_dl/cookies.py
+++ b/gallery_dl/cookies.py
@@ -50,21 +50,27 @@ def load_cookies_firefox(cookiejar, profile=None, container=None, domain=None):
sql = ("SELECT name, value, host, path, isSecure, expiry "
"FROM moz_cookies")
- parameters = ()
+ conditions = []
+ parameters = []
if container_id is False:
- sql += " WHERE NOT INSTR(originAttributes,'userContextId=')"
+ conditions.append("NOT INSTR(originAttributes,'userContextId=')")
elif container_id:
- sql += " WHERE originAttributes LIKE ? OR originAttributes LIKE ?"
+ conditions.append(
+ "originAttributes LIKE ? OR originAttributes LIKE ?")
uid = "%userContextId={}".format(container_id)
- parameters = (uid, uid + "&%")
- elif domain:
+ parameters += (uid, uid + "&%")
+
+ if domain:
if domain[0] == ".":
- sql += " WHERE host == ? OR host LIKE ?"
- parameters = (domain[1:], "%" + domain)
+ conditions.append("host == ? OR host LIKE ?")
+ parameters += (domain[1:], "%" + domain)
else:
- sql += " WHERE host == ? OR host == ?"
- parameters = (domain, "." + domain)
+ conditions.append("host == ? OR host == ?")
+ parameters += (domain, "." + domain)
+
+ if conditions:
+ sql = "{} WHERE ( {} )".format(sql, " ) AND ( ".join(conditions))
set_cookie = cookiejar.set_cookie
for name, value, domain, path, secure, expires in db.execute(
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index b3bec21..950a72f 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -45,7 +45,7 @@ class YoutubeDLDownloader(DownloaderBase):
except (ImportError, SyntaxError) as exc:
self.log.error("Cannot import module '%s'",
getattr(exc, "name", ""))
- self.log.debug("", exc_info=True)
+ self.log.debug("", exc_info=exc)
self.download = lambda u, p: False
return False
self.ytdl_instance = ytdl_instance = ytdl.construct_YoutubeDL(
@@ -64,8 +64,8 @@ class YoutubeDLDownloader(DownloaderBase):
if not info_dict:
try:
info_dict = ytdl_instance.extract_info(url[5:], download=False)
- except Exception:
- pass
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
if not info_dict:
return False
@@ -120,8 +120,8 @@ class YoutubeDLDownloader(DownloaderBase):
self.out.start(pathfmt.path)
try:
ytdl_instance.process_info(info_dict)
- except Exception:
- self.log.debug("Traceback", exc_info=True)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
return False
return True
diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py
index a5e8b27..afa3a69 100644
--- a/gallery_dl/extractor/8chan.py
+++ b/gallery_dl/extractor/8chan.py
@@ -9,9 +9,9 @@
"""Extractors for https://8chan.moe/"""
from .common import Extractor, Message
-from .. import text
+from .. import text, util
from ..cache import memcache
-from datetime import datetime, timedelta
+from datetime import timedelta
import itertools
BASE_PATTERN = r"(?:https?://)?8chan\.(moe|se|cc)"
@@ -27,21 +27,23 @@ class _8chanExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
- self.cookies.set(
- "TOS20240718", "1", domain=self.root.rpartition("/")[2])
+ now = util.datetime_utcnow()
+ domain = self.root.rpartition("/")[2]
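+ # accept the site's Terms of Service: a pinned cookie plus one named after the current date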
+ self.cookies.set("TOS20240928", "1", domain=domain)
+ self.cookies.set(now.strftime("TOS%Y%m%d"), "1", domain=domain)
@memcache()
def cookies_prepare(self):
# fetch captcha cookies
# (necessary to download without getting interrupted)
- now = datetime.utcnow()
+ now = util.datetime_utcnow()
url = self.root + "/captcha.js"
params = {"d": now.strftime("%a %b %d %Y %H:%M:%S GMT+0000 (UTC)")}
self.request(url, params=params).content
# adjust cookies
# - remove 'expires' timestamp
- # - move 'captchaexpiration' value forward by 1 month)
+ # - move 'captchaexpiration' value forward by 1 month
domain = self.root.rpartition("/")[2]
for cookie in self.cookies:
if cookie.domain.endswith(domain):
@@ -79,7 +81,7 @@ class _8chanThreadExtractor(_8chanExtractor):
self.cookies = self.cookies_prepare()
except Exception as exc:
self.log.debug("Failed to fetch captcha cookies: %s: %s",
- exc.__class__.__name__, exc, exc_info=True)
+ exc.__class__.__name__, exc, exc_info=exc)
# download files
posts = thread.pop("posts", ())
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index e103cb1..826771c 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -23,6 +23,7 @@ modules = [
"8muses",
"adultempire",
"agnph",
+ "ao3",
"architizer",
"artstation",
"aryion",
@@ -35,6 +36,8 @@ modules = [
"catbox",
"chevereto",
"cien",
+ "civitai",
+ "cohost",
"comicvine",
"cyberdrop",
"danbooru",
diff --git a/gallery_dl/extractor/ao3.py b/gallery_dl/extractor/ao3.py
new file mode 100644
index 0000000..1f570e8
--- /dev/null
+++ b/gallery_dl/extractor/ao3.py
@@ -0,0 +1,302 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://archiveofourown.org/"""
+
+from .common import Extractor, Message
+from .. import text, util, exception
+from ..cache import cache
+
+BASE_PATTERN = (r"(?:https?://)?(?:www\.)?"
+ r"a(?:rchiveofourown|o3)\.(?:org|com|net)")
+
+
+class Ao3Extractor(Extractor):
+ """Base class for ao3 extractors"""
+ category = "ao3"
+ root = "https://archiveofourown.org"
+ categorytransfer = True
+ cookies_domain = ".archiveofourown.org"
+ cookies_names = ("remember_user_token",)
+ request_interval = (0.5, 1.5)
+
+ def items(self):
+ self.login()
+
+ base = self.root + "/works/"
+ data = {"_extractor": Ao3WorkExtractor}
+
+ for work_id in self.works():
+ yield Message.Queue, base + work_id, data
+
+ def works(self):
+ return self._pagination(self.groups[0])
+
+ def login(self):
+ if self.cookies_check(self.cookies_names):
+ return
+
+ username, password = self._get_auth_info()
+ if username:
+ return self.cookies_update(self._login_impl(username, password))
+
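+ # cache the login result for 90 days, keyed by username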
+ @cache(maxage=90*86400, keyarg=1)
+ def _login_impl(self, username, password):
+ self.log.info("Logging in as %s", username)
+
+ url = self.root + "/users/login"
+ page = self.request(url).text
+
+ pos = page.find('id="loginform"')
+ token = text.extract(
+ page, ' name="authenticity_token" value="', '"', pos)[0]
+ if not token:
+ self.log.error("Unable to extract 'authenticity_token'")
+
+ data = {
+ "authenticity_token": text.unescape(token),
+ "user[login]" : username,
+ "user[password]" : password,
+ "user[remember_me]" : "1",
+ "commit" : "Log In",
+ }
+
+ response = self.request(url, method="POST", data=data)
+ if not response.history:
+ raise exception.AuthenticationError()
+
+ remember = response.history[0].cookies.get("remember_user_token")
+ if not remember:
+ raise exception.AuthenticationError()
+
+ return {
+ "remember_user_token": remember,
+ "user_credentials" : "1",
+ }
+
+ def _pagination(self, path, needle='<li id="work_'):
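+ # yield item IDs from each listing page and follow its rel="next" link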
+ while True:
+ page = self.request(self.root + path).text
+ yield from text.extract_iter(page, needle, '"')
+ path = text.extr(page, '<a rel="next" href="', '"')
+ if not path:
+ return
+ path = text.unescape(path)
+
+
+class Ao3WorkExtractor(Ao3Extractor):
+ """Extractor for an AO3 work"""
+ subcategory = "work"
+ directory_fmt = ("{category}", "{author}")
+ filename_fmt = "{id} {title}.{extension}"
+ archive_fmt = "{id}.{extension}"
+ pattern = BASE_PATTERN + r"/works/(\d+)"
+ example = "https://archiveofourown.org/works/12345"
+
+ def _init(self):
+ formats = self.config("formats")
+ if formats is None:
+ self.formats = ("pdf",)
+ elif not formats:
+ self.formats = ()
+ elif isinstance(formats, str):
+ self.formats = formats.lower().replace(" ", "").split(",")
+ else:
+ self.formats = formats
+
+ self.cookies.set("view_adult", "true", domain="archiveofourown.org")
+
+ def items(self):
+ self.login()
+
+ work_id = self.groups[0]
+ url = "{}/works/{}".format(self.root, work_id)
+ response = self.request(url, notfound="work")
+
+ if response.url.endswith("/users/login?restricted=true"):
+ raise exception.AuthorizationError(
+ "Login required to access member-only works")
+ page = response.text
+ if len(page) < 20000 and \
+ '<h2 class="landmark heading">Adult Content Warning</' in page:
+ raise exception.StopExtraction("Adult Content")
+
+ extr = text.extract_from(page)
+
+ chapters = {}
+ cindex = extr(' id="chapter_index"', "</ul>")
+ for ch in text.extract_iter(cindex, ' value="', "</option>"):
+ cid, _, cname = ch.partition('">')
+ chapters[cid] = text.unescape(cname)
+
+ fmts = {}
+ path = ""
+ download = extr(' class="download"', "</ul>")
+ for dl in text.extract_iter(download, ' href="', "</"):
+ path, _, type = dl.rpartition('">')
+ fmts[type.lower()] = path
+
+ data = {
+ "id" : text.parse_int(work_id),
+ "rating" : text.split_html(
+ extr('<dd class="rating tags">', "</dd>")),
+ "warnings" : text.split_html(
+ extr('<dd class="warning tags">', "</dd>")),
+ "categories" : text.split_html(
+ extr('<dd class="category tags">', "</dd>")),
+ "fandom" : text.split_html(
+ extr('<dd class="fandom tags">', "</dd>")),
+ "relationships": text.split_html(
+ extr('<dd class="relationship tags">', "</dd>")),
+ "characters" : text.split_html(
+ extr('<dd class="character tags">', "</dd>")),
+ "tags" : text.split_html(
+ extr('<dd class="freeform tags">', "</dd>")),
+ "lang" : extr('<dd class="language" lang="', '"'),
+ "series" : extr('<dd class="series">', "</dd>"),
+ "date" : text.parse_datetime(
+ extr('<dd class="published">', "<"), "%Y-%m-%d"),
+ "date_completed": text.parse_datetime(
+ extr('>Completed:</dt><dd class="status">', "<"), "%Y-%m-%d"),
+ "date_updated" : text.parse_timestamp(
+ path.rpartition("updated_at=")[2]),
+ "words" : text.parse_int(
+ extr('<dd class="words">', "<").replace(",", "")),
+ "chapters" : chapters,
+ "comments" : text.parse_int(
+ extr('<dd class="comments">', "<").replace(",", "")),
+ "likes" : text.parse_int(
+ extr('<dd class="kudos">', "<").replace(",", "")),
+ "bookmarks" : text.parse_int(text.remove_html(
+ extr('<dd class="bookmarks">', "</dd>")).replace(",", "")),
+ "views" : text.parse_int(
+ extr('<dd class="hits">', "<").replace(",", "")),
+ "title" : text.unescape(text.remove_html(
+ extr(' class="title heading">', "</h2>")).strip()),
+ "author" : text.unescape(text.remove_html(
+ extr(' class="byline heading">', "</h3>"))),
+ "summary" : text.split_html(
+ extr(' class="heading">Summary:</h3>', "</div>")),
+ }
+ data["language"] = util.code_to_language(data["lang"])
+
+ series = data["series"]
+ if series:
+ extr = text.extract_from(series)
+ data["series"] = {
+ "prev" : extr(' class="previous" href="/works/', '"'),
+ "index": extr(' class="position">Part ', " "),
+ "id" : extr(' href="/series/', '"'),
+ "name" : text.unescape(extr(">", "<")),
+ "next" : extr(' class="next" href="/works/', '"'),
+ }
+ else:
+ data["series"] = None
+
+ yield Message.Directory, data
+ for fmt in self.formats:
+ try:
+ url = text.urljoin(self.root, fmts[fmt])
+ except KeyError:
+ self.log.warning("%s: Format '%s' not available", work_id, fmt)
+ else:
+ yield Message.Url, url, text.nameext_from_url(url, data)
+
+
+class Ao3SeriesExtractor(Ao3Extractor):
+ """Extractor for AO3 works of a series"""
+ subcategory = "series"
+ pattern = BASE_PATTERN + r"(/series/(\d+))"
+ example = "https://archiveofourown.org/series/12345"
+
+
+class Ao3TagExtractor(Ao3Extractor):
+ """Extractor for AO3 works by tag"""
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"(/tags/([^/?#]+)/works(?:/?\?.+)?)"
+ example = "https://archiveofourown.org/tags/TAG/works"
+
+
+class Ao3SearchExtractor(Ao3Extractor):
+ """Extractor for AO3 search results"""
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"(/works/search/?\?.+)"
+ example = "https://archiveofourown.org/works/search?work_search[query]=air"
+
+
+class Ao3UserExtractor(Ao3Extractor):
+ """Extractor for an AO3 user profile"""
+ subcategory = "user"
+ pattern = (BASE_PATTERN + r"/users/([^/?#]+(?:/pseuds/[^/?#]+)?)"
+ r"(?:/profile)?/?(?:$|\?|#)")
+ example = "https://archiveofourown.org/users/USER"
+
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/users/{}/".format(self.root, self.groups[0])
+ return self._dispatch_extractors((
+ (Ao3UserWorksExtractor , base + "works"),
+ (Ao3UserSeriesExtractor , base + "series"),
+ (Ao3UserBookmarkExtractor, base + "bookmarks"),
+ ), ("user-works", "user-series"))
+
+
+class Ao3UserWorksExtractor(Ao3Extractor):
+ """Extractor for works of an AO3 user"""
+ subcategory = "user-works"
+ pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
+ r"works(?:/?\?.+)?)")
+ example = "https://archiveofourown.org/users/USER/works"
+
+
+class Ao3UserSeriesExtractor(Ao3Extractor):
+ """Extractor for series of an AO3 user"""
+ subcategory = "user-series"
+ pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
+ r"series(?:/?\?.+)?)")
+ example = "https://archiveofourown.org/users/USER/series"
+
+ def items(self):
+ self.login()
+
+ base = self.root + "/series/"
+ data = {"_extractor": Ao3SeriesExtractor}
+
+ for series_id in self.series():
+ yield Message.Queue, base + series_id, data
+
+ def series(self):
+ return self._pagination(self.groups[0], '<li id="series_')
+
+
+class Ao3UserBookmarkExtractor(Ao3Extractor):
+ """Extractor for bookmarked works of an AO3 user"""
+ subcategory = "user-bookmark"
+ pattern = (BASE_PATTERN + r"(/users/([^/?#]+)/(?:pseuds/([^/?#]+)/)?"
+ r"bookmarks(?:/?\?.+)?)")
+ example = "https://archiveofourown.org/users/USER/bookmarks"
+
+ def items(self):
+ self.login()
+
+ base = self.root + "/"
+ data_work = {"_extractor": Ao3WorkExtractor}
+ data_series = {"_extractor": Ao3SeriesExtractor}
+
+ for item in self._pagination(
+ self.groups[0], '<span class="count"><a href="/'):
+ path = item.rpartition("/")[0]
+ url = base + path
+ if item.startswith("works/"):
+ yield Message.Queue, url, data_work
+ elif item.startswith("series/"):
+ yield Message.Queue, url, data_series
+ else:
+ self.log.warning("Unsupported bookmark type '%s'", path)
diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py
index c97bf65..39c5635 100644
--- a/gallery_dl/extractor/bluesky.py
+++ b/gallery_dl/extractor/bluesky.py
@@ -41,6 +41,7 @@ class BlueskyExtractor(Extractor):
self.api = BlueskyAPI(self)
self._user = self._user_did = None
self.instance = self.root.partition("://")[2]
+ self.videos = self.config("videos", True)
def items(self):
for post in self.posts():
@@ -55,14 +56,6 @@ class BlueskyExtractor(Extractor):
post.update(post["record"])
del post["record"]
- images = ()
- if "embed" in post:
- media = post["embed"]
- if "media" in media:
- media = media["media"]
- if "images" in media:
- images = media["images"]
-
if self._metadata_facets:
if "facets" in post:
post["hashtags"] = tags = []
@@ -82,45 +75,66 @@ class BlueskyExtractor(Extractor):
if self._metadata_user:
post["user"] = self._user or post["author"]
+ files = self._extract_files(post)
post["instance"] = self.instance
post["post_id"] = pid
- post["count"] = len(images)
+ post["count"] = len(files)
post["date"] = text.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, post
- if not images:
+ if not files:
continue
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
"?did={}&cid=".format(post["author"]["did"]))
- post["num"] = 0
-
- for file in images:
- post["num"] += 1
- post["description"] = file["alt"]
-
- try:
- aspect = file["aspectRatio"]
- post["width"] = aspect["width"]
- post["height"] = aspect["height"]
- except KeyError:
- post["width"] = post["height"] = 0
-
- image = file["image"]
- try:
- cid = image["ref"]["$link"]
- except KeyError:
- cid = image["cid"]
- post["filename"] = cid
- post["extension"] = image["mimeType"].rpartition("/")[2]
-
- yield Message.Url, base + cid, post
+ for post["num"], file in enumerate(files, 1):
+ post.update(file)
+ yield Message.Url, base + file["filename"], post
def posts(self):
return ()
+ def _extract_files(self, post):
+ if "embed" not in post:
+ return ()
+
+ files = []
+ media = post["embed"]
+ if "media" in media:
+ media = media["media"]
+
+ if "images" in media:
+ for image in media["images"]:
+ files.append(self._extract_media(image, "image"))
+ if "video" in media and self.videos:
+ files.append(self._extract_media(media, "video"))
+
+ return files
+
+ def _extract_media(self, media, key):
+ try:
+ aspect = media["aspectRatio"]
+ width = aspect["width"]
+ height = aspect["height"]
+ except KeyError:
+ width = height = 0
+
+ data = media[key]
+ try:
+ cid = data["ref"]["$link"]
+ except KeyError:
+ cid = data["cid"]
+
+ return {
+ "description": media.get("alt") or "",
+ "width" : width,
+ "height" : height,
+ "filename" : cid,
+ "extension" : data["mimeType"].rpartition("/")[2],
+ }
+
def _make_post(self, actor, kind):
did = self.api._did_from_actor(actor)
profile = self.api.get_profile(did)
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index ef5a44c..102945b 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -35,7 +35,7 @@ class CheveretoExtractor(BaseExtractor):
BASE_PATTERN = CheveretoExtractor.update({
"jpgfish": {
- "root": "https://jpg4.su",
+ "root": "https://jpg5.su",
"pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
},
"imgkiwi": {
diff --git a/gallery_dl/extractor/civitai.py b/gallery_dl/extractor/civitai.py
new file mode 100644
index 0000000..3e657d6
--- /dev/null
+++ b/gallery_dl/extractor/civitai.py
@@ -0,0 +1,490 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://www.civitai.com/"""
+
+from .common import Extractor, Message
+from .. import text, util
+import itertools
+import time
+
+BASE_PATTERN = r"(?:https?://)?civitai\.com"
+USER_PATTERN = BASE_PATTERN + r"/user/([^/?#]+)"
+
+
+class CivitaiExtractor(Extractor):
+ """Base class for civitai extractors"""
+ category = "civitai"
+ root = "https://civitai.com"
+ directory_fmt = ("{category}", "{username|user[username]}", "images")
+ filename_fmt = "{id}.{extension}"
+ archive_fmt = "{hash}"
+ request_interval = (0.5, 1.5)
+
+ def _init(self):
+ if self.config("api") == "trpc":
+ self.log.debug("Using tRPC API")
+ self.api = CivitaiTrpcAPI(self)
+ else:
+ self.log.debug("Using REST API")
+ self.api = CivitaiRestAPI(self)
+
+ quality = self.config("quality")
+ if quality:
+ if not isinstance(quality, str):
+ quality = ",".join(quality)
+ self._image_quality = quality
+ self._image_ext = ("png" if quality == "original=true" else "jpg")
+ else:
+ self._image_quality = "original=true"
+ self._image_ext = "png"
+
+ def items(self):
+ models = self.models()
+ if models:
+ data = {"_extractor": CivitaiModelExtractor}
+ for model in models:
+ url = "{}/models/{}".format(self.root, model["id"])
+ yield Message.Queue, url, data
+ return
+
+ images = self.images()
+ if images:
+ for image in images:
+ url = self._url(image)
+ image["date"] = text.parse_datetime(
+ image["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ text.nameext_from_url(url, image)
+ image["extension"] = self._image_ext
+ yield Message.Directory, image
+ yield Message.Url, url, image
+ return
+
+ def models(self):
+ return ()
+
+ def images(self):
+ return ()
+
+ def _url(self, image):
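+ # rewrite an existing delivery URL to the requested quality, or build one from the image's UUID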
+ url = image["url"]
+ if "/" in url:
+ parts = url.rsplit("/", 2)
+ parts[1] = self._image_quality
+ return "/".join(parts)
+
+ name = image.get("name")
+ if not name:
+ mime = image.get("mimeType") or self._image_ext
+ name = "{}.{}".format(image.get("id"), mime.rpartition("/")[2])
+ return (
+ "https://image.civitai.com/xG1nkqKTMzGDvpLrqFT7WA/{}/{}/{}".format(
+ url, self._image_quality, name)
+ )
+
+
+class CivitaiModelExtractor(CivitaiExtractor):
+ subcategory = "model"
+ directory_fmt = ("{category}", "{user[username]}",
+ "{model[id]}{model[name]:? //}",
+ "{version[id]}{version[name]:? //}")
+ filename_fmt = "{filename}.{extension}"
+ archive_fmt = "{file[hash]}"
+ pattern = BASE_PATTERN + r"/models/(\d+)(?:/?\?modelVersionId=(\d+))?"
+ example = "https://civitai.com/models/12345/TITLE"
+
+ def items(self):
+ model_id, version_id = self.groups
+ model = self.api.model(model_id)
+
+ if "user" in model:
+ user = model["user"]
+ del model["user"]
+ else:
+ user = model["creator"]
+ del model["creator"]
+ versions = model["modelVersions"]
+ del model["modelVersions"]
+
+ if version_id:
+ version_id = int(version_id)
+ for version in versions:
+ if version["id"] == version_id:
+ break
+ else:
+ version = self.api.model_version(version_id)
+ versions = (version,)
+
+ for version in versions:
+ version["date"] = text.parse_datetime(
+ version["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ data = {
+ "model" : model,
+ "version": version,
+ "user" : user,
+ }
+
+ yield Message.Directory, data
+ for file in self._extract_files(model, version, user):
+ file.update(data)
+ yield Message.Url, file["url"], file
+
+ def _extract_files(self, model, version, user):
+ filetypes = self.config("files")
+ if filetypes is None:
+ return self._extract_files_image(model, version, user)
+
+ generators = {
+ "model" : self._extract_files_model,
+ "image" : self._extract_files_image,
+ "gallery" : self._extract_files_gallery,
+ "gallerie": self._extract_files_gallery,
+ }
+ if isinstance(filetypes, str):
+ filetypes = filetypes.split(",")
+
+ return itertools.chain.from_iterable(
+ generators[ft.rstrip("s")](model, version, user)
+ for ft in filetypes
+ )
+
+ def _extract_files_model(self, model, version, user):
+ return [
+ {
+ "num" : num,
+ "file" : file,
+ "filename" : file["name"],
+ "extension": "bin",
+ "url" : file["downloadUrl"],
+ "_http_headers" : {
+ "Authorization": self.api.headers.get("Authorization")},
+ "_http_validate": self._validate_file_model,
+ }
+ for num, file in enumerate(version["files"], 1)
+ ]
+
+ def _extract_files_image(self, model, version, user):
+ if "images" in version:
+ images = version["images"]
+ else:
+ params = {
+ "modelVersionId": version["id"],
+ "prioritizedUserIds": [user["id"]],
+ "period": "AllTime",
+ "sort": "Most Reactions",
+ "limit": 20,
+ "pending": True,
+ }
+ images = self.api.images(params, defaults=False)
+
+ return [
+ text.nameext_from_url(file["url"], {
+ "num" : num,
+ "file": file,
+ "url" : self._url(file),
+ })
+ for num, file in enumerate(images, 1)
+ ]
+
+ def _extract_files_gallery(self, model, version, user):
+ images = self.api.images_gallery(model, version, user)
+ for num, file in enumerate(images, 1):
+ yield text.nameext_from_url(file["url"], {
+ "num" : num,
+ "file": file,
+ "url" : self._url(file),
+ })
+
+ def _validate_file_model(self, response):
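+ # an HTML page instead of file content indicates the download requires an API key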
+ if response.headers.get("Content-Type", "").startswith("text/html"):
+ alert = text.extr(
+ response.text, 'mantine-Alert-message">', "</div></div></div>")
+ if alert:
+ msg = "\"{}\" - 'api-key' required".format(
+ text.remove_html(alert))
+ else:
+ msg = "'api-key' required to download this file"
+ self.log.warning(msg)
+ return False
+ return True
+
+
+class CivitaiImageExtractor(CivitaiExtractor):
+ subcategory = "image"
+ pattern = BASE_PATTERN + r"/images/(\d+)"
+ example = "https://civitai.com/images/12345"
+
+ def images(self):
+ return self.api.image(self.groups[0])
+
+
+class CivitaiTagModelsExtractor(CivitaiExtractor):
+ subcategory = "tag-models"
+ pattern = BASE_PATTERN + r"/(?:tag/|models\?tag=)([^/?&#]+)"
+ example = "https://civitai.com/tag/TAG"
+
+ def models(self):
+ tag = text.unquote(self.groups[0])
+ return self.api.models({"tag": tag})
+
+
+class CivitaiTagImagesExtractor(CivitaiExtractor):
+ subcategory = "tag-images"
+ pattern = BASE_PATTERN + r"/images\?tags=([^&#]+)"
+ example = "https://civitai.com/images?tags=12345"
+
+ def images(self):
+ tag = text.unquote(self.groups[0])
+ return self.api.images({"tag": tag})
+
+
+class CivitaiSearchExtractor(CivitaiExtractor):
+ subcategory = "search"
+ pattern = BASE_PATTERN + r"/search/models\?([^#]+)"
+ example = "https://civitai.com/search/models?query=QUERY"
+
+ def models(self):
+ params = text.parse_query(self.groups[0])
+ return self.api.models(params)
+
+
+class CivitaiUserExtractor(CivitaiExtractor):
+ subcategory = "user"
+ pattern = USER_PATTERN + r"/?(?:$|\?|#)"
+ example = "https://civitai.com/user/USER"
+
+ def initialize(self):
+ pass
+
+ def items(self):
+ base = "{}/user/{}/".format(self.root, self.groups[0])
+ return self._dispatch_extractors((
+ (CivitaiUserModelsExtractor, base + "models"),
+ (CivitaiUserImagesExtractor, base + "images"),
+ ), ("user-models", "user-images"))
+
+
+class CivitaiUserModelsExtractor(CivitaiExtractor):
+ subcategory = "user-models"
+ pattern = USER_PATTERN + r"/models/?(?:\?([^#]+))?"
+ example = "https://civitai.com/user/USER/models"
+
+ def models(self):
+ params = text.parse_query(self.groups[1])
+ params["username"] = text.unquote(self.groups[0])
+ return self.api.models(params)
+
+
+class CivitaiUserImagesExtractor(CivitaiExtractor):
+ subcategory = "user-images"
+ pattern = USER_PATTERN + r"/images/?(?:\?([^#]+))?"
+ example = "https://civitai.com/user/USER/images"
+
+ def images(self):
+ params = text.parse_query(self.groups[1])
+ params["username"] = text.unquote(self.groups[0])
+ return self.api.images(params)
+
+
+class CivitaiRestAPI():
+ """Interface for the Civitai Public REST API
+
+ https://developer.civitai.com/docs/api/public-rest
+ """
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api"
+ self.headers = {"Content-Type": "application/json"}
+
+ api_key = extractor.config("api-key")
+ if api_key:
+ extractor.log.debug("Using api_key authentication")
+ self.headers["Authorization"] = "Bearer " + api_key
+
+ nsfw = extractor.config("nsfw")
+ if nsfw is None or nsfw is True:
+ nsfw = "X"
+ elif not nsfw:
+ nsfw = "Safe"
+ self.nsfw = nsfw
+
+ def image(self, image_id):
+ return self.images({
+ "imageId": image_id,
+ })
+
+ def images(self, params):
+ endpoint = "/v1/images"
+ if "nsfw" not in params:
+ params["nsfw"] = self.nsfw
+ return self._pagination(endpoint, params)
+
+ def images_gallery(self, model, version, user):
+ return self.images({
+ "modelId" : model["id"],
+ "modelVersionId": version["id"],
+ })
+
+ def model(self, model_id):
+ endpoint = "/v1/models/{}".format(model_id)
+ return self._call(endpoint)
+
+ def model_version(self, model_version_id):
+ endpoint = "/v1/model-versions/{}".format(model_version_id)
+ return self._call(endpoint)
+
+ def models(self, params):
+ return self._pagination("/v1/models", params)
+
+ def _call(self, endpoint, params=None):
+ if endpoint[0] == "/":
+ url = self.root + endpoint
+ else:
+ url = endpoint
+
+ response = self.extractor.request(
+ url, params=params, headers=self.headers)
+ return response.json()
+
+ def _pagination(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["items"]
+
+ try:
+ endpoint = data["metadata"]["nextPage"]
+ except KeyError:
+ return
+ params = None
+
+
+class CivitaiTrpcAPI():
+ """Interface for the Civitai tRPC API"""
+
+ def __init__(self, extractor):
+ self.extractor = extractor
+ self.root = extractor.root + "/api/trpc/"
+ self.headers = {
+ "content-type" : "application/json",
+ "x-client-version": "5.0.94",
+ "x-client-date" : "",
+ "x-client" : "web",
+ "x-fingerprint" : "undefined",
+ }
+ api_key = extractor.config("api-key")
+ if api_key:
+ extractor.log.debug("Using api_key authentication")
+ self.headers["Authorization"] = "Bearer " + api_key
+
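+ # 'browsingLevel' looks like a bitmask of content rating tiers; 31 presumably enables all of them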
+ nsfw = extractor.config("nsfw")
+ if nsfw is None or nsfw is True:
+ nsfw = 31
+ elif not nsfw:
+ nsfw = 1
+ self.nsfw = nsfw
+
+ def image(self, image_id):
+ endpoint = "image.get"
+ params = {"id": int(image_id)}
+ return (self._call(endpoint, params),)
+
+ def images(self, params, defaults=True):
+ endpoint = "image.getInfinite"
+
+ if defaults:
+ params_ = {
+ "useIndex" : True,
+ "period" : "AllTime",
+ "sort" : "Newest",
+ "types" : ["image"],
+ "withMeta" : False, # Metadata Only
+ "fromPlatform" : False, # Made On-Site
+ "browsingLevel": self.nsfw,
+ "include" : ["cosmetics"],
+ }
+ params_.update(params)
+ else:
+ params_ = params
+
+ return self._pagination(endpoint, params_)
+
+ def images_gallery(self, model, version, user):
+ endpoint = "image.getImagesAsPostsInfinite"
+ params = {
+ "period" : "AllTime",
+ "sort" : "Newest",
+ "modelVersionId": version["id"],
+ "modelId" : model["id"],
+ "hidden" : False,
+ "limit" : 50,
+ "browsingLevel" : self.nsfw,
+ }
+
+ for post in self._pagination(endpoint, params):
+ yield from post["images"]
+
+ def model(self, model_id):
+ endpoint = "model.getById"
+ params = {"id": int(model_id)}
+ return self._call(endpoint, params)
+
+ def model_version(self, model_version_id):
+ endpoint = "modelVersion.getById"
+ params = {"id": int(model_version_id)}
+ return self._call(endpoint, params)
+
+ def models(self, params, defaults=True):
+ endpoint = "model.getAll"
+
+ if defaults:
+ params_ = {
+ "period" : "AllTime",
+ "periodMode" : "published",
+ "sort" : "Newest",
+ "pending" : False,
+ "hidden" : False,
+ "followed" : False,
+ "earlyAccess" : False,
+ "fromPlatform" : False,
+ "supportsGeneration": False,
+ "browsingLevel": self.nsfw,
+ }
+ params_.update(params)
+ else:
+ params_ = params
+
+ return self._pagination(endpoint, params_)
+
+ def user(self, username):
+ endpoint = "user.getCreator"
+ params = {"username": username}
+ return (self._call(endpoint, params),)
+
+ def _call(self, endpoint, params):
+ url = self.root + endpoint
+ headers = self.headers
+ params = {"input": util.json_dumps({"json": params})}
+
+ headers["x-client-date"] = str(int(time.time() * 1000))
+ response = self.extractor.request(url, headers=headers, params=params)
+
+ return response.json()["result"]["data"]["json"]
+
+ def _pagination(self, endpoint, params):
+ while True:
+ data = self._call(endpoint, params)
+ yield from data["items"]
+
+ try:
+ if not data["nextCursor"]:
+ return
+ params["cursor"] = data["nextCursor"]
+ except KeyError:
+ return
diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py
new file mode 100644
index 0000000..e1f6040
--- /dev/null
+++ b/gallery_dl/extractor/cohost.py
@@ -0,0 +1,223 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2024 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://cohost.org/"""
+
+from .common import Extractor, Message
+from .. import text, util
+
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?cohost\.org"
+
+
+class CohostExtractor(Extractor):
+ """Base class for cohost extractors"""
+ category = "cohost"
+ root = "https://cohost.org"
+ directory_fmt = ("{category}", "{postingProject[handle]}")
+ filename_fmt = ("{postId}_{headline|plainTextBody:?/_/[:100]}"
+ "{num}.{extension}")
+ archive_fmt = "{postId}_{num}"
+
+ def _init(self):
+ self.replies = self.config("replies", True)
+ self.pinned = self.config("pinned", False)
+ self.shares = self.config("shares", False)
+ self.asks = self.config("asks", True)
+
+ def items(self):
+ for post in self.posts():
+ reason = post.get("limitedVisibilityReason")
+ if reason and reason != "none":
+ if reason == "log-in-first":
+ reason = ("This page's posts are visible only to users "
+ "who are logged in.")
+ self.log.warning('%s: "%s"', post["postId"], reason)
+
+ files = self._extract_files(post)
+ post["count"] = len(files)
+ post["date"] = text.parse_datetime(
+ post["publishedAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+
+ yield Message.Directory, post
+ for post["num"], file in enumerate(files, 1):
+ url = file["fileURL"]
+ post.update(file)
+ text.nameext_from_url(url, post)
+ yield Message.Url, url, post
+
+ def posts(self):
+ return ()
+
+ def _request_api(self, endpoint, input):
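+ # tRPC-style endpoint; a single query is wrapped as batch entry "0"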
+ url = "{}/api/v1/trpc/{}".format(self.root, endpoint)
+ params = {"batch": "1", "input": util.json_dumps({"0": input})}
+ headers = {"content-type": "application/json"}
+
+ data = self.request(url, params=params, headers=headers).json()
+ return data[0]["result"]["data"]
+
+ def _extract_files(self, post):
+ files = []
+
+ self._extract_blocks(post, files)
+ if self.shares and post.get("shareTree"):
+ for share in post["shareTree"]:
+ self._extract_blocks(share, files, share)
+ del post["shareTree"]
+
+ return files
+
+ def _extract_blocks(self, post, files, shared=None):
+ post["content"] = content = []
+
+ for block in post.pop("blocks") or ():
+ try:
+ type = block["type"]
+ if type == "attachment":
+ file = block["attachment"].copy()
+ file["shared"] = shared
+ files.append(file)
+ elif type == "attachment-row":
+ for att in block["attachments"]:
+ file = att["attachment"].copy()
+ file["shared"] = shared
+ files.append(file)
+ elif type == "markdown":
+ content.append(block["markdown"]["content"])
+ elif type == "ask":
+ post["ask"] = block["ask"]
+ else:
+ self.log.debug("%s: Unsupported block type '%s'",
+ post["postId"], type)
+ except Exception as exc:
+ self.log.debug("%s: %s", exc.__class__.__name__, exc)
+
+
+class CohostUserExtractor(CohostExtractor):
+ """Extractor for media from a cohost user"""
+ subcategory = "user"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/?(?:$|\?|#)"
+ example = "https://cohost.org/USER"
+
+ def posts(self):
+ empty = 0
+ params = {
+ "projectHandle": self.groups[0],
+ "page": 0,
+ "options": {
+ "pinnedPostsAtTop" : bool(self.pinned),
+ "hideReplies" : not self.replies,
+ "hideShares" : not self.shares,
+ "hideAsks" : not self.asks,
+ "viewingOnProjectPage": True,
+ },
+ }
+
+ while True:
+ data = self._request_api("posts.profilePosts", params)
+
+ posts = data["posts"]
+ if posts:
+ empty = 0
+ yield from posts
+ else:
+ empty += 1
+
+ pagination = data["pagination"]
+ if not pagination.get("morePagesForward"):
+ return
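+ # give up after several consecutive empty pages to avoid looping forever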
+ if empty >= 3:
+ return self.log.debug("Empty API results")
+ params["page"] = pagination["nextPage"]
+
+
+class CohostPostExtractor(CohostExtractor):
+ """Extractor for media from a single cohost post"""
+ subcategory = "post"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/post/(\d+)"
+ example = "https://cohost.org/USER/post/12345"
+
+ def posts(self):
+ endpoint = "posts.singlePost"
+ params = {
+ "handle": self.groups[0],
+ "postId": int(self.groups[1]),
+ }
+
+ data = self._request_api(endpoint, params)
+ post = data["post"]
+
+ try:
+ post["comments"] = data["comments"][self.groups[1]]
+ except LookupError:
+ post["comments"] = ()
+
+ return (post,)
+
+
+class CohostTagExtractor(CohostExtractor):
+ """Extractor for tagged posts"""
+ subcategory = "tag"
+ pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?"
+ example = "https://cohost.org/USER/tagged/TAG"
+
+ def posts(self):
+ user, tag, query = self.groups
+ url = "{}/{}/tagged/{}".format(self.root, user, tag)
+ params = text.parse_query(query)
+ post_feed_key = ("tagged-post-feed" if user == "rc" else
+ "project-tagged-post-feed")
+
+ while True:
+ page = self.request(url, params=params).text
+ data = util.json_loads(text.extr(
+ page, 'id="__COHOST_LOADER_STATE__">', '</script>'))
+
+ try:
+ feed = data[post_feed_key]
+ except KeyError:
+ feed = data.popitem()[1]
+
+ yield from feed["posts"]
+
+ pagination = feed["paginationMode"]
+ if not pagination.get("morePagesForward"):
+ return
+ params["refTimestamp"] = pagination["refTimestamp"]
+ params["skipPosts"] = \
+ pagination["currentSkip"] + pagination["idealPageStride"]
+
+
+class CohostLikesExtractor(CohostExtractor):
+ """Extractor for liked posts"""
+ subcategory = "likes"
+ pattern = BASE_PATTERN + r"/rc/liked-posts"
+ example = "https://cohost.org/rc/liked-posts"
+
+ def posts(self):
+ url = "{}/rc/liked-posts".format(self.root)
+ params = {}
+
+ while True:
+ page = self.request(url, params=params).text
+ data = util.json_loads(text.extr(
+ page, 'id="__COHOST_LOADER_STATE__">', '</script>'))
+
+ try:
+ feed = data["liked-posts-feed"]
+ except KeyError:
+ feed = data.popitem()[1]
+
+ yield from feed["posts"]
+
+ pagination = feed["paginationMode"]
+ if not pagination.get("morePagesForward"):
+ return
+ params["refTimestamp"] = pagination["refTimestamp"]
+ params["skipPosts"] = \
+ pagination["currentSkip"] + pagination["idealPageStride"]
diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index df70571..32c8e67 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -15,6 +15,7 @@ import sys
import time
import netrc
import queue
+import random
import getpass
import logging
import datetime
@@ -37,6 +38,7 @@ class Extractor():
archive_fmt = ""
root = ""
cookies_domain = ""
+ cookies_index = 0
referer = True
ciphers = None
tls12 = True
@@ -196,6 +198,10 @@ class Extractor():
server = response.headers.get("Server")
if server and server.startswith("cloudflare") and \
code in (403, 503):
+ mitigated = response.headers.get("cf-mitigated")
+ if mitigated and mitigated.lower() == "challenge":
+ self.log.warning("Cloudflare challenge")
+ break
content = response.content
if b"_cf_chl_opt" in content or b"jschl-answer" in content:
self.log.warning("Cloudflare challenge")
@@ -439,45 +445,55 @@ class Extractor():
cookies = self.config("cookies")
if cookies:
- if isinstance(cookies, dict):
- self.cookies_update_dict(cookies, self.cookies_domain)
+ select = self.config("cookies-select")
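+ # 'cookies' may hold several sources; pick one per extractor, rotating in order or at random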
+ if select:
+ if select == "rotate":
+ cookies = cookies[self.cookies_index % len(cookies)]
+ Extractor.cookies_index += 1
+ else:
+ cookies = random.choice(cookies)
+ self.cookies_load(cookies)
+
+ def cookies_load(self, cookies):
+ if isinstance(cookies, dict):
+ self.cookies_update_dict(cookies, self.cookies_domain)
+
+ elif isinstance(cookies, str):
+ path = util.expand_path(cookies)
+ try:
+ with open(path) as fp:
+ util.cookiestxt_load(fp, self.cookies)
+ except Exception as exc:
+ self.log.warning("cookies: %s", exc)
+ else:
+ self.log.debug("Loading cookies from '%s'", cookies)
+ self.cookies_file = path
- elif isinstance(cookies, str):
- path = util.expand_path(cookies)
+ elif isinstance(cookies, (list, tuple)):
+ key = tuple(cookies)
+ cookiejar = _browser_cookies.get(key)
+
+ if cookiejar is None:
+ from ..cookies import load_cookies
+ cookiejar = self.cookies.__class__()
try:
- with open(path) as fp:
- util.cookiestxt_load(fp, self.cookies)
+ load_cookies(cookiejar, cookies)
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
- self.log.debug("Loading cookies from '%s'", cookies)
- self.cookies_file = path
-
- elif isinstance(cookies, (list, tuple)):
- key = tuple(cookies)
- cookiejar = _browser_cookies.get(key)
-
- if cookiejar is None:
- from ..cookies import load_cookies
- cookiejar = self.cookies.__class__()
- try:
- load_cookies(cookiejar, cookies)
- except Exception as exc:
- self.log.warning("cookies: %s", exc)
- else:
- _browser_cookies[key] = cookiejar
- else:
- self.log.debug("Using cached cookies from %s", key)
+ _browser_cookies[key] = cookiejar
+ else:
+ self.log.debug("Using cached cookies from %s", key)
- set_cookie = self.cookies.set_cookie
- for cookie in cookiejar:
- set_cookie(cookie)
+ set_cookie = self.cookies.set_cookie
+ for cookie in cookiejar:
+ set_cookie(cookie)
- else:
- self.log.warning(
- "Expected 'dict', 'list', or 'str' value for 'cookies' "
- "option, got '%s' (%s)",
- cookies.__class__.__name__, cookies)
+ else:
+ self.log.warning(
+ "Expected 'dict', 'list', or 'str' value for 'cookies' "
+ "option, got '%s' (%s)",
+ cookies.__class__.__name__, cookies)
def cookies_store(self):
"""Store the session's cookies in a cookies.txt file"""
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index ea70b58..3686e1b 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -46,11 +46,13 @@ class DeviantartExtractor(Extractor):
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
self.original = self.config("original", True)
+ self.previews = self.config("previews", False)
self.intermediary = self.config("intermediary", True)
self.comments_avatars = self.config("comments-avatars", False)
self.comments = self.comments_avatars or self.config("comments", False)
self.api = DeviantartOAuthAPI(self)
+ self.eclipse_api = None
self.group = False
self._premium_cache = {}
@@ -76,6 +78,11 @@ class DeviantartExtractor(Extractor):
else:
self._update_content = self._update_content_default
+ if self.previews == "all":
+ self.previews_images = self.previews = True
+ else:
+ self.previews_images = False
+
journals = self.config("journals", "html")
if journals == "html":
self.commit_journal = self._commit_journal_html
@@ -171,8 +178,19 @@ class DeviantartExtractor(Extractor):
if self.commit_journal:
if "excerpt" in deviation:
- journal = self.api.deviation_content(
- deviation["deviationid"])
+ # journal = self.api.deviation_content(
+ # deviation["deviationid"])
+ if not self.eclipse_api:
+ self.eclipse_api = DeviantartEclipseAPI(self)
+ content = self.eclipse_api.deviation_extended_fetch(
+ deviation["index"],
+ deviation["author"]["username"],
+ "journal",
+ )["deviation"]["textContent"]
+ html = content["html"]["markup"]
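+ # the markup may be embedded JSON rather than HTML; fall back to the plain-text excerpt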
+ if html.startswith("{"):
+ html = content["excerpt"].replace("\n", "<br />")
+ journal = {"html": html}
elif "body" in deviation:
journal = {"html": deviation.pop("body")}
else:
@@ -197,6 +215,18 @@ class DeviantartExtractor(Extractor):
comment["_extractor"] = DeviantartAvatarExtractor
yield Message.Queue, url, comment
+ if self.previews and "preview" in deviation:
+ preview = deviation["preview"]
+ deviation["is_preview"] = True
+ if self.previews_images:
+ yield self.commit(deviation, preview)
+ else:
+ mtype = mimetypes.guess_type(
+ "a." + deviation["extension"], False)[0]
+ if mtype and not mtype.startswith("image/"):
+ yield self.commit(deviation, preview)
+ del deviation["is_preview"]
+
if not self.extra:
continue
@@ -284,6 +314,9 @@ class DeviantartExtractor(Extractor):
html = journal["html"]
shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""
+ if not html:
+ self.log.warning("%s: Empty journal content", deviation["index"])
+
if "css" in journal:
css, cls = journal["css"], "withskin"
elif html.startswith("<style"):
@@ -321,10 +354,11 @@ class DeviantartExtractor(Extractor):
deviation["extension"] = "htm"
return Message.Url, html, deviation
- @staticmethod
- def _commit_journal_text(deviation, journal):
+ def _commit_journal_text(self, deviation, journal):
html = journal["html"]
- if html.startswith("<style"):
+ if not html:
+ self.log.warning("%s: Empty journal content", deviation["index"])
+ elif html.startswith("<style"):
html = html.partition("</style>")[2]
head, _, tail = html.rpartition("<script")
content = "\n".join(
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index 1b4971c..6aefa11 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -37,11 +37,13 @@ class FlickrExtractor(Extractor):
extract = self.api._extract_format
for photo in self.photos():
try:
photo = extract(photo)
except Exception as exc:
self.log.warning(
- "Skipping %s (%s)", photo["id"], exc.__class__.__name__)
- self.log.debug("", exc_info=True)
+ "Skipping photo %s (%s: %s)",
+ photo["id"], exc.__class__.__name__, exc)
+ self.log.debug("", exc_info=exc)
else:
photo.update(data)
url = photo["url"]
diff --git a/gallery_dl/extractor/inkbunny.py b/gallery_dl/extractor/inkbunny.py
index f3098f1..bff3156 100644
--- a/gallery_dl/extractor/inkbunny.py
+++ b/gallery_dl/extractor/inkbunny.py
@@ -132,6 +132,7 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
class InkbunnyFavoriteExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user favorites"""
subcategory = "favorite"
+ directory_fmt = ("{category}", "{favs_username!l}", "Favorites")
pattern = (BASE_PATTERN + r"/(?:"
r"userfavorites_process\.php\?favs_user_id=(\d+)|"
r"submissionsviewall\.php"
@@ -151,7 +152,17 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
self.orderby = params.get("orderby", "fav_datetime")
def metadata(self):
- return {"favs_user_id": self.user_id}
+ # look up the username for 'favs_user_id'
+ url = "{}/userfavorites_process.php?favs_user_id={}".format(
+ self.root, self.user_id)
+ page = self.request(url).text
+ user_link = text.extr(page, '<a rel="author"', '</a>')
+ favs_username = text.extr(user_link, 'href="/', '"')
+
+ return {
+ "favs_user_id": self.user_id,
+ "favs_username": favs_username,
+ }
def posts(self):
params = {
diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py
index 5fc0ce5..dfa1f6e 100644
--- a/gallery_dl/extractor/newgrounds.py
+++ b/gallery_dl/extractor/newgrounds.py
@@ -53,8 +53,8 @@ class NewgroundsExtractor(Extractor):
try:
post = self.extract_post(post_url)
url = post.get("url")
- except Exception:
- self.log.debug("", exc_info=True)
+ except Exception as exc:
+ self.log.debug("", exc_info=exc)
url = None
if url:
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 3479b88..c908e44 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -448,7 +448,8 @@ class PixivRankingExtractor(PixivExtractor):
self.log.warning("invalid date '%s'", date)
date = None
if not date:
- date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
+ now = util.datetime_utcnow()
+ date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
self.date = date
return {"ranking": {
@@ -887,7 +888,7 @@ class PixivAppAPI():
"get_secure_url": "1",
}
- time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
+ time = util.datetime_utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00")
headers = {
"X-Client-Time": time,
"X-Client-Hash": hashlib.md5(
diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py
index 6ec44ba..07c9b21 100644
--- a/gallery_dl/extractor/skeb.py
+++ b/gallery_dl/extractor/skeb.py
@@ -26,7 +26,11 @@ class SkebExtractor(Extractor):
def _init(self):
self.thumbnails = self.config("thumbnails", False)
self.article = self.config("article", False)
- self.headers = {"Accept": "application/json, text/plain, */*"}
+ self.headers = {
+ "Accept": "application/json, text/plain, */*",
+ "sec-fetch-mode": "cors",
+ "sec-fetch-site": "same-origin",
+ }
if "Authorization" not in self.session.headers:
self.headers["Authorization"] = "Bearer null"
diff --git a/gallery_dl/extractor/weasyl.py b/gallery_dl/extractor/weasyl.py
index ddbfaa0..13b0520 100644
--- a/gallery_dl/extractor/weasyl.py
+++ b/gallery_dl/extractor/weasyl.py
@@ -159,24 +159,26 @@ class WeasylJournalsExtractor(WeasylExtractor):
class WeasylFavoriteExtractor(WeasylExtractor):
subcategory = "favorite"
- directory_fmt = ("{category}", "{owner_login}", "Favorites")
- pattern = BASE_PATTERN + r"favorites\?userid=(\d+)"
+ directory_fmt = ("{category}", "{user}", "Favorites")
+ pattern = BASE_PATTERN + r"favorites(?:\?userid=(\d+)|/([^/?#]+))"
example = "https://www.weasyl.com/favorites?userid=12345"
- def __init__(self, match):
- WeasylExtractor.__init__(self, match)
- self.userid = match.group(1)
-
def items(self):
+ userid, username = self.groups
owner_login = lastid = None
- url = self.root + "/favorites"
+
+ if username:
+ owner_login = username
+ path = "/favorites/" + username
+ else:
+ path = "/favorites"
params = {
- "userid" : self.userid,
+ "userid" : userid,
"feature": "submit",
}
while True:
- page = self.request(url, params=params).text
+ page = self.request(self.root + path, params=params).text
pos = page.index('id="favorites-content"')
if not owner_login:
@@ -186,12 +188,16 @@ class WeasylFavoriteExtractor(WeasylExtractor):
if submitid == lastid:
continue
lastid = submitid
+
submission = self.request_submission(submitid)
if self.populate_submission(submission):
submission["user"] = owner_login
yield Message.Directory, submission
yield Message.Url, submission["url"], submission
- if "&amp;nextid=" not in page:
+ try:
+ pos = page.index('">Next (', pos)
+ except ValueError:
return
- params["nextid"] = submitid
+ path = text.unescape(text.rextract(page, 'href="', '"', pos)[0])
+ params = None
diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py
index 7a62e01..116f557 100644
--- a/gallery_dl/extractor/wikimedia.py
+++ b/gallery_dl/extractor/wikimedia.py
@@ -10,7 +10,8 @@
"""Extractors for Wikimedia sites"""
from .common import BaseExtractor, Message
-from .. import text
+from .. import text, exception
+from ..cache import cache
class WikimediaExtractor(BaseExtractor):
@@ -39,7 +40,17 @@ class WikimediaExtractor(BaseExtractor):
else:
self.api_url = api_path
else:
- self.api_url = self.root + "/api.php"
+ self.api_url = None
+
+ @cache(maxage=36500*86400, keyarg=1)
+ def _search_api_path(self, root):
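+ # try the endpoint paths commonly used by MediaWiki installations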
+ self.log.debug("Probing possible API endpoints")
+ for path in ("/api.php", "/w/api.php", "/wiki/api.php"):
+ url = root + path
+ response = self.request(url, method="HEAD", fatal=None)
+ if response.status_code < 400:
+ return url
+ raise exception.StopExtraction("Unable to find API endpoint")
@staticmethod
def prepare(image):
@@ -76,6 +87,9 @@ class WikimediaExtractor(BaseExtractor):
"""
url = self.api_url
+ if not url:
+ url = self._search_api_path(self.root)
+
params["action"] = "query"
params["format"] = "json"
params["prop"] = "imageinfo"
@@ -139,14 +153,17 @@ BASE_PATTERN = WikimediaExtractor.update({
"fandom": {
"root": None,
"pattern": r"[\w-]+\.fandom\.com",
+ "api-path": "/api.php",
},
"wikigg": {
"root": None,
"pattern": r"\w+\.wiki\.gg",
+ "api-path": "/api.php",
},
"mariowiki": {
"root": "https://www.mariowiki.com",
"pattern": r"(?:www\.)?mariowiki\.com",
+ "api-path": "/api.php",
},
"bulbapedia": {
"root": "https://bulbapedia.bulbagarden.net",
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
index 45b0cd8..05b12b4 100644
--- a/gallery_dl/extractor/zzup.py
+++ b/gallery_dl/extractor/zzup.py
@@ -4,6 +4,8 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
+"""Extractors for https://zzup.com/"""
+
from .common import GalleryExtractor
from .. import text
@@ -11,17 +13,20 @@ from .. import text
class ZzupGalleryExtractor(GalleryExtractor):
category = "zzup"
directory_fmt = ("{category}", "{title}")
- filename_fmt = "{slug}_{num:>03}.{extension}"
+ filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{slug}_{num}"
root = "https://zzup.com"
- pattern = (r"(?:https?://)?(?:www\.)?zzup\.com(/content"
+ pattern = (r"(?:https?://)?(up\.|www\.)?zzup\.com(/(?:viewalbum|content)"
r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
def __init__(self, match):
- url = "{}/{}/index.html".format(self.root, match.group(1))
+ subdomain, path, self.slug = match.groups()
+ if subdomain == "up.":
+ self.root = "https://up.zzup.com"
+ self.images = self.images_v2
+ url = "{}{}/index.html".format(self.root, path)
GalleryExtractor.__init__(self, match, url)
- self.slug = match.group(2)
def metadata(self, page):
return {
@@ -38,3 +43,20 @@ class ZzupGalleryExtractor(GalleryExtractor):
p1, _, p2 = url.partition("/image0")
ufmt = p1 + "/image{:>05}" + p2[4:]
return [(ufmt.format(num), None) for num in range(1, count + 1)]
+
+ def images_v2(self, page):
+ results = []
+
+ while True:
+ for path in text.extract_iter(
+ page, ' class="picbox"><a target="_blank" href="', '"'):
+ results.append(("{}/showimage/{}/zzup.com.jpg".format(
+ self.root, "/".join(path.split("/")[2:-2])), None))
+
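+ # a right-arrow icon marks the link to the next results page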
+ pos = page.find("glyphicon-arrow-right")
+ if pos < 0:
+ break
+ path = text.rextract(page, ' href="', '"', pos)[0]
+ page = self.request(text.urljoin(self.gallery_url, path)).text
+
+ return results
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index f197e5d..e662c34 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -476,6 +476,7 @@ _GLOBALS = {
"_env": lambda: os.environ,
"_lit": lambda: _literal,
"_now": datetime.datetime.now,
+ "_nul": lambda: util.NONE,
}
_CONVERSIONS = {
"l": str.lower,
@@ -484,6 +485,7 @@ _CONVERSIONS = {
"C": string.capwords,
"j": util.json_dumps,
"t": str.strip,
+ "L": len,
"T": util.datetime_to_timestamp_string,
"d": text.parse_timestamp,
"U": text.unescape,
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index c995767..4affd55 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -158,11 +158,12 @@ class Job():
raise
except exception.GalleryDLException as exc:
log.error("%s: %s", exc.__class__.__name__, exc)
+ log.debug("", exc_info=exc)
self.status |= exc.code
except OSError as exc:
log.error("Unable to download data: %s: %s",
exc.__class__.__name__, exc)
- log.debug("", exc_info=True)
+ log.debug("", exc_info=exc)
self.status |= 128
except Exception as exc:
log.error(("An unexpected error occurred: %s - %s. "
@@ -170,7 +171,7 @@ class Job():
"copy its output and report this issue on "
"https://github.com/mikf/gallery-dl/issues ."),
exc.__class__.__name__, exc)
- log.debug("", exc_info=True)
+ log.debug("", exc_info=exc)
self.status |= 1
except BaseException:
self.status |= 1
@@ -641,7 +642,7 @@ class DownloadJob(Job):
except Exception as exc:
pp_log.error("'%s' initialization failed: %s: %s",
name, exc.__class__.__name__, exc)
- pp_log.debug("", exc_info=True)
+ pp_log.debug("", exc_info=exc)
else:
pp_list.append(pp_obj)
diff --git a/gallery_dl/option.py b/gallery_dl/option.py
index 0189c0e..c4f5b94 100644
--- a/gallery_dl/option.py
+++ b/gallery_dl/option.py
@@ -131,12 +131,17 @@ class UgoiraAction(argparse.Action):
"[a] palettegen [p];[b][p] paletteuse"),
"repeat-last-frame": False,
}
- elif value in ("mkv", "copy"):
+ elif value == "mkv" or value == "copy":
pp = {
"extension" : "mkv",
"ffmpeg-args" : ("-c:v", "copy"),
"repeat-last-frame": False,
}
+ elif value == "zip" or value == "archive":
+ pp = {
+ "mode" : "archive",
+ }
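+ # set the 'ugoira' option to 'original', presumably to keep the unconverted file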
+ namespace.options.append(((), "ugoira", "original"))
else:
parser.error("Unsupported Ugoira format '{}'".format(value))
@@ -344,7 +349,7 @@ def build_parser():
)
output.add_argument(
"--list-extractors",
- dest="list_extractors", action="store_true",
+ dest="list_extractors", metavar="CATEGORIES", nargs="*",
help=("Print a list of extractor classes "
"with description, (sub)category and example URL"),
)
@@ -693,7 +698,7 @@ def build_parser():
dest="postprocessors", metavar="FMT", action=UgoiraAction,
help=("Convert Pixiv Ugoira to FMT using FFmpeg. "
"Supported formats are 'webm', 'mp4', 'gif', "
- "'vp8', 'vp9', 'vp9-lossless', 'copy'."),
+ "'vp8', 'vp9', 'vp9-lossless', 'copy', 'zip'."),
)
postprocessor.add_argument(
"--ugoira-conv",
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index f053afa..87a0ba6 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -29,12 +29,12 @@ class UgoiraPP(PostProcessor):
def __init__(self, job, options):
PostProcessor.__init__(self, job)
- self.extension = options.get("extension") or "webm"
self.args = options.get("ffmpeg-args") or ()
self.twopass = options.get("ffmpeg-twopass", False)
self.output = options.get("ffmpeg-output", "error")
self.delete = not options.get("keep-files", False)
self.repeat = options.get("repeat-last-frame", True)
+ self.metadata = options.get("metadata", True)
self.mtime = options.get("mtime", True)
self.skip = options.get("skip", True)
self.uniform = self._convert_zip = self._convert_files = False
@@ -45,24 +45,31 @@ class UgoiraPP(PostProcessor):
mkvmerge = options.get("mkvmerge-location")
self.mkvmerge = util.expand_path(mkvmerge) if mkvmerge else "mkvmerge"
- demuxer = options.get("ffmpeg-demuxer")
- if demuxer is None or demuxer == "auto":
- if self.extension in ("webm", "mkv") and (
+ ext = options.get("extension")
+ mode = options.get("mode") or options.get("ffmpeg-demuxer")
+ if mode is None or mode == "auto":
+ if ext in (None, "webm", "mkv") and (
mkvmerge or shutil.which("mkvmerge")):
- demuxer = "mkvmerge"
+ mode = "mkvmerge"
else:
- demuxer = "concat"
+ mode = "concat"
- if demuxer == "mkvmerge":
+ if mode == "mkvmerge":
self._process = self._process_mkvmerge
self._finalize = self._finalize_mkvmerge
- elif demuxer == "image2":
+ elif mode == "image2":
self._process = self._process_image2
self._finalize = None
+ elif mode == "archive":
+ if ext is None:
+ ext = "zip"
+ self._convert_impl = self.convert_to_archive
+ self._tempdir = util.NullContext
else:
self._process = self._process_concat
self._finalize = None
- self.log.debug("using %s demuxer", demuxer)
+ self.extension = "webm" if ext is None else ext
+ self.log.debug("using %s demuxer", mode)
rate = options.get("framerate", "auto")
if rate == "uniform":
@@ -93,8 +100,8 @@ class UgoiraPP(PostProcessor):
job.register_hooks({
"prepare": self.prepare,
- "file" : self.convert_zip,
- "after" : self.convert_files,
+ "file" : self.convert_from_zip,
+ "after" : self.convert_from_files,
}, options)
def prepare(self, pathfmt):
@@ -109,12 +116,15 @@ class UgoiraPP(PostProcessor):
pathfmt.set_extension(self.extension)
pathfmt.build_path()
else:
+ index = pathfmt.kwdict.get("_ugoira_frame_index")
+ if index is None:
+ return
+
pathfmt.build_path()
- index = pathfmt.kwdict["_ugoira_frame_index"]
frame = self._frames[index].copy()
frame["index"] = index
frame["path"] = pathfmt.realpath
- frame["ext"] = pathfmt.kwdict["extension"]
+ frame["ext"] = pathfmt.extension
if not index:
self._files = [frame]
@@ -123,31 +133,34 @@ class UgoiraPP(PostProcessor):
if len(self._files) >= len(self._frames):
self._convert_files = True
- def convert_zip(self, pathfmt):
+ def convert_from_zip(self, pathfmt):
if not self._convert_zip:
return
self._convert_zip = False
+ self._zip_source = True
- with tempfile.TemporaryDirectory() as tempdir:
- try:
- with zipfile.ZipFile(pathfmt.temppath) as zfile:
- zfile.extractall(tempdir)
- except FileNotFoundError:
- pathfmt.realpath = pathfmt.temppath
- return
+ with self._tempdir() as tempdir:
+ if tempdir:
+ try:
+ with zipfile.ZipFile(pathfmt.temppath) as zfile:
+ zfile.extractall(tempdir)
+ except FileNotFoundError:
+ pathfmt.realpath = pathfmt.temppath
+ return
if self.convert(pathfmt, tempdir):
if self.delete:
pathfmt.delete = True
- else:
+ elif pathfmt.extension != "zip":
self.log.info(pathfmt.filename)
pathfmt.set_extension("zip")
pathfmt.build_path()
- def convert_files(self, pathfmt):
+ def convert_from_files(self, pathfmt):
if not self._convert_files:
return
self._convert_files = False
+ self._zip_source = False
with tempfile.TemporaryDirectory() as tempdir:
for frame in self._files:
@@ -156,13 +169,14 @@ class UgoiraPP(PostProcessor):
frame["file"] = name = "{}.{}".format(
frame["file"].partition(".")[0], frame["ext"])
- # move frame into tempdir
- try:
- self._copy_file(frame["path"], tempdir + "/" + name)
- except OSError as exc:
- self.log.debug("Unable to copy frame %s (%s: %s)",
- name, exc.__class__.__name__, exc)
- return
+ if tempdir:
+ # move frame into tempdir
+ try:
+ self._copy_file(frame["path"], tempdir + "/" + name)
+ except OSError as exc:
+ self.log.debug("Unable to copy frame %s (%s: %s)",
+ name, exc.__class__.__name__, exc)
+ return
pathfmt.kwdict["num"] = 0
self._frames = self._files
@@ -179,6 +193,9 @@ class UgoiraPP(PostProcessor):
if self.skip and pathfmt.exists():
return True
+ return self._convert_impl(pathfmt, tempdir)
+
+ def convert_to_animation(self, pathfmt, tempdir):
# process frames and collect command-line arguments
args = self._process(pathfmt, tempdir)
if self.args_pp:
@@ -206,11 +223,12 @@ class UgoiraPP(PostProcessor):
print()
self.log.error("Unable to invoke FFmpeg (%s: %s)",
exc.__class__.__name__, exc)
+ self.log.debug("", exc_info=exc)
pathfmt.realpath = pathfmt.temppath
except Exception as exc:
print()
self.log.error("%s: %s", exc.__class__.__name__, exc)
- self.log.debug("", exc_info=True)
+ self.log.debug("", exc_info=exc)
pathfmt.realpath = pathfmt.temppath
else:
if self.mtime:
@@ -219,6 +237,54 @@ class UgoiraPP(PostProcessor):
util.set_mtime(pathfmt.realpath, mtime)
return True
+ def convert_to_archive(self, pathfmt, tempdir):
+ frames = self._frames
+
+ if self.metadata:
+ if isinstance(self.metadata, str):
+ metaname = self.metadata
+ else:
+ metaname = "animation.json"
+ framedata = util.json_dumps([
+ {"file": frame["file"], "delay": frame["delay"]}
+ for frame in frames
+ ]).encode()
+
+ if self._zip_source:
+ self.delete = False
+ if self.metadata:
+ with zipfile.ZipFile(pathfmt.temppath, "a") as zfile:
+ zinfo = zipfile.ZipInfo(metaname)
+ if self.mtime:
+ zinfo.date_time = zfile.infolist()[0].date_time
+ with zfile.open(zinfo, "w") as fp:
+ fp.write(framedata)
+ else:
+ if self.mtime:
+ dt = pathfmt.kwdict["date_url"] or pathfmt.kwdict["date"]
+ mtime = (dt.year, dt.month, dt.day,
+ dt.hour, dt.minute, dt.second)
+ with zipfile.ZipFile(pathfmt.realpath, "w") as zfile:
+ for frame in frames:
+ zinfo = zipfile.ZipInfo.from_file(
+ frame["path"], frame["file"])
+ if self.mtime:
+ zinfo.date_time = mtime
+ with open(frame["path"], "rb") as src, \
+ zfile.open(zinfo, "w") as dst:
+ shutil.copyfileobj(src, dst, 1024*8)
+ if self.metadata:
+ zinfo = zipfile.ZipInfo(metaname)
+ if self.mtime:
+ zinfo.date_time = mtime
+ with zfile.open(zinfo, "w") as fp:
+ fp.write(framedata)
+
+ return True
+
+ _convert_impl = convert_to_animation
+ _tempdir = tempfile.TemporaryDirectory
+
def _exec(self, args):
self.log.debug(args)
out = None if self.output else subprocess.DEVNULL
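
When the download is already a zip archive (self._zip_source),
convert_to_archive() only appends the frame-delay metadata in place and
keeps the file. A self-contained sketch of that append step
(append_metadata is a hypothetical helper name):

    import json
    import zipfile

    def append_metadata(path, frames, metaname="animation.json"):
        framedata = json.dumps(
            [{"file": f["file"], "delay": f["delay"]} for f in frames]
        ).encode()
        with zipfile.ZipFile(path, "a") as zfile:
            zinfo = zipfile.ZipInfo(metaname)
            # reuse the first member's timestamp so the new entry's
            # mtime matches the existing frames
            zinfo.date_time = zfile.infolist()[0].date_time
            with zfile.open(zinfo, "w") as fp:
                fp.write(framedata)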
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index 9258187..8517cdf 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -9,7 +9,9 @@
"""Collection of functions that work on strings/text"""
import re
+import sys
import html
+import time
import datetime
import urllib.parse
@@ -247,12 +249,23 @@ def parse_query(qs):
return result
-def parse_timestamp(ts, default=None):
- """Create a datetime object from a unix timestamp"""
- try:
- return datetime.datetime.utcfromtimestamp(int(ts))
- except Exception:
- return default
+if sys.hexversion < 0x30c0000:
+ # Python <= 3.11
+ def parse_timestamp(ts, default=None):
+ """Create a datetime object from a Unix timestamp"""
+ try:
+ return datetime.datetime.utcfromtimestamp(int(ts))
+ except Exception:
+ return default
+else:
+ # Python >= 3.12
+ def parse_timestamp(ts, default=None):
+ """Create a datetime object from a Unix timestamp"""
+ try:
+ Y, m, d, H, M, S, _, _, _ = time.gmtime(int(ts))
+ return datetime.datetime(Y, m, d, H, M, S)
+ except Exception:
+ return default
def parse_datetime(date_string, format="%Y-%m-%dT%H:%M:%S%z", utcoffset=0):
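
datetime.datetime.utcfromtimestamp() is deprecated as of Python 3.12, so
the new branch rebuilds the naive UTC datetime from time.gmtime(). The two
produce the same value for integer timestamps:

    import time
    import datetime

    ts = 1700000000
    Y, m, d, H, M, S, _, _, _ = time.gmtime(ts)
    via_gmtime = datetime.datetime(Y, m, d, H, M, S)

    # identical to the deprecated utcfromtimestamp(), and to the aware
    # round-trip with its tzinfo stripped
    aware = datetime.datetime.fromtimestamp(ts, datetime.timezone.utc)
    assert via_gmtime == aware.replace(tzinfo=None)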
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index ecb496d..128f48b 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -218,18 +218,34 @@ def to_string(value):
def datetime_to_timestamp(dt):
- """Convert naive UTC datetime to timestamp"""
+ """Convert naive UTC datetime to Unix timestamp"""
return (dt - EPOCH) / SECOND
def datetime_to_timestamp_string(dt):
- """Convert naive UTC datetime to timestamp string"""
+ """Convert naive UTC datetime to Unix timestamp string"""
try:
return str((dt - EPOCH) // SECOND)
except Exception:
return ""
+if sys.hexversion < 0x30c0000:
+ # Python <= 3.11
+ datetime_utcfromtimestamp = datetime.datetime.utcfromtimestamp
+ datetime_utcnow = datetime.datetime.utcnow
+ datetime_from_timestamp = datetime_utcfromtimestamp
+else:
+ # Python >= 3.12
+ def datetime_from_timestamp(ts=None):
+ """Convert Unix timestamp to naive UTC datetime"""
+ Y, m, d, H, M, S, _, _, _ = time.gmtime(ts)
+ return datetime.datetime(Y, m, d, H, M, S)
+
+ datetime_utcfromtimestamp = datetime_from_timestamp
+ datetime_utcnow = datetime_from_timestamp
+
+
def json_default(obj):
if isinstance(obj, CustomNone):
return None
@@ -516,6 +532,15 @@ class LazyPrompt():
return getpass.getpass()
+class NullContext():
+
+ def __enter__(self):
+ return None
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ pass
+
+
class CustomNone():
"""None-style type that supports more operations than regular None"""
__slots__ = ()
@@ -760,8 +785,9 @@ def build_extractor_filter(categories, negate=True, special=None):
if catsub:
def test(extr):
for category, subcategory in catsub:
- if category in (extr.category, extr.basecategory) and \
- subcategory == extr.subcategory:
+ if subcategory == extr.subcategory and (
+ category == extr.category or
+ category == extr.basecategory):
return not negate
return negate
tests.append(test)
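
NullContext is what lets ugoira.py's archive mode reuse the
`with self._tempdir() as tempdir:` flow without creating a directory: the
class default stays tempfile.TemporaryDirectory, archive mode swaps in
util.NullContext, and the None it yields turns the extraction steps into
no-ops. A minimal sketch of that dispatch, assuming the names from this
diff:

    import tempfile
    from gallery_dl import util

    def process(needs_tempdir):
        factory = (tempfile.TemporaryDirectory if needs_tempdir
                   else util.NullContext)
        with factory() as tempdir:
            if tempdir:
                print("extracting frames into", tempdir)
            else:
                print("operating on the archive in place")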
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 0f9f91b..513da41 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.27.4"
+__version__ = "1.27.5"
__variant__ = None