Diffstat (limited to 'gallery_dl')
-rw-r--r--  gallery_dl/downloader/ytdl.py              1
-rw-r--r--  gallery_dl/extractor/chevereto.py         20
-rw-r--r--  gallery_dl/extractor/danbooru.py           5
-rw-r--r--  gallery_dl/extractor/deviantart.py         4
-rw-r--r--  gallery_dl/extractor/discord.py           16
-rw-r--r--  gallery_dl/extractor/everia.py             2
-rw-r--r--  gallery_dl/extractor/gelbooru.py           3
-rw-r--r--  gallery_dl/extractor/hentai2read.py       24
-rw-r--r--  gallery_dl/extractor/instagram.py          1
-rw-r--r--  gallery_dl/extractor/issuu.py             15
-rw-r--r--  gallery_dl/extractor/kemonoparty.py        3
-rw-r--r--  gallery_dl/extractor/pixiv.py              4
-rw-r--r--  gallery_dl/extractor/readcomiconline.py    4
-rw-r--r--  gallery_dl/extractor/rule34xyz.py         82
-rw-r--r--  gallery_dl/extractor/tumblr.py             4
-rw-r--r--  gallery_dl/extractor/webtoons.py         133
-rw-r--r--  gallery_dl/extractor/zerochan.py           6
-rw-r--r--  gallery_dl/extractor/zzup.py               2
-rw-r--r--  gallery_dl/formatter.py                    2
-rw-r--r--  gallery_dl/path.py                         2
-rw-r--r--  gallery_dl/postprocessor/metadata.py       9
-rw-r--r--  gallery_dl/postprocessor/ugoira.py         7
-rw-r--r--  gallery_dl/util.py                         3
-rw-r--r--  gallery_dl/version.py                      2
24 files changed, 213 insertions, 141 deletions
diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py
index 9d653b3..7a20dc2 100644
--- a/gallery_dl/downloader/ytdl.py
+++ b/gallery_dl/downloader/ytdl.py
@@ -200,6 +200,7 @@ class YoutubeDLDownloader(DownloaderBase):
return None
info_dict = {
+ "extractor": "",
"id" : video_id,
"title" : video_id,
"formats" : fmts,
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index c9ccb7d..600d231 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -18,19 +18,23 @@ class CheveretoExtractor(BaseExtractor):
directory_fmt = ("{category}", "{user}", "{album}",)
archive_fmt = "{id}"
- def __init__(self, match):
- BaseExtractor.__init__(self, match)
- self.path = match.group(match.lastindex)
+ def _init(self):
+ self.path = self.groups[-1]
def _pagination(self, url):
- while url:
+ while True:
page = self.request(url).text
for item in text.extract_iter(
page, '<div class="list-item-image ', 'image-container'):
- yield text.extr(item, '<a href="', '"')
+ yield text.urljoin(self.root, text.extr(
+ item, '<a href="', '"'))
- url = text.extr(page, '<a data-pagination="next" href="', '" ><')
+ url = text.extr(page, 'data-pagination="next" href="', '"')
+ if not url:
+ return
+ if url[0] == "/":
+ url = self.root + url
BASE_PATTERN = CheveretoExtractor.update({
@@ -42,6 +46,10 @@ BASE_PATTERN = CheveretoExtractor.update({
"root": "https://img.kiwi",
"pattern": r"img\.kiwi",
},
+ "imagepond": {
+ "root": "https://imagepond.net",
+ "pattern": r"imagepond\.net",
+ },
})
diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py
index 741800c..06c31b9 100644
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@@ -282,10 +282,11 @@ class DanbooruPoolExtractor(DanbooruExtractor):
example = "https://danbooru.donmai.us/pools/12345"
def metadata(self):
- return self._collection_metadata(self.groups[-1], "pool")
+ self.pool_id = self.groups[-1]
+ return self._collection_metadata(self.pool_id, "pool")
def posts(self):
- return self._collection_posts(self.groups[-1], "pool")
+ return self._collection_posts(self.pool_id, "pool")
class DanbooruFavgroupExtractor(DanbooruExtractor):
diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py
index 3a862c1..378c7ec 100644
--- a/gallery_dl/extractor/deviantart.py
+++ b/gallery_dl/extractor/deviantart.py
@@ -687,7 +687,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
for folder in folders:
if match(folder["name"]):
return folder
- elif folder["has_subfolders"]:
+ elif folder.get("has_subfolders"):
for subfolder in folder["subfolders"]:
if match(subfolder["name"]):
return subfolder
@@ -695,7 +695,7 @@ x2="45.4107524%" y2="71.4898596%" id="app-root-3">\
for folder in folders:
if folder["folderid"] == uuid:
return folder
- elif folder["has_subfolders"]:
+ elif folder.get("has_subfolders"):
for subfolder in folder["subfolders"]:
if subfolder["folderid"] == uuid:
return subfolder
diff --git a/gallery_dl/extractor/discord.py b/gallery_dl/extractor/discord.py
index 6a5fcc9..ac21fec 100644
--- a/gallery_dl/extractor/discord.py
+++ b/gallery_dl/extractor/discord.py
@@ -49,7 +49,10 @@ class DiscordExtractor(Extractor):
text_content.append(field.get("name", ""))
text_content.append(field.get("value", ""))
- text_content.append(embed.get("footer", {}).get("text", ""))
+ try:
+ text_content.append(embed["footer"]["text"])
+ except Exception:
+ pass
if message.get("poll"):
text_content.append(message["poll"]["question"]["text"])
@@ -224,10 +227,12 @@ class DiscordExtractor(Extractor):
return self.server_metadata
def build_server_and_channels(self, server_id):
- server = self.api.get_server(server_id)
- self.parse_server(server)
+ self.parse_server(self.api.get_server(server_id))
- for channel in self.api.get_server_channels(server_id):
+ for channel in sorted(
+ self.api.get_server_channels(server_id),
+ key=lambda ch: ch["type"] != 4
+ ):
self.parse_channel(channel)
@@ -353,7 +358,8 @@ class DiscordAPI():
"limit": MESSAGES_BATCH,
"before": before
})
- before = messages[-1]["id"]
+ if messages:
+ before = messages[-1]["id"]
return messages
return self._pagination(_method, MESSAGES_BATCH)
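
A side note on the channel-ordering change in discord.py above: sorted() with a boolean key places False before True and is stable, so channels of type 4 (Discord's category channels) are parsed first while the original order is otherwise preserved. A minimal sketch with made-up channel data:

    channels = [
        {"id": "1", "type": 0},   # text channel
        {"id": "2", "type": 4},   # category
        {"id": "3", "type": 2},   # voice channel
    ]
    ordered = sorted(channels, key=lambda ch: ch["type"] != 4)
    print([ch["id"] for ch in ordered])   # ['2', '1', '3']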
diff --git a/gallery_dl/extractor/everia.py b/gallery_dl/extractor/everia.py
index 94444ff..e41f6f6 100644
--- a/gallery_dl/extractor/everia.py
+++ b/gallery_dl/extractor/everia.py
@@ -52,7 +52,7 @@ class EveriaPostExtractor(EveriaExtractor):
def items(self):
url = self.root + self.groups[0]
page = self.request(url).text
- content = text.extr(page, 'itemprop="text">', "</div>")
+ content = text.extr(page, 'itemprop="text">', "<h3")
urls = re.findall(r'img.*?src="([^"]+)', content)
data = {
diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py
index 37c776e..eb07739 100644
--- a/gallery_dl/extractor/gelbooru.py
+++ b/gallery_dl/extractor/gelbooru.py
@@ -114,11 +114,12 @@ class GelbooruBase():
md5 = post["md5"]
path = "/images/{}/{}/{}.webm".format(md5[0:2], md5[2:4], md5)
post["_fallback"] = GelbooruBase._video_fallback(path)
- url = "https://img3.gelbooru.com" + path
+ url = "https://img4.gelbooru.com" + path
return url
@staticmethod
def _video_fallback(path):
+ yield "https://img3.gelbooru.com" + path
yield "https://img2.gelbooru.com" + path
yield "https://img1.gelbooru.com" + path
diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py
index 9ab1411..1317ce9 100644
--- a/gallery_dl/extractor/hentai2read.py
+++ b/gallery_dl/extractor/hentai2read.py
@@ -25,26 +25,30 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))"
example = "https://hentai2read.com/TITLE/1/"
- def __init__(self, match):
- self.chapter = match.group(2)
- ChapterExtractor.__init__(self, match)
-
def metadata(self, page):
title, pos = text.extract(page, "<title>", "</title>")
manga_id, pos = text.extract(page, 'data-mid="', '"', pos)
chapter_id, pos = text.extract(page, 'data-cid="', '"', pos)
- chapter, sep, minor = self.chapter.partition(".")
- match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.+))? - "
+ chapter, sep, minor = self.groups[1].partition(".")
+
+ match = re.match(r"Reading (.+) \(([^)]+)\) Hentai(?: by (.*))? - "
r"([^:]+): (.+) . Page 1 ", title)
+ if match:
+ manga, type, author, _, title = match.groups()
+ else:
+ self.log.warning("Failed to extract 'manga', 'type', 'author', "
+ "and 'title' metadata")
+ manga = type = author = title = ""
+
return {
- "manga": match.group(1),
+ "manga": manga,
"manga_id": text.parse_int(manga_id),
"chapter": text.parse_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": text.parse_int(chapter_id),
- "type": match.group(2),
- "author": match.group(3),
- "title": match.group(5),
+ "type": type,
+ "author": author,
+ "title": title,
"lang": "en",
"language": "English",
}
diff --git a/gallery_dl/extractor/instagram.py b/gallery_dl/extractor/instagram.py
index aa26408..432a7ad 100644
--- a/gallery_dl/extractor/instagram.py
+++ b/gallery_dl/extractor/instagram.py
@@ -29,6 +29,7 @@ class InstagramExtractor(Extractor):
root = "https://www.instagram.com"
cookies_domain = ".instagram.com"
cookies_names = ("sessionid",)
+ useragent = util.USERAGENT_CHROME
request_interval = (6.0, 12.0)
def __init__(self, match):
diff --git a/gallery_dl/extractor/issuu.py b/gallery_dl/extractor/issuu.py
index 65717b4..abbdfd5 100644
--- a/gallery_dl/extractor/issuu.py
+++ b/gallery_dl/extractor/issuu.py
@@ -29,9 +29,11 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
example = "https://issuu.com/issuu/docs/TITLE/"
def metadata(self, page):
- pos = page.rindex('id="initial-data"')
- data = util.json_loads(text.unescape(text.rextract(
- page, '<script data-json="', '"', pos)[0]))
+
+ data = text.extr(
+ page, '{\\"documentTextVersion\\":', ']\\n"])</script>')
+ data = util.json_loads(text.unescape(
+ '{"":' + data.replace('\\"', '"')))
doc = data["initialDocumentData"]["document"]
doc["date"] = text.parse_datetime(
@@ -39,7 +41,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
self._cnt = text.parse_int(doc["pageCount"])
self._tpl = "https://{}/{}-{}/jpg/page_{{}}.jpg".format(
- data["config"]["hosts"]["image"],
+ "image.isu.pub", # data["config"]["hosts"]["image"],
doc["revisionId"],
doc["publicationId"],
)
@@ -66,9 +68,8 @@ class IssuuUserExtractor(IssuuBase, Extractor):
url = base + "/" + str(pnum) if pnum > 1 else base
try:
html = self.request(url).text
- data = util.json_loads(text.unescape(text.extr(
- html, '</main></div><script data-json="', '" id="')))
- docs = data["docs"]
+ data = text.extr(html, '\\"docs\\":', '}]\\n"]')
+ docs = util.json_loads(data.replace('\\"', '"'))
except Exception as exc:
self.log.debug("", exc_info=exc)
return
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 860e771..de7d040 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -123,6 +123,9 @@ class KemonopartyExtractor(Extractor):
g(post) for g in generators):
url = file["path"]
+ if "\\" in url:
+ file["path"] = url = url.replace("\\", "/")
+
match = find_hash(url)
if match:
file["hash"] = hash = match.group(1)
diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
index 8a4905d..e8050b3 100644
--- a/gallery_dl/extractor/pixiv.py
+++ b/gallery_dl/extractor/pixiv.py
@@ -15,7 +15,7 @@ from datetime import datetime, timedelta
import itertools
import hashlib
-BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net"
+BASE_PATTERN = r"(?:https?://)?(?:www\.|touch\.)?ph?ixiv\.net"
USER_PATTERN = BASE_PATTERN + r"/(?:en/)?users/(\d+)"
@@ -531,7 +531,7 @@ class PixivMeExtractor(PixivExtractor):
class PixivWorkExtractor(PixivExtractor):
"""Extractor for a single pixiv work/illustration"""
subcategory = "work"
- pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?pixiv\.net"
+ pattern = (r"(?:https?://)?(?:(?:www\.|touch\.)?ph?ixiv\.net"
r"/(?:(?:en/)?artworks/"
r"|member_illust\.php\?(?:[^&]+&)*illust_id=)(\d+)"
r"|(?:i(?:\d+\.pixiv|\.pximg)\.net"
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index c0374eb..2f2daca 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -85,7 +85,7 @@ class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
replacements = re.findall(
r"l = l\.replace\(/([^/]+)/g, [\"']([^\"']*)", page)
- for block in page.split(" pth = '")[1:]:
+ for block in page.split("\t\tpht = '")[1:]:
pth = text.extr(block, "", "'")
for needle, repl in re.findall(
@@ -129,7 +129,7 @@ class ReadcomiconlineComicExtractor(ReadcomiconlineBase, MangaExtractor):
def baeu(url, root="", root_blogspot="https://2.bp.blogspot.com"):
- """https://readcomiconline.li/Scripts/rguard.min.js"""
+ """https://readcomiconline.li/Scripts/rguard.min.js?v=1.5.4"""
if not root:
root = root_blogspot
diff --git a/gallery_dl/extractor/rule34xyz.py b/gallery_dl/extractor/rule34xyz.py
index 3b8d344..411a71a 100644
--- a/gallery_dl/extractor/rule34xyz.py
+++ b/gallery_dl/extractor/rule34xyz.py
@@ -23,10 +23,18 @@ class Rule34xyzExtractor(BooruExtractor):
per_page = 60
TAG_TYPES = {
- 0: "general",
- 1: "copyright",
- 2: "character",
- 3: "artist",
+ None: "general",
+ 0 : "general",
+ 1 : "general",
+ 2 : "copyright",
+ 4 : "character",
+ 8 : "artist",
+ }
+ FORMATS = {
+ "10" : "pic.jpg",
+ "100": "mov.mp4",
+ "101": "mov720.mp4",
+ "102": "mov480.mp4",
}
def _init(self):
@@ -36,49 +44,49 @@ class Rule34xyzExtractor(BooruExtractor):
formats = formats.split(",")
self.formats = formats
else:
- self.formats = ("10", "40", "41", "2")
+ self.formats = ("100", "101", "102", "10")
def _file_url(self, post):
- post["files"] = files = {
- str(link["type"]): link["url"]
- for link in post.pop("imageLinks")
- }
+ files = post["files"]
for fmt in self.formats:
if fmt in files:
+ extension = self.FORMATS.get(fmt)
break
else:
- fmt = "2"
self.log.warning("%s: Requested format not available", post["id"])
+ fmt = next(iter(files))
- post["file_url"] = url = files[fmt]
+ post_id = post["id"]
+ root = self.root_cdn if files[fmt][0] else self.root
+ post["file_url"] = url = "{}/posts/{}/{}/{}.{}".format(
+ root, post_id // 1000, post_id, post_id, extension)
post["format_id"] = fmt
- post["format"] = url.rsplit(".", 2)[1]
+ post["format"] = extension.partition(".")[0]
+
return url
def _prepare(self, post):
- post.pop("filesPreview", None)
- post.pop("tagsWithType", None)
+ post.pop("files", None)
post["date"] = text.parse_datetime(
- post["created"][:19], "%Y-%m-%dT%H:%M:%S")
+ post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
+ post["filename"], _, post["format"] = post["filename"].rpartition(".")
+ if "tags" in post:
+ post["tags"] = [t["value"] for t in post["tags"]]
def _tags(self, post, _):
- if post.get("tagsWithType") is None:
+ if "tags" not in post:
post.update(self._fetch_post(post["id"]))
tags = collections.defaultdict(list)
- tagslist = []
- for tag in post["tagsWithType"]:
- value = tag["value"]
- tagslist.append(value)
- tags[tag["type"]].append(value)
+ for tag in post["tags"]:
+ tags[tag["type"]].append(tag["value"])
types = self.TAG_TYPES
for type, values in tags.items():
post["tags_" + types[type]] = values
- post["tags"] = tagslist
def _fetch_post(self, post_id):
- url = "{}/api/post/{}".format(self.root, post_id)
+ url = "{}/api/v2/post/{}".format(self.root, post_id)
return self.request(url).json()
def _pagination(self, endpoint, params=None):
@@ -86,22 +94,22 @@ class Rule34xyzExtractor(BooruExtractor):
if params is None:
params = {}
- params["IncludeLinks"] = "true"
- params["IncludeTags"] = "true"
- params["OrderBy"] = "0"
params["Skip"] = self.page_start * self.per_page
- params["Take"] = self.per_page
- params["DisableTotal"] = "true"
+ params["take"] = self.per_page
+ params["CountTotal"] = False
+ params["IncludeLinks"] = True
+ params["OrderBy"] = 0
threshold = self.per_page
while True:
- data = self.request(url, params=params).json()
+ data = self.request(url, method="POST", json=params).json()
yield from data["items"]
if len(data["items"]) < threshold:
return
- params["Skip"] += params["Take"]
+ params["Skip"] += self.per_page
+ params["cursor"] = data["cursor"]
class Rule34xyzPostExtractor(Rule34xyzExtractor):
@@ -125,9 +133,8 @@ class Rule34xyzPlaylistExtractor(Rule34xyzExtractor):
return {"playlist_id": self.groups[0]}
def posts(self):
- endpoint = "/playlist-item"
- params = {"PlaylistId": self.groups[0]}
- return self._pagination(endpoint, params)
+ endpoint = "/v2/post/search/playlist/" + self.groups[0]
+ return self._pagination(endpoint)
class Rule34xyzTagExtractor(Rule34xyzExtractor):
@@ -138,10 +145,11 @@ class Rule34xyzTagExtractor(Rule34xyzExtractor):
example = "https://rule34.xyz/TAG"
def metadata(self):
- self.tags = text.unquote(self.groups[0]).replace("_", " ")
- return {"search_tags": self.tags}
+ self.tags = text.unquote(text.unquote(
+ self.groups[0]).replace("_", " ")).split("|")
+ return {"search_tags": ", ".join(self.tags)}
def posts(self):
- endpoint = "/post/search"
- params = {"Tag": self.tags}
+ endpoint = "/v2/post/search/root"
+ params = {"includeTags": self.tags}
return self._pagination(endpoint, params)
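
For reference, the rewritten _file_url() above no longer takes the URL from the API response but rebuilds it from the post id and the FORMATS extension table. A worked sketch with an illustrative post id (the real code switches to root_cdn when files[fmt][0] is truthy):

    post_id = 123456
    extension = "mov.mp4"                  # FORMATS["100"]
    root = "https://rule34.xyz"            # or root_cdn, depending on files[fmt]
    url = "{}/posts/{}/{}/{}.{}".format(
        root, post_id // 1000, post_id, post_id, extension)
    # -> https://rule34.xyz/posts/123/123456/123456.mov.mp4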
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index 8d1fcde..6f2114e 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -17,7 +17,7 @@ import re
BASE_PATTERN = (
r"(?:tumblr:(?:https?://)?([^/]+)|"
r"(?:https?://)?"
- r"(?:www\.tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|"
+ r"(?:(?:www\.)?tumblr\.com/(?:blog/(?:view/)?)?([\w-]+)|"
r"([\w-]+\.tumblr\.com)))"
)
@@ -357,7 +357,7 @@ class TumblrLikesExtractor(TumblrExtractor):
class TumblrSearchExtractor(TumblrExtractor):
"""Extractor for a Tumblr search"""
subcategory = "search"
- pattern = (BASE_PATTERN + r"/search/([^/?#]+)"
+ pattern = (r"(?:https?://)?(?:www\.)?tumblr\.com/search/([^/?#]+)"
r"(?:/([^/?#]+)(?:/([^/?#]+))?)?(?:/?\?([^#]+))?")
example = "https://www.tumblr.com/search/QUERY"
diff --git a/gallery_dl/extractor/webtoons.py b/gallery_dl/extractor/webtoons.py
index 008ae6e..8ff32af 100644
--- a/gallery_dl/extractor/webtoons.py
+++ b/gallery_dl/extractor/webtoons.py
@@ -12,13 +12,15 @@
from .common import GalleryExtractor, Extractor, Message
from .. import exception, text, util
-BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com/(([^/?#]+)"
+BASE_PATTERN = r"(?:https?://)?(?:www\.)?webtoons\.com"
+LANG_PATTERN = BASE_PATTERN + r"/(([^/?#]+)"
class WebtoonsBase():
category = "webtoons"
root = "https://www.webtoons.com"
cookies_domain = ".webtoons.com"
+ request_interval = (0.5, 1.5)
def setup_agegate_cookies(self):
self.cookies_update({
@@ -34,7 +36,7 @@ class WebtoonsBase():
response = Extractor.request(self, url, **kwargs)
if response.history and "/ageGate" in response.url:
raise exception.StopExtraction(
- "HTTP redirect to age gate check ('%s')", response.request.url)
+ "HTTP redirect to age gate check ('%s')", response.url)
return response
@@ -44,47 +46,19 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
directory_fmt = ("{category}", "{comic}")
filename_fmt = "{episode_no}-{num:>02}.{extension}"
archive_fmt = "{title_no}_{episode_no}_{num}"
- pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+)/(?:[^/?#]+))"
- r"/viewer(?:\?([^#'\"]+))")
+ pattern = (LANG_PATTERN + r"/([^/?#]+)/([^/?#]+)/[^/?#]+)"
+ r"/viewer\?([^#'\"]+)")
example = ("https://www.webtoons.com/en/GENRE/TITLE/NAME/viewer"
"?title_no=123&episode_no=12345")
- test = (
- (("https://www.webtoons.com/en/comedy/safely-endangered"
- "/ep-572-earth/viewer?title_no=352&episode_no=572"), {
- "url": "55bec5d7c42aba19e3d0d56db25fdf0b0b13be38",
- "content": ("1748c7e82b6db910fa179f6dc7c4281b0f680fa7",
- "42055e44659f6ffc410b3fb6557346dfbb993df3",
- "49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9"),
- "count": 5,
- }),
- (("https://www.webtoons.com/en/challenge/punderworld"
- "/happy-earth-day-/viewer?title_no=312584&episode_no=40"), {
- "exception": exception.NotFoundError,
- "keyword": {
- "comic": "punderworld",
- "description": str,
- "episode": "36",
- "episode_no": "40",
- "genre": "challenge",
- "title": r"re:^Punderworld - .+",
- "title_no": "312584",
- },
- }),
- )
-
- def __init__(self, match):
- self.path, self.lang, self.genre, self.comic, self.query = \
- match.groups()
-
- url = "{}/{}/viewer?{}".format(self.root, self.path, self.query)
- GalleryExtractor.__init__(self, match, url)
def _init(self):
self.setup_agegate_cookies()
- params = text.parse_query(self.query)
+ path, self.lang, self.genre, self.comic, query = self.groups
+ params = text.parse_query(query)
self.title_no = params.get("title_no")
self.episode_no = params.get("episode_no")
+ self.gallery_url = "{}/{}/viewer?{}".format(self.root, path, query)
def metadata(self, page):
extr = text.extract_from(page)
@@ -124,32 +98,49 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
"language" : util.code_to_language(self.lang),
}
- @staticmethod
- def images(page):
- return [
- (url.replace("://webtoon-phinf.", "://swebtoon-phinf."), None)
- for url in text.extract_iter(
- page, 'class="_images" data-url="', '"')
- ]
+ def images(self, page):
+ quality = self.config("quality")
+ if quality is None or quality == "original":
+ quality = {"jpg": False, "jpeg": False, "webp": False}
+ elif not quality:
+ quality = None
+ elif isinstance(quality, str):
+ quality = {"jpg": quality, "jpeg": quality}
+ elif isinstance(quality, int):
+ quality = "q" + str(quality)
+ quality = {"jpg": quality, "jpeg": quality}
+ elif not isinstance(quality, dict):
+ quality = None
+
+ results = []
+ for url in text.extract_iter(
+ page, 'class="_images" data-url="', '"'):
+
+ if quality is not None:
+ path, _, query = url.rpartition("?")
+ type = quality.get(path.rpartition(".")[2].lower())
+ if type is False:
+ url = path
+ elif type:
+ url = "{}?type={}".format(path, type)
+
+ url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
+ results.append((url, None))
+ return results
class WebtoonsComicExtractor(WebtoonsBase, Extractor):
"""Extractor for an entire comic on webtoons.com"""
subcategory = "comic"
categorytransfer = True
- pattern = (BASE_PATTERN + r"/([^/?#]+)/([^/?#]+))"
- r"/list(?:\?([^#]+))")
+ pattern = LANG_PATTERN + r"/([^/?#]+)/([^/?#]+))/list\?([^#]+)"
example = "https://www.webtoons.com/en/GENRE/TITLE/list?title_no=123"
- def __init__(self, match):
- Extractor.__init__(self, match)
- self.path, self.lang, self.genre, self.comic, self.query = \
- match.groups()
-
def _init(self):
self.setup_agegate_cookies()
- params = text.parse_query(self.query)
+ self.path, self.lang, self.genre, self.comic, query = self.groups
+ params = text.parse_query(query)
self.title_no = params.get("title_no")
self.page_no = text.parse_int(params.get("page"), 1)
@@ -164,7 +155,7 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
path = "/{}/list?title_no={}&page={}".format(
self.path, self.title_no, self.page_no)
- if page and path not in page:
+ if page is not None and path not in page:
return
response = self.request(self.root + path)
@@ -182,11 +173,47 @@ class WebtoonsComicExtractor(WebtoonsBase, Extractor):
self.page_no += 1
- @staticmethod
- def get_episode_urls(page):
+ def get_episode_urls(self, page):
"""Extract and return all episode urls in 'page'"""
page = text.extr(page, 'id="_listUl"', '</ul>')
return [
match.group(0)
for match in WebtoonsEpisodeExtractor.pattern.finditer(page)
]
+
+
+class WebtoonsArtistExtractor(WebtoonsBase, Extractor):
+ """Extractor for webtoons.com artists"""
+ subcategory = "artist"
+ pattern = BASE_PATTERN + r"/p/community/([^/?#]+)/u/([^/?#]+)"
+ example = "https://www.webtoons.com/p/community/LANG/u/ARTIST"
+
+ def items(self):
+ self.setup_agegate_cookies()
+
+ for comic in self.comics():
+ comic["_extractor"] = WebtoonsComicExtractor
+ comic_url = self.root + comic["extra"]["episodeListPath"]
+ yield Message.Queue, comic_url, comic
+
+ def comics(self):
+ lang, artist = self.groups
+ language = util.code_to_language(lang).upper()
+
+ url = "{}/p/community/{}/u/{}".format(
+ self.root, lang, artist)
+ page = self.request(url).text
+ creator_id = text.extr(page, '\\"creatorId\\":\\"', '\\')
+
+ url = "{}/p/community/api/v1/creator/{}/titles".format(
+ self.root, creator_id)
+ params = {
+ "language": language,
+ "nextSize": "50",
+ }
+ headers = {
+ "language": language,
+ }
+ data = self.request(url, params=params, headers=headers).json()
+
+ return data["result"]["titles"]
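
The new images() method above adds a "quality" option that rewrites each image URL's type= parameter per file extension: False drops the parameter, a string or integer replaces it. A minimal sketch of that rewrite, using an illustrative URL and the default "original" mapping:

    url = "https://webtoon-phinf.pstatic.net/123/image_01.jpg?type=q90"
    quality = {"jpg": False, "jpeg": False, "webp": False}

    path, _, query = url.rpartition("?")
    type = quality.get(path.rpartition(".")[2].lower())
    if type is False:
        url = path                                   # strip ?type=... entirely
    elif type:
        url = "{}?type={}".format(path, type)
    url = url.replace("://webtoon-phinf.", "://swebtoon-phinf.")
    # -> https://swebtoon-phinf.pstatic.net/123/image_01.jpg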
diff --git a/gallery_dl/extractor/zerochan.py b/gallery_dl/extractor/zerochan.py
index ac1400e..0ad73c0 100644
--- a/gallery_dl/extractor/zerochan.py
+++ b/gallery_dl/extractor/zerochan.py
@@ -74,7 +74,6 @@ class ZerochanExtractor(BooruExtractor):
extr = text.extract_from(page)
data = {
"id" : text.parse_int(entry_id),
- "author" : jsonld["author"]["name"],
"file_url": jsonld["contentUrl"],
"date" : text.parse_datetime(jsonld["datePublished"]),
"width" : text.parse_int(jsonld["width"][:-3]),
@@ -88,6 +87,11 @@ class ZerochanExtractor(BooruExtractor):
'id="source-url"', '</p>').rpartition("</s>")[2])),
}
+ try:
+ data["author"] = jsonld["author"]["name"]
+ except Exception:
+ data["author"] = ""
+
html = data["tags"]
tags = data["tags"] = []
for tag in html.split("<li class=")[1:]:
diff --git a/gallery_dl/extractor/zzup.py b/gallery_dl/extractor/zzup.py
index 05b12b4..20454b4 100644
--- a/gallery_dl/extractor/zzup.py
+++ b/gallery_dl/extractor/zzup.py
@@ -16,7 +16,7 @@ class ZzupGalleryExtractor(GalleryExtractor):
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{slug}_{num}"
root = "https://zzup.com"
- pattern = (r"(?:https?://)?(up\.|www\.)?zzup\.com(/(?:viewalbum|content)"
+ pattern = (r"(?:https?://)?(up\.|w+\.)?zzup\.com(/(?:viewalbum|content)"
r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"
diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py
index e662c34..6affc3e 100644
--- a/gallery_dl/formatter.py
+++ b/gallery_dl/formatter.py
@@ -495,6 +495,8 @@ _CONVERSIONS = {
"s": str,
"r": repr,
"a": ascii,
+ "i": int,
+ "f": float,
}
_FORMAT_SPECIFIERS = {
"?": _parse_optional,
diff --git a/gallery_dl/path.py b/gallery_dl/path.py
index 21e1aa0..54cf126 100644
--- a/gallery_dl/path.py
+++ b/gallery_dl/path.py
@@ -269,7 +269,7 @@ class PathFormat():
try:
for fmt in self.directory_formatters:
segment = fmt(kwdict).strip()
- if strip and segment != "..":
+ if strip and segment not in {".", ".."}:
# remove trailing dots and spaces (#647)
segment = segment.rstrip(strip)
if segment:
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index 3ef9fbc..fbb3fb8 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -108,6 +108,7 @@ class MetadataPP(PostProcessor):
self.omode = options.get("open", omode)
self.encoding = options.get("encoding", "utf-8")
self.skip = options.get("skip", False)
+ self.meta_path = options.get("metadata-path")
def run(self, pathfmt):
archive = self.archive
@@ -120,6 +121,9 @@ class MetadataPP(PostProcessor):
directory = self._directory(pathfmt)
path = directory + self._filename(pathfmt)
+ if self.meta_path is not None:
+ pathfmt.kwdict[self.meta_path] = path
+
if self.skip and os.path.exists(path):
return
@@ -180,7 +184,10 @@ class MetadataPP(PostProcessor):
pathfmt.directory_formatters = self._directory_formatters
pathfmt.directory_conditions = ()
segments = pathfmt.build_directory(pathfmt.kwdict)
- directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
+ if segments:
+ directory = pathfmt.clean_path(os.sep.join(segments) + os.sep)
+ else:
+ directory = "." + os.sep
return os.path.join(self._base(pathfmt), directory)
finally:
pathfmt.directory_conditions = conditions
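
The new "metadata-path" option above names a kwdict field that receives the path of the written metadata file, so later format strings or postprocessors can refer to it. A hedged sketch of a postprocessor entry using it (shown as a Python dict; the key name "_meta_path" is made up):

    {
        "name": "metadata",
        "mode": "json",
        "metadata-path": "_meta_path",
    }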
diff --git a/gallery_dl/postprocessor/ugoira.py b/gallery_dl/postprocessor/ugoira.py
index 3a32b39..c1bfc20 100644
--- a/gallery_dl/postprocessor/ugoira.py
+++ b/gallery_dl/postprocessor/ugoira.py
@@ -156,12 +156,7 @@ class UgoiraPP(PostProcessor):
return self.log.debug("", exc_info=exc)
if self.convert(pathfmt, tempdir):
- if self.delete:
- pathfmt.delete = True
- elif pathfmt.extension != "zip":
- self.log.info(pathfmt.filename)
- pathfmt.set_extension("zip")
- pathfmt.build_path()
+ pathfmt.delete = self.delete
def convert_from_files(self, pathfmt):
if not self._convert_files:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index 76e6517..eabd4ab 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -700,6 +700,9 @@ EXECUTABLE = getattr(sys, "frozen", False)
USERAGENT = "gallery-dl/" + version.__version__
USERAGENT_FIREFOX = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{}.0) "
"Gecko/20100101 Firefox/{}.0").format(_ff_ver, _ff_ver)
+USERAGENT_CHROME = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 "
+ "Safari/537.36")
SPECIAL_EXTRACTORS = {"oauth", "recursive", "generic"}
GLOBALS = {
"contains" : contains,
diff --git a/gallery_dl/version.py b/gallery_dl/version.py
index 43b234d..87169e2 100644
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,5 +6,5 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
-__version__ = "1.29.3"
+__version__ = "1.29.4"
__variant__ = None