diff options
Diffstat (limited to 'gallery_dl')
| -rw-r--r-- | gallery_dl/extractor/__init__.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/bbc.py | 3 | ||||
| -rw-r--r-- | gallery_dl/extractor/bunkr.py | 36 | ||||
| -rw-r--r-- | gallery_dl/extractor/cien.py | 7 | ||||
| -rw-r--r-- | gallery_dl/extractor/common.py | 14 | ||||
| -rw-r--r-- | gallery_dl/extractor/e621.py | 2 | ||||
| -rw-r--r-- | gallery_dl/extractor/imagefap.py | 6 | ||||
| -rw-r--r-- | gallery_dl/extractor/mangapark.py | 3 | ||||
| -rw-r--r-- | gallery_dl/extractor/patreon.py | 15 | ||||
| -rw-r--r-- | gallery_dl/extractor/pexels.py | 189 | ||||
| -rw-r--r-- | gallery_dl/extractor/pixiv.py | 21 | ||||
| -rw-r--r-- | gallery_dl/extractor/plurk.py | 16 | ||||
| -rw-r--r-- | gallery_dl/extractor/slideshare.py | 5 | ||||
| -rw-r--r-- | gallery_dl/extractor/wallhaven.py | 23 | ||||
| -rw-r--r-- | gallery_dl/extractor/weebcentral.py | 136 | ||||
| -rw-r--r-- | gallery_dl/option.py | 8 | ||||
| -rw-r--r-- | gallery_dl/util.py | 2 | ||||
| -rw-r--r-- | gallery_dl/version.py | 2 |
18 files changed, 429 insertions, 61 deletions
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index d003a61..b582c99 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -124,6 +124,7 @@ modules = [ "nsfwalbum", "paheal", "patreon", + "pexels", "philomena", "photovogue", "picarto", @@ -190,6 +191,7 @@ modules = [ "weasyl", "webmshare", "webtoons", + "weebcentral", "weibo", "wikiart", "wikifeet", diff --git a/gallery_dl/extractor/bbc.py b/gallery_dl/extractor/bbc.py index 54aaac4..113a669 100644 --- a/gallery_dl/extractor/bbc.py +++ b/gallery_dl/extractor/bbc.py @@ -26,8 +26,7 @@ class BbcGalleryExtractor(GalleryExtractor): example = "https://www.bbc.co.uk/programmes/PATH" def metadata(self, page): - data = util.json_loads(text.extr( - page, '<script type="application/ld+json">', '</script>')) + data = self._extract_jsonld(page) return { "programme": self.gallery_url.split("/")[4], "path": list(util.unique_sequence( diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 3e12452..e1ee50d 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -80,6 +80,9 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): # redirect url = response.headers["Location"] + if url[0] == "/": + url = self.root + url + continue root, path = self._split(url) if root not in CF_DOMAINS: continue @@ -105,37 +108,40 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): "All Bunkr domains require solving a CF challenge") # select alternative domain - root = "https://" + random.choice(DOMAINS) + self.root = root = "https://" + random.choice(DOMAINS) self.log.debug("Trying '%s' as fallback", root) url = root + path def fetch_album(self, album_id): # album metadata - page = self.request(self.root + "/a/" + album_id).text - title, size = text.split_html(text.extr( - page, "<h1", "</span>").partition(">")[2]) - if "&" in title: - title = title.replace( - "<", "<").replace(">", ">").replace("&", "&") + page = self.request( + self.root + "/a/" + album_id, encoding="utf-8").text + title = text.unescape(text.unescape(text.extr( + page, 'property="og:title" content="', '"'))) # files - items = list(text.extract_iter(page, "<!-- item -->", "<!-- -->")) + items = list(text.extract_iter( + page, '<div class="grid-images_box', "</a>")) + return self._extract_files(items), { "album_id" : album_id, "album_name" : title, - "album_size" : text.extr(size, "(", ")"), + "album_size" : text.extr( + page, '<span class="font-semibold">(', ')'), "count" : len(items), } def _extract_files(self, items): for item in items: try: - url = text.extr(item, ' href="', '"') - file = self._extract_file(text.unescape(url)) + url = text.unescape(text.extr(item, ' href="', '"')) + if url[0] == "/": + url = self.root + url + file = self._extract_file(url) info = text.split_html(item) - file["name"] = info[0] - file["size"] = info[2] + file["name"] = info[-3] + file["size"] = info[-2] file["date"] = text.parse_datetime( info[-1], "%H:%M:%S %d/%m/%Y") @@ -179,8 +185,8 @@ class BunkrMediaExtractor(BunkrAlbumExtractor): """Extractor for bunkr.si media links""" subcategory = "media" directory_fmt = ("{category}",) - pattern = BASE_PATTERN + r"(/[vid]/[^/?#]+)" - example = "https://bunkr.si/v/FILENAME" + pattern = BASE_PATTERN + r"(/[fvid]/[^/?#]+)" + example = "https://bunkr.si/f/FILENAME" def fetch_album(self, album_id): try: diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py index 378365e..27d50e7 100644 --- a/gallery_dl/extractor/cien.py +++ b/gallery_dl/extractor/cien.py @@ -9,7 +9,7 @@ """Extractors for https://ci-en.net/""" from .common import Extractor, Message -from .. import text, util +from .. import text BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)" @@ -56,11 +56,8 @@ class CienArticleExtractor(CienExtractor): self.root, self.groups[0], self.groups[1]) page = self.request(url, notfound="article").text - post = util.json_loads(text.extr( - page, '<script type="application/ld+json">', '</script>'))[0] - files = self._extract_files(page) - + post = self._extract_jsonld(page)[0] post["post_url"] = url post["post_id"] = text.parse_int(self.groups[1]) post["count"] = len(files) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 5ada030..13fd88a 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -587,6 +587,14 @@ class Extractor(): return True return False + def _extract_jsonld(self, page): + return util.json_loads(text.extr( + page, '<script type="application/ld+json">', "</script>")) + + def _extract_nextdata(self, page): + return util.json_loads(text.extr( + page, ' id="__NEXT_DATA__" type="application/json">', "</script>")) + def _prepare_ddosguard_cookies(self): if not self.cookies.get("__ddg2", domain=self.cookies_domain): self.cookies.set( @@ -772,7 +780,11 @@ class MangaExtractor(Extractor): def items(self): self.login() - page = self.request(self.manga_url).text + + if self.manga_url: + page = self.request(self.manga_url, notfound=self.subcategory).text + else: + page = None chapters = self.chapters(page) if self.reverse: diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 4a6624d..33e6ba8 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -90,7 +90,7 @@ BASE_PATTERN = E621Extractor.update({ class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor): """Extractor for e621 posts from tag searches""" - pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)" + pattern = BASE_PATTERN + r"/posts?(?:\?[^#]*?tags=|/index/\d+/)([^&#]*)" example = "https://e621.net/posts?tags=TAG" diff --git a/gallery_dl/extractor/imagefap.py b/gallery_dl/extractor/imagefap.py index 28590fc..dd5220d 100644 --- a/gallery_dl/extractor/imagefap.py +++ b/gallery_dl/extractor/imagefap.py @@ -9,7 +9,7 @@ """Extractors for https://www.imagefap.com/""" from .common import Extractor, Message -from .. import text, util, exception +from .. import text, exception BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?imagefap\.com" @@ -129,13 +129,11 @@ class ImagefapImageExtractor(ImagefapExtractor): url, pos = text.extract( page, 'original="', '"') - info, pos = text.extract( - page, '<script type="application/ld+json">', '</script>', pos) image_id, pos = text.extract( page, 'id="imageid_input" value="', '"', pos) gallery_id, pos = text.extract( page, 'id="galleryid_input" value="', '"', pos) - info = util.json_loads(info) + info = self._extract_jsonld(page) return url, text.nameext_from_url(url, { "title": text.unescape(info["name"]), diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index 63aaf91..6f7a238 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -43,8 +43,7 @@ class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor): ChapterExtractor.__init__(self, match, url) def metadata(self, page): - data = util.json_loads(text.extr( - page, 'id="__NEXT_DATA__" type="application/json">', '<')) + data = self._extract_nextdata(page) chapter = (data["props"]["pageProps"]["dehydratedState"] ["queries"][0]["state"]["data"]["data"]) manga = chapter["comicNode"]["data"] diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index e4a5985..866e93a 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -286,15 +286,12 @@ class PatreonExtractor(Extractor): return [genmap[ft] for ft in filetypes] def _extract_bootstrap(self, page): - data = text.extr( - page, 'id="__NEXT_DATA__" type="application/json">', '</script') - if data: - try: - data = util.json_loads(data) - env = data["props"]["pageProps"]["bootstrapEnvelope"] - return env.get("pageBootstrap") or env["bootstrap"] - except Exception as exc: - self.log.debug("%s: %s", exc.__class__.__name__, exc) + try: + data = self._extract_nextdata(page) + env = data["props"]["pageProps"]["bootstrapEnvelope"] + return env.get("pageBootstrap") or env["bootstrap"] + except Exception as exc: + self.log.debug("%s: %s", exc.__class__.__name__, exc) bootstrap = text.extr( page, 'window.patreon = {"bootstrap":', '},"apiServer"') diff --git a/gallery_dl/extractor/pexels.py b/gallery_dl/extractor/pexels.py new file mode 100644 index 0000000..804623b --- /dev/null +++ b/gallery_dl/extractor/pexels.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://pexels.com/""" + +from .common import Extractor, Message +from .. import text, exception + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?pexels\.com" + + +class PexelsExtractor(Extractor): + """Base class for pexels extractors""" + category = "pexels" + root = "https://www.pexels.com" + archive_fmt = "{id}" + request_interval = (1.0, 2.0) + request_interval_min = 0.5 + + def _init(self): + self.api = PexelsAPI(self) + + def items(self): + metadata = self.metadata() + + for post in self.posts(): + if "attributes" in post: + attr = post + post = post["attributes"] + post["type"] = attr["type"] + + post.update(metadata) + post["date"] = text.parse_datetime( + post["created_at"][:-5], "%Y-%m-%dT%H:%M:%S") + + if "image" in post: + url, _, query = post["image"]["download_link"].partition("?") + name = text.extr(query, "&dl=", "&") + elif "video" in post: + video = post["video"] + name = video["src"] + url = video["download_link"] + else: + self.log.warning("%s: Unsupported post type", post.get("id")) + continue + + yield Message.Directory, post + yield Message.Url, url, text.nameext_from_url(name, post) + + def posts(self): + return () + + def metadata(self): + return {} + + +class PexelsCollectionExtractor(PexelsExtractor): + """Extractor for a pexels.com collection""" + subcategory = "collection" + directory_fmt = ("{category}", "Collections", "{collection}") + pattern = BASE_PATTERN + r"/collections/((?:[^/?#]*-)?(\w+))" + example = "https://www.pexels.com/collections/SLUG-a1b2c3/" + + def metadata(self): + cname, cid = self.groups + return {"collection": cname, "collection_id": cid} + + def posts(self): + return self.api.collections_media(self.groups[1]) + + +class PexelsSearchExtractor(PexelsExtractor): + """Extractor for pexels.com search results""" + subcategory = "search" + directory_fmt = ("{category}", "Searches", "{search_tags}") + pattern = BASE_PATTERN + r"/search/([^/?#]+)" + example = "https://www.pexels.com/search/QUERY/" + + def metadata(self): + return {"search_tags": self.groups[0]} + + def posts(self): + return self.api.search_photos(self.groups[0]) + + +class PexelsUserExtractor(PexelsExtractor): + """Extractor for pexels.com user galleries""" + subcategory = "user" + directory_fmt = ("{category}", "@{user[slug]}") + pattern = BASE_PATTERN + r"/(@(?:(?:[^/?#]*-)?(\d+)|[^/?#]+))" + example = "https://www.pexels.com/@USER-12345/" + + def posts(self): + return self.api.users_media_recent(self.groups[1] or self.groups[0]) + + +class PexelsImageExtractor(PexelsExtractor): + subcategory = "image" + pattern = BASE_PATTERN + r"/photo/((?:[^/?#]*-)?\d+)" + example = "https://www.pexels.com/photo/SLUG-12345/" + + def posts(self): + url = "{}/photo/{}/".format(self.root, self.groups[0]) + page = self.request(url).text + return (self._extract_nextdata(page)["props"]["pageProps"]["medium"],) + + +class PexelsAPI(): + """Interface for the Pexels Web API""" + + def __init__(self, extractor): + self.extractor = extractor + self.root = "https://www.pexels.com/en-us/api" + self.headers = { + "Accept" : "*/*", + "Content-Type" : "application/json", + "secret-key" : "H2jk9uKnhRmL6WPwh89zBezWvr", + "Authorization" : "", + "X-Forwarded-CF-Connecting-IP" : "", + "X-Forwarded-HTTP_CF_IPCOUNTRY": "", + "X-Forwarded-CF-IPRegionCode" : "", + "X-Client-Type" : "react", + "Sec-Fetch-Dest": "empty", + "Sec-Fetch-Mode": "cors", + "Sec-Fetch-Site": "same-origin", + "Priority" : "u=4", + } + + def collections_media(self, collection_id): + endpoint = "/v3/collections/{}/media".format(collection_id) + params = { + "page" : "1", + "per_page": "24", + } + return self._pagination(endpoint, params) + + def search_photos(self, query): + endpoint = "/v3/search/photos" + params = { + "query" : query, + "page" : "1", + "per_page" : "24", + "orientation": "all", + "size" : "all", + "color" : "all", + "sort" : "popular", + } + return self._pagination(endpoint, params) + + def users_media_recent(self, user_id): + endpoint = "/v3/users/{}/media/recent".format(user_id) + params = { + "page" : "1", + "per_page": "24", + } + return self._pagination(endpoint, params) + + def _call(self, endpoint, params): + url = self.root + endpoint + + while True: + response = self.extractor.request( + url, params=params, headers=self.headers, fatal=None) + + if response.status_code < 300: + return response.json() + + elif response.status_code == 429: + self.extractor.wait(seconds=600) + + else: + self.extractor.log.debug(response.text) + raise exception.StopExtraction("API request failed") + + def _pagination(self, endpoint, params): + while True: + data = self._call(endpoint, params) + + yield from data["data"] + + pagination = data["pagination"] + if pagination["current_page"] >= pagination["total_pages"]: + return + params["page"] = pagination["current_page"] + 1 diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 6207bf7..d3e40ee 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -111,6 +111,7 @@ class PixivExtractor(Extractor): { "url" : img["image_urls"]["original"], "suffix": "_p{:02}".format(num), + "_fallback": self._fallback_image(img), } for num, img in enumerate(meta_pages) ] @@ -128,7 +129,7 @@ class PixivExtractor(Extractor): self.log.warning("%s: 'My pixiv' locked", work["id"]) elif work["type"] != "ugoira": - return ({"url": url},) + return ({"url": url, "_fallback": self._fallback_image(url)},) elif self.load_ugoira: try: @@ -269,6 +270,24 @@ class PixivExtractor(Extractor): except exception.HttpError: pass + def _fallback_image(self, src): + if isinstance(src, str): + urls = None + orig = src + else: + urls = src["image_urls"] + orig = urls["original"] + + base = orig.rpartition(".")[0] + yield base.replace("-original/", "-master/", 1) + "_master1200.jpg" + + if urls is None: + return + + for fmt in ("large", "medium", "square_medium"): + if fmt in urls: + yield urls[fmt] + @staticmethod def _date_from_url(url, offset=timedelta(hours=9)): try: diff --git a/gallery_dl/extractor/plurk.py b/gallery_dl/extractor/plurk.py index be0dbde..0bacd54 100644 --- a/gallery_dl/extractor/plurk.py +++ b/gallery_dl/extractor/plurk.py @@ -104,16 +104,16 @@ class PlurkPostExtractor(PlurkExtractor): pattern = r"(?:https?://)?(?:www\.)?plurk\.com/p/(\w+)" example = "https://www.plurk.com/p/12345" - def __init__(self, match): - PlurkExtractor.__init__(self, match) - self.plurk_id = match.group(1) - def plurks(self): - url = "{}/p/{}".format(self.root, self.plurk_id) + url = "{}/p/{}".format(self.root, self.groups[0]) page = self.request(url).text - user, pos = text.extract(page, " GLOBAL = ", "\n") - data, pos = text.extract(page, "plurk = ", ";\n", pos) + user, pos = text.extract(page, " GLOBAL=", "\n") + data, pos = text.extract(page, "plurk =", ";\n", pos) data = self._load(data) - data["user"] = self._load(user)["page_user"] + try: + data["user"] = self._load(user)["page_user"] + except Exception: + self.log.warning("%s: Failed to extract 'user' data", + self.groups[0]) return (data,) diff --git a/gallery_dl/extractor/slideshare.py b/gallery_dl/extractor/slideshare.py index e5e7a6b..0722d23 100644 --- a/gallery_dl/extractor/slideshare.py +++ b/gallery_dl/extractor/slideshare.py @@ -10,7 +10,7 @@ """Extractors for https://www.slideshare.net/""" from .common import GalleryExtractor -from .. import text, util +from .. import text class SlidesharePresentationExtractor(GalleryExtractor): @@ -31,8 +31,7 @@ class SlidesharePresentationExtractor(GalleryExtractor): GalleryExtractor.__init__(self, match, url) def metadata(self, page): - data = util.json_loads(text.extr( - page, 'id="__NEXT_DATA__" type="application/json">', '</script>')) + data = self._extract_nextdata(page) self.slideshow = slideshow = data["props"]["pageProps"]["slideshow"] return { diff --git a/gallery_dl/extractor/wallhaven.py b/gallery_dl/extractor/wallhaven.py index 479e8a8..e5b764a 100644 --- a/gallery_dl/extractor/wallhaven.py +++ b/gallery_dl/extractor/wallhaven.py @@ -54,7 +54,7 @@ class WallhavenExtractor(Extractor): class WallhavenSearchExtractor(WallhavenExtractor): """Extractor for search results on wallhaven.cc""" subcategory = "search" - directory_fmt = ("{category}", "{search[q]}") + directory_fmt = ("{category}", "{search[tags]}") archive_fmt = "s_{search[q]}_{id}" pattern = r"(?:https?://)?wallhaven\.cc/search(?:/?\?([^#]+))?" example = "https://wallhaven.cc/search?q=QUERY" @@ -64,7 +64,7 @@ class WallhavenSearchExtractor(WallhavenExtractor): self.params = text.parse_query(match.group(1)) def wallpapers(self): - return self.api.search(self.params.copy()) + return self.api.search(self.params) def metadata(self): return {"search": self.params} @@ -141,7 +141,7 @@ class WallhavenUploadsExtractor(WallhavenExtractor): def wallpapers(self): params = {"q": "@" + self.username} - return self.api.search(params.copy()) + return self.api.search(params) def metadata(self): return {"username": self.username} @@ -215,20 +215,35 @@ class WallhavenAPI(): def _pagination(self, endpoint, params=None, metadata=None): if params is None: + params_ptr = None params = {} + else: + params_ptr = params + params = params.copy() if metadata is None: metadata = self.extractor.config("metadata") while True: data = self._call(endpoint, params) + meta = data.get("meta") + if params_ptr is not None: + if meta and "query" in meta: + query = meta["query"] + if isinstance(query, dict): + params_ptr["tags"] = query.get("tag") + params_ptr["tag_id"] = query.get("id") + else: + params_ptr["tags"] = query + params_ptr["tag_id"] = 0 + params_ptr = None + if metadata: for wp in data["data"]: yield self.info(str(wp["id"])) else: yield from data["data"] - meta = data.get("meta") if not meta or meta["current_page"] >= meta["last_page"]: return params["page"] = meta["current_page"] + 1 diff --git a/gallery_dl/extractor/weebcentral.py b/gallery_dl/extractor/weebcentral.py new file mode 100644 index 0000000..39f998a --- /dev/null +++ b/gallery_dl/extractor/weebcentral.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://weebcentral.com/""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text +from ..cache import memcache + +BASE_PATTERN = r"(?:https?://)?(?:www\.)?weebcentral\.com" + + +class WeebcentralBase(): + category = "weebcentral" + root = "https://weebcentral.com" + request_interval = (0.5, 1.5) + + @memcache(keyarg=1) + def _extract_manga_data(self, manga_id): + url = "{}/series/{}".format(self.root, manga_id) + page = self.request(url).text + extr = text.extract_from(page) + + return { + "manga_id": manga_id, + "lang" : "en", + "language": "English", + "manga" : text.unescape(extr("<title>", " | Weeb Central")), + "author" : text.split_html(extr("<strong>Author", "</li>"))[1::2], + "tags" : text.split_html(extr("<strong>Tag", "</li>"))[1::2], + "type" : text.remove_html(extr("<strong>Type: ", "</li>")), + "status" : text.remove_html(extr("<strong>Status: ", "</li>")), + "release" : text.remove_html(extr("<strong>Released: ", "</li>")), + "official": ">Yes" in extr("<strong>Official Translatio", "</li>"), + "description": text.unescape(text.remove_html(extr( + "<strong>Description", "</li>"))), + } + + +class WeebcentralChapterExtractor(WeebcentralBase, ChapterExtractor): + """Extractor for manga chapters from weebcentral.com""" + pattern = BASE_PATTERN + r"(/chapters/(\w+))" + example = "https://weebcentral.com/chapters/01JHABCDEFGHIJKLMNOPQRSTUV" + + def metadata(self, page): + extr = text.extract_from(page) + manga_id = extr("'series_id': '", "'") + + data = self._extract_manga_data(manga_id) + data["chapter_id"] = self.groups[1] + data["chapter_type"] = extr("'chapter_type': '", "'") + + chapter, sep, minor = extr("'number': '", "'").partition(".") + data["chapter"] = text.parse_int(chapter) + data["chapter_minor"] = sep + minor + + return data + + def images(self, page): + referer = self.gallery_url + url = referer + "/images" + params = { + "is_prev" : "False", + "current_page" : "1", + "reading_style": "long_strip", + } + headers = { + "Accept" : "*/*", + "Referer" : referer, + "HX-Request" : "true", + "HX-Current-URL": referer, + } + page = self.request(url, params=params, headers=headers).text + extr = text.extract_from(page) + + results = [] + while True: + src = extr(' src="', '"') + if not src: + break + results.append((src, { + "width" : text.parse_int(extr(' width="' , '"')), + "height": text.parse_int(extr(' height="', '"')), + })) + return results + + +class WeebcentralMangaExtractor(WeebcentralBase, MangaExtractor): + """Extractor for manga from weebcentral.com""" + chapterclass = WeebcentralChapterExtractor + pattern = BASE_PATTERN + r"/series/(\w+)" + example = "https://weebcentral.com/series/01J7ABCDEFGHIJKLMNOPQRSTUV/TITLE" + + def __init__(self, match): + MangaExtractor.__init__(self, match, False) + + def chapters(self, _): + manga_id = self.groups[0] + referer = "{}/series/{}".format(self.root, manga_id) + url = referer + "/full-chapter-list" + headers = { + "Accept" : "*/*", + "Referer" : referer, + "HX-Request" : "true", + "HX-Target" : "chapter-list", + "HX-Current-URL": referer, + } + page = self.request(url, headers=headers).text + extr = text.extract_from(page) + data = self._extract_manga_data(manga_id) + base = self.root + "/chapters/" + + results = [] + while True: + chapter_id = extr("/chapters/", '"') + if not chapter_id: + break + type, _, chapter = extr('<span class="">', "<").partition(" ") + chapter, sep, minor = chapter.partition(".") + + chapter = { + "chapter_id" : chapter_id, + "chapter" : text.parse_int(chapter), + "chapter_minor": sep + minor, + "chapter_type" : type, + "date" : text.parse_datetime( + extr(' datetime="', '"')[:-5], "%Y-%m-%dT%H:%M:%S"), + } + chapter.update(data) + results.append((base + chapter_id, chapter)) + return results diff --git a/gallery_dl/option.py b/gallery_dl/option.py index a3f78e5..222679a 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -323,7 +323,7 @@ def build_parser(): input.add_argument( "--no-input", dest="input", nargs=0, action=ConfigConstAction, const=False, - help=("Do not prompt for passwords/tokens"), + help="Do not prompt for passwords/tokens", ) output = parser.add_argument_group("Output Options") @@ -406,7 +406,7 @@ def build_parser(): ) output.add_argument( "--list-extractors", - dest="list_extractors", metavar="CATEGORIES", nargs="*", + dest="list_extractors", metavar="[CATEGORIES]", nargs="*", help=("Print a list of extractor classes " "with description, (sub)category and example URL"), ) @@ -430,12 +430,12 @@ def build_parser(): output.add_argument( "--print-traffic", dest="print_traffic", action="store_true", - help=("Display sent and read HTTP traffic"), + help="Display sent and read HTTP traffic", ) output.add_argument( "--no-colors", dest="colors", action="store_false", - help=("Do not emit ANSI color codes in output"), + help="Do not emit ANSI color codes in output", ) networking = parser.add_argument_group("Networking Options") diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 72ec98e..2302088 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -83,7 +83,7 @@ def unique_sequence(iterable): def contains(values, elements, separator=" "): """Returns True if at least one of 'elements' is contained in 'values'""" - if isinstance(values, str): + if isinstance(values, str) and (separator or separator is None): values = values.split(separator) if not isinstance(elements, (tuple, list)): diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 4b28924..6bceebd 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.28.3" +__version__ = "1.28.4" __variant__ = None |
