From c679cd7a13bdbf6896e53d68fe2093910bc6625a Mon Sep 17 00:00:00 2001
From: Unit 193
Date: Mon, 5 May 2025 01:18:58 -0400
Subject: New upstream version 1.29.6.

---
 gallery_dl/extractor/__init__.py     |   1 -
 gallery_dl/extractor/chevereto.py    |   2 +
 gallery_dl/extractor/civitai.py      |  29 +++++-
 gallery_dl/extractor/deviantart.py   |   4 +-
 gallery_dl/extractor/kemonoparty.py  |  25 ++++-
 gallery_dl/extractor/mangakakalot.py |  92 ------------------
 gallery_dl/extractor/manganelo.py    | 179 +++++++++++++++++++----------------
 gallery_dl/extractor/pixiv.py        |  25 ++---
 gallery_dl/extractor/scrolller.py    |   7 +-
 gallery_dl/extractor/tumblr.py       |  10 +-
 gallery_dl/extractor/twitter.py      |  32 ++++++-
 gallery_dl/extractor/weasyl.py       |   2 +-
 12 files changed, 200 insertions(+), 208 deletions(-)
 delete mode 100644 gallery_dl/extractor/mangakakalot.py

(limited to 'gallery_dl/extractor')

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 9a7ca53..2da471e 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -105,7 +105,6 @@ modules = [
     "mangadex",
     "mangafox",
     "mangahere",
-    "mangakakalot",
     "manganelo",
     "mangapark",
     "mangaread",
diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py
index 600d231..dc963c5 100644
--- a/gallery_dl/extractor/chevereto.py
+++ b/gallery_dl/extractor/chevereto.py
@@ -78,6 +78,8 @@ class CheveretoImageExtractor(CheveretoExtractor):
             "id"   : self.path.rpartition(".")[2],
             "url"  : url,
             "album": text.extr(extr("Added to "), ">", "<"),
+            "date" : text.parse_datetime(extr(
+                '\
             yield self.api.deviation(deviation_uuid)

     def _unescape_json(self, json):
-        return json.replace('\\"', '"').replace("\\\\", "\\")
+        return json.replace('\\"', '"') \
+            .replace("\\'", "'") \
+            .replace("\\\\", "\\")


 class DeviantartUserExtractor(DeviantartExtractor):
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 79070ee..4893f19 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -317,11 +317,25 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
         KemonopartyExtractor.__init__(self, match)

     def posts(self):
+        endpoint = self.config("endpoint")
+        if endpoint == "legacy":
+            endpoint = self.api.creator_posts_legacy
+        elif endpoint == "legacy+":
+            endpoint = self._posts_legacy_plus
+        else:
+            endpoint = self.api.creator_posts
+
         _, _, service, creator_id, query = self.groups
         params = text.parse_query(query)
-        return self.api.creator_posts_legacy(
-            service, creator_id,
-            params.get("o"), params.get("q"), params.get("tag"))
+        return endpoint(service, creator_id,
+                        params.get("o"), params.get("q"), params.get("tag"))
+
+    def _posts_legacy_plus(self, service, creator_id,
+                           offset=0, query=None, tags=None):
+        for post in self.api.creator_posts_legacy(
+                service, creator_id, offset, query, tags):
+            yield self.api.creator_post(
+                service, creator_id, post["id"])["post"]


 class KemonopartyPostsExtractor(KemonopartyExtractor):
@@ -525,9 +539,10 @@ class KemonoAPI():
         endpoint = "/file/" + file_hash
         return self._call(endpoint)

-    def creator_posts(self, service, creator_id, offset=0, query=None):
+    def creator_posts(self, service, creator_id,
+                      offset=0, query=None, tags=None):
         endpoint = "/{}/user/{}".format(service, creator_id)
-        params = {"q": query, "o": offset}
+        params = {"q": query, "tag": tags, "o": offset}
         return self._pagination(endpoint, params, 50)

     def creator_posts_legacy(self, service, creator_id,
diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py
deleted file mode 100644
index 9fc8681..0000000
--- a/gallery_dl/extractor/mangakakalot.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2020 Jake Mannens
-# Copyright 2021-2023 Mike Fährmann
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-
-"""Extractors for https://mangakakalot.tv/"""
-
-from .common import ChapterExtractor, MangaExtractor
-from .. import text
-import re
-
-BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv"
-
-
-class MangakakalotBase():
-    """Base class for mangakakalot extractors"""
-    category = "mangakakalot"
-    root = "https://ww8.mangakakalot.tv"
-
-
-class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
-    """Extractor for manga chapters from mangakakalot.tv"""
-    pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)"
-    example = "https://ww6.mangakakalot.tv/chapter/manga-ID/chapter-01"
-
-    def __init__(self, match):
-        self.path = match.group(1)
-        ChapterExtractor.__init__(self, match, self.root + self.path)
-
-    def metadata(self, page):
-        _     , pos = text.extract(page, '', '<')
-        manga , pos = text.extract(page, '', '<', pos)
-        info  , pos = text.extract(page, '', '<', pos)
-        author, pos = text.extract(page, '. Author:', ' already has ', pos)
-
-        match = re.match(
-            r"(?:[Vv]ol\. *(\d+) )?"
-            r"[Cc]hapter *([^:]*)"
-            r"(?:: *(.+))?", info or "")
-        volume, chapter, title = match.groups() if match else ("", "", info)
-        chapter, sep, minor = chapter.partition(".")
-
-        return {
-            "manga"        : text.unescape(manga),
-            "title"        : text.unescape(title) if title else "",
-            "author"       : text.unescape(author).strip() if author else "",
-            "volume"       : text.parse_int(volume),
-            "chapter"      : text.parse_int(chapter),
-            "chapter_minor": sep + minor,
-            "lang"         : "en",
-            "language"     : "English",
-        }
-
-    def images(self, page):
-        return [
-            (url, None)
-            for url in text.extract_iter(page, '", "<")
-        author, pos = text.extract(page, "  • Author(s) :", "", pos)
-        data["author"] = text.remove_html(author)
-
-        results = []
-        for chapter in text.extract_iter(page, '    ', '    '):
-            url, pos = text.extract(chapter, '', '', pos)
-            data["title"] = title.partition(": ")[2]
-            data["date"] , pos = text.extract(
-                chapter, '")
-        manga = extr('title="', '"')
-        info = extr('title="', '"')
-        author = extr("- Author(s) : ", "")
-        return self._parse_chapter(
-            info, text.unescape(manga), text.unescape(author))
+        data = {
+            "date"        : text.parse_datetime(extr(
+                '"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+            "date_updated": text.parse_datetime(extr(
+                '"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"),
+            "manga_id"    : text.parse_int(extr("comic_id =", ";")),
+            "chapter_id"  : text.parse_int(extr("chapter_id =", ";")),
+            "manga"       : extr("comic_name =", ";").strip('" '),
+            "lang"        : "en",
+            "language"    : "English",
+        }
+
+        chapter_name = extr("chapter_name =", ";").strip('" ')
+        chapter, sep, minor = chapter_name.rpartition(" ")[2].partition(".")
+        data["chapter"] = text.parse_int(chapter)
+        data["chapter_minor"] = sep + minor
+        data["author"] = extr(". Author:", " already has ").strip()
+
+        return data

     def images(self, page):
-        page = text.extr(
-            page, 'class="container-chapter-reader', 'class="container')
+        extr = text.extract_from(page)
+        cdns = util.json_loads(extr("var cdns =", ";"))[0]
+        imgs = util.json_loads(extr("var chapterImages =", ";"))
+
+        if cdns[-1] != "/":
+            cdns += "/"
+
         return [
-            (url, None)
-            for url in text.extract_iter(page, '", "<"))
-        author = text.remove_html(extr("Author(s) :", ""))
-
-        extr('class="row-content-chapter', '')
-        while True:
-            url = extr('class="chapter-name text-nowrap" href="', '"')
-            if not url:
-                return results
-            info = extr(">", "<")
-            date = extr('class="chapter-time text-nowrap" title="', '"')
-            append((url, self._parse_chapter(info, manga, author, date)))
+        author = text.remove_html(extr("  • Author(s) :", ""))
+        status = extr("  • Status :", "<").strip()
+        update = text.parse_datetime(extr(
+            "  • Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p")
+        tags = text.split_html(extr(">Genres :", "  • "))[::2]
+
+        results = []
+        for chapter in text.extract_iter(page, '    ', '    '):
+            url, pos = text.extract(chapter, '', '', pos)
+            date, pos = text.extract(chapter, '