From c679cd7a13bdbf6896e53d68fe2093910bc6625a Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 5 May 2025 01:18:58 -0400 Subject: New upstream version 1.29.6. --- gallery_dl/extractor/__init__.py | 1 - gallery_dl/extractor/chevereto.py | 2 + gallery_dl/extractor/civitai.py | 29 +++- gallery_dl/extractor/deviantart.py | 4 +- gallery_dl/extractor/kemonoparty.py | 25 +++- gallery_dl/extractor/mangakakalot.py | 92 ------------- gallery_dl/extractor/manganelo.py | 179 ++++++++++++++----------- gallery_dl/extractor/pixiv.py | 25 +--- gallery_dl/extractor/scrolller.py | 7 +- gallery_dl/extractor/tumblr.py | 10 +- gallery_dl/extractor/twitter.py | 32 ++++- gallery_dl/extractor/weasyl.py | 2 +- gallery_dl/job.py | 2 - gallery_dl/postprocessor/__init__.py | 1 + gallery_dl/postprocessor/directory.py | 30 +++++ gallery_dl/transaction_id.py | 246 ++++++++++++++++++++++++++++++++++ gallery_dl/version.py | 2 +- 17 files changed, 478 insertions(+), 211 deletions(-) delete mode 100644 gallery_dl/extractor/mangakakalot.py create mode 100644 gallery_dl/postprocessor/directory.py create mode 100644 gallery_dl/transaction_id.py (limited to 'gallery_dl') diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 9a7ca53..2da471e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -105,7 +105,6 @@ modules = [ "mangadex", "mangafox", "mangahere", - "mangakakalot", "manganelo", "mangapark", "mangaread", diff --git a/gallery_dl/extractor/chevereto.py b/gallery_dl/extractor/chevereto.py index 600d231..dc963c5 100644 --- a/gallery_dl/extractor/chevereto.py +++ b/gallery_dl/extractor/chevereto.py @@ -78,6 +78,8 @@ class CheveretoImageExtractor(CheveretoExtractor): "id" : self.path.rpartition(".")[2], "url" : url, "album": text.extr(extr("Added to "), ">", "<"), + "date" : text.parse_datetime(extr( + '\ yield self.api.deviation(deviation_uuid) def _unescape_json(self, json): - return json.replace('\\"', '"').replace("\\\\", "\\") + return json.replace('\\"', '"') \ + .replace("\\'", "'") \ + .replace("\\\\", "\\") class DeviantartUserExtractor(DeviantartExtractor): diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index 79070ee..4893f19 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -317,11 +317,25 @@ class KemonopartyUserExtractor(KemonopartyExtractor): KemonopartyExtractor.__init__(self, match) def posts(self): + endpoint = self.config("endpoint") + if endpoint == "legacy": + endpoint = self.api.creator_posts_legacy + elif endpoint == "legacy+": + endpoint = self._posts_legacy_plus + else: + endpoint = self.api.creator_posts + _, _, service, creator_id, query = self.groups params = text.parse_query(query) - return self.api.creator_posts_legacy( - service, creator_id, - params.get("o"), params.get("q"), params.get("tag")) + return endpoint(service, creator_id, + params.get("o"), params.get("q"), params.get("tag")) + + def _posts_legacy_plus(self, service, creator_id, + offset=0, query=None, tags=None): + for post in self.api.creator_posts_legacy( + service, creator_id, offset, query, tags): + yield self.api.creator_post( + service, creator_id, post["id"])["post"] class KemonopartyPostsExtractor(KemonopartyExtractor): @@ -525,9 +539,10 @@ class KemonoAPI(): endpoint = "/file/" + file_hash return self._call(endpoint) - def creator_posts(self, service, creator_id, offset=0, query=None): + def creator_posts(self, service, creator_id, + offset=0, query=None, tags=None): endpoint = "/{}/user/{}".format(service, creator_id) - params = {"q": query, "o": offset} + params = {"q": query, "tag": tags, "o": offset} return self._pagination(endpoint, params, 50) def creator_posts_legacy(self, service, creator_id, diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py deleted file mode 100644 index 9fc8681..0000000 --- a/gallery_dl/extractor/mangakakalot.py +++ /dev/null @@ -1,92 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Jake Mannens -# Copyright 2021-2023 Mike Fährmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. - -"""Extractors for https://mangakakalot.tv/""" - -from .common import ChapterExtractor, MangaExtractor -from .. import text -import re - -BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv" - - -class MangakakalotBase(): - """Base class for mangakakalot extractors""" - category = "mangakakalot" - root = "https://ww8.mangakakalot.tv" - - -class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): - """Extractor for manga chapters from mangakakalot.tv""" - pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)" - example = "https://ww6.mangakakalot.tv/chapter/manga-ID/chapter-01" - - def __init__(self, match): - self.path = match.group(1) - ChapterExtractor.__init__(self, match, self.root + self.path) - - def metadata(self, page): - _ , pos = text.extract(page, '', '<') - manga , pos = text.extract(page, '', '<', pos) - info , pos = text.extract(page, '', '<', pos) - author, pos = text.extract(page, '. Author:', ' already has ', pos) - - match = re.match( - r"(?:[Vv]ol\. *(\d+) )?" - r"[Cc]hapter *([^:]*)" - r"(?:: *(.+))?", info or "") - volume, chapter, title = match.groups() if match else ("", "", info) - chapter, sep, minor = chapter.partition(".") - - return { - "manga" : text.unescape(manga), - "title" : text.unescape(title) if title else "", - "author" : text.unescape(author).strip() if author else "", - "volume" : text.parse_int(volume), - "chapter" : text.parse_int(chapter), - "chapter_minor": sep + minor, - "lang" : "en", - "language" : "English", - } - - def images(self, page): - return [ - (url, None) - for url in text.extract_iter(page, '", "<") - author, pos = text.extract(page, "
  • Author(s) :", "", pos) - data["author"] = text.remove_html(author) - - results = [] - for chapter in text.extract_iter(page, '
    ', '
    '): - url, pos = text.extract(chapter, '', '', pos) - data["title"] = title.partition(": ")[2] - data["date"] , pos = text.extract( - chapter, '") - manga = extr('title="', '"') - info = extr('title="', '"') - author = extr("- Author(s) : ", "

    ") - return self._parse_chapter( - info, text.unescape(manga), text.unescape(author)) + data = { + "date" : text.parse_datetime(extr( + '"datePublished": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"), + "date_updated": text.parse_datetime(extr( + '"dateModified": "', '"')[:19], "%Y-%m-%dT%H:%M:%S"), + "manga_id" : text.parse_int(extr("comic_id =", ";")), + "chapter_id" : text.parse_int(extr("chapter_id =", ";")), + "manga" : extr("comic_name =", ";").strip('" '), + "lang" : "en", + "language" : "English", + } + + chapter_name = extr("chapter_name =", ";").strip('" ') + chapter, sep, minor = chapter_name.rpartition(" ")[2].partition(".") + data["chapter"] = text.parse_int(chapter) + data["chapter_minor"] = sep + minor + data["author"] = extr(". Author:", " already has ").strip() + + return data def images(self, page): - page = text.extr( - page, 'class="container-chapter-reader', 'class="container') + extr = text.extract_from(page) + cdns = util.json_loads(extr("var cdns =", ";"))[0] + imgs = util.json_loads(extr("var chapterImages =", ";")) + + if cdns[-1] != "/": + cdns += "/" + return [ - (url, None) - for url in text.extract_iter(page, '", "<")) - author = text.remove_html(extr("Author(s) :", "")) - - extr('class="row-content-chapter', '') - while True: - url = extr('class="chapter-name text-nowrap" href="', '"') - if not url: - return results - info = extr(">", "<") - date = extr('class="chapter-time text-nowrap" title="', '"') - append((url, self._parse_chapter(info, manga, author, date))) + author = text.remove_html(extr("
  • Author(s) :", "")) + status = extr("
  • Status :", "<").strip() + update = text.parse_datetime(extr( + "
  • Last updated :", "<").strip(), "%b-%d-%Y %I:%M:%S %p") + tags = text.split_html(extr(">Genres :", "
  • "))[::2] + + results = [] + for chapter in text.extract_iter(page, '
    ', '
    '): + url, pos = text.extract(chapter, '', '', pos) + date, pos = text.extract(chapter, '= self._skipmax: raise self._skipexc() - else: - self._skipcnt = 0 def download(self, url): """Download 'url'""" diff --git a/gallery_dl/postprocessor/__init__.py b/gallery_dl/postprocessor/__init__.py index 7837b06..dd44a8a 100644 --- a/gallery_dl/postprocessor/__init__.py +++ b/gallery_dl/postprocessor/__init__.py @@ -11,6 +11,7 @@ modules = [ "classify", "compare", + "directory", "exec", "hash", "metadata", diff --git a/gallery_dl/postprocessor/directory.py b/gallery_dl/postprocessor/directory.py new file mode 100644 index 0000000..ed8c02e --- /dev/null +++ b/gallery_dl/postprocessor/directory.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Trigger directory format string evaluation""" + +from .common import PostProcessor + + +class DirectoryPP(PostProcessor): + + def __init__(self, job, options): + PostProcessor.__init__(self, job) + + events = options.get("event") + if events is None: + events = ("prepare",) + elif isinstance(events, str): + events = events.split(",") + job.register_hooks({event: self.run for event in events}, options) + + def run(self, pathfmt): + pathfmt.set_directory(pathfmt.kwdict) + + +__postprocessor__ = DirectoryPP diff --git a/gallery_dl/transaction_id.py b/gallery_dl/transaction_id.py new file mode 100644 index 0000000..25f1775 --- /dev/null +++ b/gallery_dl/transaction_id.py @@ -0,0 +1,246 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +# Adapted from iSarabjitDhiman/XClientTransaction +# https://github.com/iSarabjitDhiman/XClientTransaction + +# References: +# https://antibot.blog/posts/1741552025433 +# https://antibot.blog/posts/1741552092462 +# https://antibot.blog/posts/1741552163416 + +"""Twitter 'x-client-transaction-id' header generation""" + +import math +import time +import random +import hashlib +import binascii +import itertools +from . import text, util +from .cache import cache + + +class ClientTransaction(): + __slots__ = ("key_bytes", "animation_key") + + def __getstate__(self): + return (self.key_bytes, self.animation_key) + + def __setstate__(self, state): + self.key_bytes, self.animation_key = state + + def initialize(self, extractor, homepage=None): + if homepage is None: + homepage = extractor.request("https://x.com/").text + + key = self._extract_verification_key(homepage) + if not key: + extractor.log.error( + "Failed to extract 'twitter-site-verification' key") + + ondemand_s = text.extr(homepage, '"ondemand.s":"', '"') + indices = self._extract_indices(ondemand_s, extractor) + if not indices: + extractor.log.error("Failed to extract KEY_BYTE indices") + + frames = self._extract_frames(homepage) + if not frames: + extractor.log.error("Failed to extract animation frame data") + + self.key_bytes = key_bytes = binascii.a2b_base64(key) + self.animation_key = self._calculate_animation_key( + frames, indices[0], key_bytes, indices[1:]) + + def _extract_verification_key(self, homepage): + pos = homepage.find('name="twitter-site-verification"') + beg = homepage.rfind("<", 0, pos) + end = homepage.find(">", pos) + return text.extr(homepage[beg:end], 'content="', '"') + + @cache(maxage=36500*86400, keyarg=1) + def _extract_indices(self, ondemand_s, extractor): + url = ("https://abs.twimg.com/responsive-web/client-web" + "/ondemand.s." + ondemand_s + "a.js") + page = extractor.request(url).text + pattern = util.re_compile(r"\(\w\[(\d\d?)\],\s*16\)") + return [int(i) for i in pattern.findall(page)] + + def _extract_frames(self, homepage): + return list(text.extract_iter( + homepage, 'id="loading-x-anim-', "")) + + def _calculate_animation_key(self, frames, row_index, key_bytes, + key_bytes_indices, total_time=4096): + frame = frames[key_bytes[5] % 4] + array = self._generate_2d_array(frame) + frame_row = array[key_bytes[row_index] % 16] + + frame_time = 1 + for index in key_bytes_indices: + frame_time *= key_bytes[index] % 16 + frame_time = round_js(frame_time / 10) * 10 + target_time = frame_time / total_time + + return self.animate(frame_row, target_time) + + def _generate_2d_array(self, frame): + split = util.re_compile(r"[^\d]+").split + return [ + [int(x) for x in split(path) if x] + for path in text.extr( + frame, '= 255.0 else c + for c in color] + + rotation_a = 0.0 + rotation_b = scale(float(frames[6]), 60.0, 360.0, True) + rotation = interpolate_value(cubic, rotation_a, rotation_b) + matrix = rotation_matrix_2d(rotation) + + result = ( + hex(round(color[0]))[2:], + hex(round(color[1]))[2:], + hex(round(color[2]))[2:], + float_to_hex(abs(round(matrix[0], 2))), + float_to_hex(abs(round(matrix[1], 2))), + float_to_hex(abs(round(matrix[2], 2))), + float_to_hex(abs(round(matrix[3], 2))), + "00", + ) + return "".join(result).replace(".", "").replace("-", "") + + def generate_transaction_id(self, method, path, + keyword="obfiowerehiring", rndnum=3): + bytes_key = self.key_bytes + + now = int(time.time()) - 1682924400 + bytes_time = ( + (now ) & 0xFF, # noqa: E202 + (now >> 8) & 0xFF, # noqa: E222 + (now >> 16) & 0xFF, + (now >> 24) & 0xFF, + ) + + payload = "{}!{}!{}{}{}".format( + method, path, now, keyword, self.animation_key) + bytes_hash = hashlib.sha256(payload.encode()).digest()[:16] + + num = random.randrange(256) + result = bytes( + byte ^ num + for byte in itertools.chain( + (0,), bytes_key, bytes_time, bytes_hash, (rndnum,)) + ) + return binascii.b2a_base64(result).rstrip(b"=\n") + + +# Cubic Curve + +def cubic_value(curve, t): + if t <= 0.0: + if curve[0] > 0.0: + value = curve[1] / curve[0] + elif curve[1] == 0.0 and curve[2] > 0.0: + value = curve[3] / curve[2] + else: + value = 0.0 + return value * t + + if t >= 1.0: + if curve[2] < 1.0: + value = (curve[3] - 1.0) / (curve[2] - 1.0) + elif curve[2] == 1.0 and curve[0] < 1.0: + value = (curve[1] - 1.0) / (curve[0] - 1.0) + else: + value = 0.0 + return 1.0 + value * (t - 1.0) + + start = 0.0 + end = 1.0 + while start < end: + mid = (start + end) / 2.0 + est = cubic_calculate(curve[0], curve[2], mid) + if abs(t - est) < 0.00001: + return cubic_calculate(curve[1], curve[3], mid) + if est < t: + start = mid + else: + end = mid + return cubic_calculate(curve[1], curve[3], mid) + + +def cubic_calculate(a, b, m): + m1 = 1.0 - m + return 3.0*a*m1*m1*m + 3.0*b*m1*m*m + m*m*m + + +# Interpolation + +def interpolate_list(x, a, b): + return [ + interpolate_value(x, a[i], b[i]) + for i in range(len(a)) + ] + + +def interpolate_value(x, a, b): + if isinstance(a, bool): + return a if x <= 0.5 else b + return a * (1.0 - x) + b * x + + +# Rotation + +def rotation_matrix_2d(deg): + rad = math.radians(deg) + cos = math.cos(rad) + sin = math.sin(rad) + return [cos, -sin, sin, cos] + + +# Utilities + +def float_to_hex(numf): + numi = int(numf) + + fraction = numf - numi + if not fraction: + return hex(numi)[2:] + + result = ["."] + while fraction > 0.0: + fraction *= 16.0 + integer = int(fraction) + fraction -= integer + result.append(chr(integer + 87) if integer > 9 else str(integer)) + return hex(numi)[2:] + "".join(result) + + +def is_odd(num): + return -1.0 if num % 2 else 0.0 + + +def round_js(num): + floor = math.floor(num) + return floor if (num - floor) < 0.5 else math.ceil(num) + + +def scale(value, value_min, value_max, rounding): + result = value * (value_max-value_min) / 255.0 + value_min + return math.floor(result) if rounding else round(result, 2) diff --git a/gallery_dl/version.py b/gallery_dl/version.py index af4acf5..d40dacd 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,5 +6,5 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -__version__ = "1.29.5" +__version__ = "1.29.6" __variant__ = None -- cgit v1.2.3