From 3934e881dd6a9e71836f535057dfb3035e18be88 Mon Sep 17 00:00:00 2001 From: abetlen Date: Fri, 8 May 2026 08:44:28 -0700 Subject: [PATCH] feat: update llama.cpp to 5d6f18a63 --- CHANGELOG.md | 1 + llama_cpp/llama_cpp.py | 88 ++++++++++++++++++++++++++++++++++++++++++ vendor/llama.cpp | 2 +- 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba3f500cc..5031e5808 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- feat: Update llama.cpp to ggerganov/llama.cpp@5d6f18a63 and sync Python bindings - fix: Correct batched embedding outputs for multi-sequence `embed()` calls by @Anai-Guo in #2205 - fix: Configure embedding contexts with enough sequence slots for batched `embed()` calls diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index d03237140..a5ec5d190 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -199,6 +199,8 @@ def _warn_deprecated(symbol: str, hint: str) -> None: llama_token_p = ctypes.POINTER(llama_token) # typedef int32_t llama_seq_id; llama_seq_id = ctypes.c_int32 +# typedef uint32_t llama_state_seq_flags; +llama_state_seq_flags = ctypes.c_uint32 # enum llama_vocab_type { @@ -2835,6 +2837,92 @@ def llama_state_seq_load_file( ) -> int: ... +# for backwards-compat +# define LLAMA_STATE_SEQ_FLAGS_SWA_ONLY 1 +LLAMA_STATE_SEQ_FLAGS_SWA_ONLY = 1 + +# work only with partial states, such as SWA KV cache or recurrent cache +# (e.g. Mamba) +# define LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY 1 +LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY = 1 + +# keeps the tensor data on device buffers +# (i.e. not accessible in host memory, but faster save/load) +# define LLAMA_STATE_SEQ_FLAGS_ON_DEVICE 2 +LLAMA_STATE_SEQ_FLAGS_ON_DEVICE = 2 + + +# LLAMA_API size_t llama_state_seq_get_size_ext( +# struct llama_context * ctx, +# llama_seq_id seq_id, +# llama_state_seq_flags flags); +@ctypes_function( + "llama_state_seq_get_size_ext", + [llama_context_p_ctypes, llama_seq_id, llama_state_seq_flags], + ctypes.c_size_t, +) +def llama_state_seq_get_size_ext( + ctx: llama_context_p, + seq_id: llama_seq_id, + flags: llama_state_seq_flags, + /, +) -> int: ... + + +# LLAMA_API size_t llama_state_seq_get_data_ext( +# struct llama_context * ctx, +# uint8_t * dst, +# size_t size, +# llama_seq_id seq_id, +# llama_state_seq_flags flags); +@ctypes_function( + "llama_state_seq_get_data_ext", + [ + llama_context_p_ctypes, + ctypes.POINTER(ctypes.c_uint8), + ctypes.c_size_t, + llama_seq_id, + llama_state_seq_flags, + ], + ctypes.c_size_t, +) +def llama_state_seq_get_data_ext( + ctx: llama_context_p, + dst: CtypesArray[ctypes.c_uint8], + size: Union[ctypes.c_size_t, int], + seq_id: llama_seq_id, + flags: llama_state_seq_flags, + /, +) -> int: ... + + +# LLAMA_API size_t llama_state_seq_set_data_ext( +# struct llama_context * ctx, +# const uint8_t * src, +# size_t size, +# llama_seq_id dest_seq_id, +# llama_state_seq_flags flags); +@ctypes_function( + "llama_state_seq_set_data_ext", + [ + llama_context_p_ctypes, + ctypes.POINTER(ctypes.c_uint8), + ctypes.c_size_t, + llama_seq_id, + llama_state_seq_flags, + ], + ctypes.c_size_t, +) +def llama_state_seq_set_data_ext( + ctx: llama_context_p, + src: CtypesArray[ctypes.c_uint8], + size: Union[ctypes.c_size_t, int], + dest_seq_id: llama_seq_id, + flags: llama_state_seq_flags, + /, +) -> int: ... + + # // # // Decoding # // diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 63d93d173..5d6f18a63 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 63d93d17336e41e4cc73a64451e5b1d2477abdb1 +Subproject commit 5d6f18a6387a7066fe387233f2ca6f113cb209fb