From 0f3f883e246eee6613a3a540bf7f5982e5cfe7e6 Mon Sep 17 00:00:00 2001
From: Tai An <antai12232931@outlook.com>
Date: Sat, 9 May 2026 06:18:02 -0700
Subject: [PATCH] fix(embed): mark all tokens for output to suppress llama.cpp
 'overriding' warning (#2208)

---
 llama_cpp/llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 752c25dd3..7712446e7 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1040,7 +1040,8 @@ def embed(
 
         # get pooling information
         pooling_type = self.pooling_type()
-        logits_all = pooling_type == llama_cpp.LLAMA_POOLING_TYPE_NONE
+        # Mark every token for output regardless of pooling type; with embeddings enabled, llama.cpp otherwise logs an "overriding" warning per input.
+        logits_all = True
 
         if self.context_params.embeddings is False:
             raise RuntimeError(