From ab597c869a78530fe6495ccda2bdc03f6f5c712e Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Wed, 28 Jan 2026 15:25:07 -0600 Subject: [PATCH] [Bugfix] Add missing encoder only guard for do_kv_cache_update (#33269) Signed-off-by: Gregory Shtrasberg --- vllm/v1/attention/backends/triton_attn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index d091a4e96..96429e29b 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -572,6 +572,10 @@ class TritonAttentionImpl(AttentionImpl): kv_cache: torch.Tensor, slot_mapping: torch.Tensor, ): + if self.attn_type in (AttentionType.ENCODER_ONLY, AttentionType.ENCODER): + # For encoder attention, + # we use direct Q, K, V tensors without caching + return # For decoder and cross-attention, use KV cache as before key_cache, value_cache = kv_cache.unbind(1)