[Bugfix] Add missing encoder-only guard for do_kv_cache_update (#33269)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
2026-01-28 15:25:07 -06:00
parent 4197168ea5
commit ab597c869a
1 changed files with 4 additions and 0 deletions
--- a/vllm/v1/attention/backends/triton_attn.py
+++ b/vllm/v1/attention/backends/triton_attn.py
@@ -572,6 +572,10 @@ class TritonAttentionImpl(AttentionImpl):
        kv_cache: torch.Tensor,
        slot_mapping: torch.Tensor,
    ):
+        if self.attn_type in (AttentionType.ENCODER_ONLY, AttentionType.ENCODER):
+            # For encoder attention,
+            # we use direct Q, K, V tensors without caching
+            return
        # For decoder and cross-attention, use KV cache as before
        key_cache, value_cache = kv_cache.unbind(1)