[Attention] Clarify comment explaining attn_logits +1 dimension (#33427)
Signed-off-by: Francesco Fusco <ffu@zurich.ibm.com>
@@ -143,8 +143,8 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
                 B,
                 q_num_heads,
                 num_kv_splits,
-                # NOTE(lucas) idk why the +1 is here but sglang has it so we
-                # just mirror that
+                # NOTE: the +1 stores the LogSumExp (LSE) that the stage2
+                # kernel uses to merge partial attention outputs across splits.
                 self.kv_lora_rank + 1,
             ),
             dtype=torch.float32,
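The merge the new comment refers to can be sketched numerically: each KV split produces a partial softmax-attention output plus that split's LogSumExp (LSE), and a second stage rescales each partial output by `exp(lse_i - lse_total)` and sums. A minimal NumPy sketch, with illustrative function names (not vLLM's actual kernels):

```python
import numpy as np

def partial_attention(q, k, v):
    # One split: softmax attention over this split's keys, plus the
    # split's log-sum-exp (LSE) needed to merge with other splits.
    logits = k @ q                      # (n,)
    m = logits.max()
    w = np.exp(logits - m)
    lse = m + np.log(w.sum())           # logsumexp(logits), numerically stable
    out = (w / w.sum()) @ v             # (d,) partial attention output
    return out, lse

def merge_splits(parts):
    # Stage-2-style merge: each split's weight share of the global
    # softmax denominator is exp(lse_i - lse_total).
    outs, lses = zip(*parts)
    lses = np.array(lses)
    m = lses.max()
    lse_total = m + np.log(np.exp(lses - m).sum())
    scales = np.exp(lses - lse_total)
    return sum(s * o for s, o in zip(scales, outs))

rng = np.random.default_rng(0)
q = rng.normal(size=8)
k = rng.normal(size=(16, 8))
v = rng.normal(size=(16, 4))

full, _ = partial_attention(q, k, v)                 # single-split reference
merged = merge_splits([partial_attention(q, k[:9], v[:9]),
                       partial_attention(q, k[9:], v[9:])])
assert np.allclose(full, merged)
```

This is why each split's row in `attn_logits` carries one extra slot beyond `kv_lora_rank`: without the LSE, the partial outputs could not be correctly renormalized against the global softmax denominator.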
||||