Fix: use self.scale instead of self.softmax_scale in Blackwell attention path

2026-05-19 10:04:46 +00:00
parent 39310c357d
commit 7e97551fd3
1 changed file with 1 addition and 1 deletion
--- a/vllm/patches/deepseek_v4_attention.py
+++ b/vllm/patches/deepseek_v4_attention.py
@@ -631,7 +631,7 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
            return

        # Attention using PyTorch SDPA (works on Blackwell)
-        o = full_sdpa_attention(q, kv, self.softmax_scale)
+        o = full_sdpa_attention(q, kv, self.scale)

        # Write into the output buffer (same shape as original path)
        if self.n_local_heads < self.padded_heads: