diff --git a/vllm/patches/deepseek_v4_attention.py b/vllm/patches/deepseek_v4_attention.py index 9da5cd04..35446abd 100644 --- a/vllm/patches/deepseek_v4_attention.py +++ b/vllm/patches/deepseek_v4_attention.py @@ -631,7 +631,7 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer): return # Attention using PyTorch SDPA (works on Blackwell) - o = full_sdpa_attention(q, kv, self.softmax_scale) + o = full_sdpa_attention(q, kv, self.scale) # Write into the output buffer (same shape as original path) if self.n_local_heads < self.padded_heads: