From 7e97551fd3dd9fcc524e708e347a7defdce7004d Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 19 May 2026 10:04:46 +0000 Subject: [PATCH] Fix: use self.scale instead of self.softmax_scale in Blackwell attention path --- vllm/patches/deepseek_v4_attention.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/patches/deepseek_v4_attention.py b/vllm/patches/deepseek_v4_attention.py index 9da5cd04..35446abd 100644 --- a/vllm/patches/deepseek_v4_attention.py +++ b/vllm/patches/deepseek_v4_attention.py @@ -631,7 +631,7 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer): return # Attention using PyTorch SDPA (works on Blackwell) - o = full_sdpa_attention(q, kv, self.softmax_scale) + o = full_sdpa_attention(q, kv, self.scale) # Write into the output buffer (same shape as original path) if self.n_local_heads < self.padded_heads: