diff --git a/vllm/patches/deepseek_v4_attention.py b/vllm/patches/deepseek_v4_attention.py
index 19bf6861..7b064240 100644
--- a/vllm/patches/deepseek_v4_attention.py
+++ b/vllm/patches/deepseek_v4_attention.py
@@ -586,6 +586,12 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
         forward_context = get_forward_context()
         attn_metadata = forward_context.attn_metadata
 
+        # Debug: warn on stderr if hidden_states has NaN (NOTE: .item() may force a device sync)
+        import sys as _sys
+        _hs_nan = torch.isnan(hidden_states).any().item()
+        if _hs_nan:
+            print(f"[BLACKWELL] INPUT NaN: cr={self.compress_ratio}", file=_sys.stderr, flush=True)
+
         qr_kv, kv_score, indexer_kv_score, indexer_weights = (
             self.attn_gemm_parallel_execute(hidden_states)
         )