diff --git a/vllm/patches/deepseek_v4_attention.py b/vllm/patches/deepseek_v4_attention.py
index 446dc1c6..19bf6861 100644
--- a/vllm/patches/deepseek_v4_attention.py
+++ b/vllm/patches/deepseek_v4_attention.py
@@ -742,6 +742,12 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
             kv_rope_prefill = self._apply_rope_kv(
                 kv[num_decode_tokens:], positions[num_decode_tokens:],
             )
+            # Debug: report to stderr if q_prefill or kv_rope_prefill contains NaN before attention runs
+            import sys as _sys
+            _q_nan = torch.isnan(q_prefill).any().item()
+            _kv_nan = torch.isnan(kv_rope_prefill).any().item()
+            if _q_nan or _kv_nan:
+                print(f"[BLACKWELL] PREFILL INPUTS NaN: q_nan={_q_nan} kv_nan={_kv_nan} cr={self.compress_ratio}", file=_sys.stderr, flush=True)
             if swa_only:
                 o[num_decode_tokens:] = causal_prefill_attention(
                     q_prefill, kv_rope_prefill, self.scale,