diff --git a/vllm/patches/deepseek_v4_attention.py b/vllm/patches/deepseek_v4_attention.py
index 19bf6861..7b064240 100644
--- a/vllm/patches/deepseek_v4_attention.py
+++ b/vllm/patches/deepseek_v4_attention.py
@@ -586,6 +586,12 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
         forward_context = get_forward_context()
         attn_metadata = forward_context.attn_metadata
 
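+        # Debug only (remove before merge): NaN check on the input; NOTE(review): .item() blocks on a device-to-host sync per call if hidden_states is on GPU.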
+        import sys as _sys
+        _hs_nan = torch.isnan(hidden_states).any().item()
+        if _hs_nan:
+            print(f"[BLACKWELL] INPUT NaN: cr={self.compress_ratio}", file=_sys.stderr, flush=True)
+        
         qr_kv, kv_score, indexer_kv_score, indexer_weights = (
             self.attn_gemm_parallel_execute(hidden_states)
         )