From c54ddbdae1dfeeb66357c36f474b69ac95a66cd5 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 19 May 2026 08:55:31 +0000 Subject: [PATCH] Fix NVFP4 attention: slice output to actual N after 128-padding --- tests/test_nvfp4_attn_gemm_b200.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_nvfp4_attn_gemm_b200.py b/tests/test_nvfp4_attn_gemm_b200.py index 6673a607..ebfac828 100644 --- a/tests/test_nvfp4_attn_gemm_b200.py +++ b/tests/test_nvfp4_attn_gemm_b200.py @@ -219,7 +219,8 @@ class NVFP4Attention: self._cache_key = cache_key # Run Q×K^T GEMM - scores = self._runner.run(q_2d) # (T*NH, T) + scores = self._runner.run(q_2d) # (T*NH, N_padded) + scores = scores[:, :T] # Slice to actual N=T (runner pads to 128) scores = scores * scale # Causal mask