diff --git a/tests/test_decode_attention_b200.py b/tests/test_decode_attention_b200.py index 7c12f123..c62be70d 100644 --- a/tests/test_decode_attention_b200.py +++ b/tests/test_decode_attention_b200.py @@ -292,7 +292,7 @@ def test_prefill_decode(layer_id, compress_ratio): kv_cache_bf16[:N_PREFILL] = kv_rope_prefill print(f" Prefill: {N_PREFILL} tokens written to KV cache") - print(f" KV cache fp8 amax: {kv_fp8_prefill.amax():.4f}") + print(f" KV cache fp8 amax: {kv_fp8_prefill.float().abs().max():.4f}") print(f" KV BF16 amax: {kv_rope_prefill.amax():.4f}") # Verify roundtrip: read back and compare