Fix fp8 amax in decode test: print `kv_fp8_prefill.float().abs().max()` instead of `kv_fp8_prefill.amax()`
This commit is contained in:
@@ -292,7 +292,7 @@ def test_prefill_decode(layer_id, compress_ratio):
|
||||
kv_cache_bf16[:N_PREFILL] = kv_rope_prefill
|
||||
|
||||
print(f" Prefill: {N_PREFILL} tokens written to KV cache")
|
||||
print(f" KV cache fp8 amax: {kv_fp8_prefill.amax():.4f}")
|
||||
print(f" KV cache fp8 amax: {kv_fp8_prefill.float().abs().max():.4f}")
|
||||
print(f" KV BF16 amax: {kv_rope_prefill.amax():.4f}")
|
||||
|
||||
# Verify roundtrip: read back and compare
|
||||
|
||||
Reference in New Issue
Block a user