fix: quantize_weight_to_nvfp4 returns 3 values, not 4

This commit is contained in:
2026-06-01 06:43:53 +00:00
parent 27fd847dd0
commit 7f1f224c78

View File

@@ -32,7 +32,7 @@ def test_fused_router():
# Create random BF16 gate weight and quantize to NVFP4
W_gate_bf16 = torch.randn(K, N, dtype=torch.bfloat16, device=device)
from dsv4.ops.quantize import quantize_weight_to_nvfp4
- w_fp4, w_sf, ws2_val, _ = quantize_weight_to_nvfp4(W_gate_bf16)
+ w_fp4, w_sf, ws2_val = quantize_weight_to_nvfp4(W_gate_bf16)
# Build Nvfp4Linear for reference path
gate_lin = Nvfp4Linear(in_features=K, out_features=N, device=device)