P4: Fix rmsnorm_quantize_nvfp4 to return a QuantizedActivation instead of a tuple

This commit is contained in:
2026-06-02 17:43:21 +00:00
parent 454dbdad52
commit bdf0b15d45

View File

@@ -430,4 +430,4 @@ def rmsnorm_quantize_nvfp4(x_bf16, norm_weight, eps=1e-6, divisor=6.0 * 448.0):
from dsv4.kernels.cuda.loader import get_cuda_module
mod = get_cuda_module("fused_rmsnorm_quantize", ["fused_rmsnorm_quantize.cu"])
x_fp4, x_sf, gsa, inv_rms = mod.rmsnorm_quantize_nvfp4(x_bf16, norm_weight, eps, divisor)
return x_fp4, x_sf, gsa, inv_rms
return QuantizedActivation(x_fp4, x_sf, gsa, inv_rms)