P4: Fix rmsnorm_quantize_nvfp4 to return a QuantizedActivation instead of a tuple
This commit is contained in:
@@ -430,4 +430,4 @@ def rmsnorm_quantize_nvfp4(x_bf16, norm_weight, eps=1e-6, divisor=6.0 * 448.0):
|
||||
from dsv4.kernels.cuda.loader import get_cuda_module
|
||||
mod = get_cuda_module("fused_rmsnorm_quantize", ["fused_rmsnorm_quantize.cu"])
|
||||
x_fp4, x_sf, gsa, inv_rms = mod.rmsnorm_quantize_nvfp4(x_bf16, norm_weight, eps, divisor)
|
||||
return x_fp4, x_sf, gsa, inv_rms
|
||||
return QuantizedActivation(x_fp4, x_sf, gsa, inv_rms)
|
||||
|
||||
Reference in New Issue
Block a user