diff --git a/dsv4/kernels/gemm/fused_swiglu.py b/dsv4/kernels/gemm/fused_swiglu.py index 4040666c..61fcffeb 100644 --- a/dsv4/kernels/gemm/fused_swiglu.py +++ b/dsv4/kernels/gemm/fused_swiglu.py @@ -1285,6 +1285,10 @@ class FusedSwiGLUScaledGroupedGemmKernel: # ── Optional: NVFP4 per-expert global scales ── global_scale_a: Optional[cute.Tensor], global_scale_b: Optional[cute.Tensor], + # ── Fused SwiGLU epilogue outputs (replaces out when fused_swiglu=True) ── + fp4_out: Optional[cute.Tensor] = None, + sf_out: Optional[cute.Tensor] = None, + l2_global_scale: Optional[cute.Tensor] = None, ): """ GPU device kernel for MoE Scaled Grouped GEMM with block scaling.