diff --git a/dsv4/ops/gemm_runner.py b/dsv4/ops/gemm_runner.py index 2b4f615c..5d0501e7 100644 --- a/dsv4/ops/gemm_runner.py +++ b/dsv4/ops/gemm_runner.py @@ -416,6 +416,7 @@ def run_fused_swiglu_grouped_gemm( swiglu_limit=0.0, mma_tiler_mn=(128, 128), cluster_shape_mn=(1, 1), + out=None, # pre-allocated output buffer for CUDA graph capture ): """Run the fused SwiGLU NVFP4 scaled grouped GEMM.