[Kernels] Use empty for modular MoE workspaces (#19667)

Signed-off-by: Bill Nell <bnell@redhat.com>
2025-06-16 10:58:01 -04:00
parent 836d4ce140
commit 5e5baa91aa
2 changed files with 5 additions and 2 deletions
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -426,10 +426,10 @@ class FusedMoEModularKernel(torch.nn.Module):

            # We can reuse the memory between cache1 and cache3 because by the
            # time we need cache3, we're done with cache1.
-            workspace13 = torch.zeros(prod(workspace13_shape),
+            workspace13 = torch.empty(prod(workspace13_shape),
                                      device=a1.device,
                                      dtype=workspace_dtype)
-            workspace2 = torch.zeros(prod(workspace2_shape),
+            workspace2 = torch.empty(prod(workspace2_shape),
                                     device=a1.device,
                                     dtype=workspace_dtype)