From f9170209834af0e8e53a6d16ccd17eacc0db2c67 Mon Sep 17 00:00:00 2001
From: Xin Yang <105740670+xyang16@users.noreply.github.com>
Date: Thu, 5 Mar 2026 10:47:53 -0800
Subject: [PATCH] [Perf] Optimize FusedMoEModularKernel output tensor using torch.empty (#35794)

Signed-off-by: Xin Yang
---
 vllm/model_executor/layers/fused_moe/modular_kernel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py
index 1f495169b..d8c95727c 100644
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -1519,7 +1519,7 @@ class FusedMoEKernelModularImpl:
             assert not disable_inplace()
             output = hidden_states
         else:
-            output = torch.zeros_like(hidden_states)
+            output = torch.empty_like(hidden_states)
 
         local_num_experts = w1.size(0)
         if global_num_experts == -1: