From ee484b3f4b0c061d2612ea7c0cb40b44baf680c0 Mon Sep 17 00:00:00 2001
From: Danielle Robinson
Date: Sun, 25 Jan 2026 22:52:34 -0800
Subject: [PATCH] Set splitk=1 for fused-moe-lora expand kernel (#32882)

Signed-off-by: Danielle Robinson
Co-authored-by: Danielle Robinson
Co-authored-by: Jee Jee Li
---
 vllm/lora/ops/triton_ops/fused_moe_lora_op.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/lora/ops/triton_ops/fused_moe_lora_op.py b/vllm/lora/ops/triton_ops/fused_moe_lora_op.py
index 35939b979..d3bcacf74 100644
--- a/vllm/lora/ops/triton_ops/fused_moe_lora_op.py
+++ b/vllm/lora/ops/triton_ops/fused_moe_lora_op.py
@@ -351,7 +351,7 @@ def _fused_moe_lora_expand(
         "GROUP_SIZE_M": group_size_m,
         "num_warps": num_warps,
         "num_stages": num_stages,
-        "SPLIT_K": split_k,  # Set split_k = 1 for expand calls
+        "SPLIT_K": 1,  # Set split_k = 1 for expand calls
         "USE_GDC": use_gdc,
         "launch_pdl": use_gdc,  # triton kernel metadata
     }