[Fix Bug] num_active_loras always equals zero (#34119)

Signed-off-by: Runkai Tao <rt572@physics.rutgers.edu>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Runkai Tao
2026-03-02 10:17:46 -05:00
committed by GitHub
parent 7e9149d9a9
commit ada4f4fadd
7 changed files with 58 additions and 29 deletions

View File

@@ -187,7 +187,8 @@ def use_fused_moe_lora_kernel(
# num_active_loras is the number of active LoRAs
# (max_loras + 1 to include no-lora case)
num_active_loras = max_loras + 1
# Stored as CPU tensor to match the kernel API (torch.compile compatibility)
num_active_loras = torch.tensor([max_loras + 1], dtype=torch.int32, device="cpu")
fused_moe_lora(
output,
@@ -399,7 +400,8 @@ def use_fused_moe_lora_kernel_naive(
# num_active_loras is the number of active LoRAs
# (max_loras + 1 to include no-lora case)
num_active_loras = max_loras + 1
# Stored as CPU tensor to match the kernel API (torch.compile compatibility)
num_active_loras = torch.tensor([max_loras + 1], dtype=torch.int32, device="cpu")
fused_moe_lora(
output,