[Fix Bug] num_active_loras always equals zero (#34119)

Signed-off-by: Runkai Tao <rt572@physics.rutgers.edu>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
Runkai Tao
2026-03-02 10:17:46 -05:00
committed by GitHub
parent 7e9149d9a9
commit ada4f4fadd
7 changed files with 58 additions and 29 deletions

View File

@@ -187,7 +187,8 @@ def use_fused_moe_lora_kernel(
# num_active_loras is the number of active LoRAs
# (max_loras + 1 to include no-lora case)
num_active_loras = max_loras + 1
# Stored as CPU tensor to match the kernel API (torch.compile compatibility)
num_active_loras = torch.tensor([max_loras + 1], dtype=torch.int32, device="cpu")
fused_moe_lora(
output,
@@ -399,7 +400,8 @@ def use_fused_moe_lora_kernel_naive(
# num_active_loras is the number of active LoRAs
# (max_loras + 1 to include no-lora case)
num_active_loras = max_loras + 1
# Stored as CPU tensor to match the kernel API (torch.compile compatibility)
num_active_loras = torch.tensor([max_loras + 1], dtype=torch.int32, device="cpu")
fused_moe_lora(
output,