Load tuned fused_moe_lora shrink and expand kernel configs separately (#27435)

Signed-off-by: Yu Gong <yu3.gong@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
2025-11-04 02:27:35 -08:00
parent 4022a9d279
commit 2ec401bc39
9 changed files with 911 additions and 125 deletions
--- a/tests/lora/test_fused_moe_lora_kernel.py
+++ b/tests/lora/test_fused_moe_lora_kernel.py
@@ -158,6 +158,8 @@ def use_fused_moe_lora_kernel(
        "BLOCK_SIZE_N": 32,
        "BLOCK_SIZE_K": 64,
        "GROUP_SIZE_M": 1,
+        "NUM_WARPS": 4,
+        "NUM_STAGES": 3,
        "SPLIT_K": 1,
    }

@@ -182,6 +184,15 @@ def use_fused_moe_lora_kernel(
        config["BLOCK_SIZE_N"],
        config["BLOCK_SIZE_K"],
        config["GROUP_SIZE_M"],
+        config["NUM_WARPS"],
+        config["NUM_STAGES"],
+        config["SPLIT_K"],
+        config["BLOCK_SIZE_M"],
+        config["BLOCK_SIZE_N"],
+        config["BLOCK_SIZE_K"],
+        config["GROUP_SIZE_M"],
+        config["NUM_WARPS"],
+        config["NUM_STAGES"],
        config["SPLIT_K"],
        mul_routed_weight,
    )