Load tuned fused_moe_lora shrink and expand kernel configs separately (#27435)

Signed-off-by: Yu Gong <yu3.gong@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
yugong333
2025-11-04 02:27:35 -08:00
committed by GitHub
parent 4022a9d279
commit 2ec401bc39
9 changed files with 911 additions and 125 deletions

View File

@@ -158,6 +158,8 @@ def use_fused_moe_lora_kernel(
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"NUM_WARPS": 4,
"NUM_STAGES": 3,
"SPLIT_K": 1,
}
@@ -182,6 +184,15 @@ def use_fused_moe_lora_kernel(
config["BLOCK_SIZE_N"],
config["BLOCK_SIZE_K"],
config["GROUP_SIZE_M"],
config["NUM_WARPS"],
config["NUM_STAGES"],
config["SPLIT_K"],
config["BLOCK_SIZE_M"],
config["BLOCK_SIZE_N"],
config["BLOCK_SIZE_K"],
config["GROUP_SIZE_M"],
config["NUM_WARPS"],
config["NUM_STAGES"],
config["SPLIT_K"],
mul_routed_weight,
)