Load tuned fused_moe_lora shrink and expand kernel configs separately (#27435)
Signed-off-by: Yu Gong <yu3.gong@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@@ -158,6 +158,8 @@ def use_fused_moe_lora_kernel(
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 1,
"NUM_WARPS": 4,
"NUM_STAGES": 3,
"SPLIT_K": 1,
}
@@ -182,6 +184,15 @@ def use_fused_moe_lora_kernel(
config["BLOCK_SIZE_N"],
config["BLOCK_SIZE_K"],
config["GROUP_SIZE_M"],
config["NUM_WARPS"],
config["NUM_STAGES"],
config["SPLIT_K"],
config["BLOCK_SIZE_M"],
config["BLOCK_SIZE_N"],
config["BLOCK_SIZE_K"],
config["GROUP_SIZE_M"],
config["NUM_WARPS"],
config["NUM_STAGES"],
config["SPLIT_K"],
mul_routed_weight,
)
Reference in New Issue
Block a user