Add unpermute-aware fused MoE LoRA path (#32655)
Signed-off-by: Runkai Tao <rt572@physics.rutgers.edu>
@@ -842,6 +842,7 @@ class BenchmarkTensors:
     "sorted_token_ids": sorted_token_ids,
     "expert_ids": expert_ids,
     "num_tokens_post_padded": num_tokens_post_padded,
+    "token_lora_mapping": self.lora_kernel_meta.token_lora_mapping,
     "top_k_num": ctx.top_k_num,
     "device": self.input.device,
     "N": lora_rank,
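Editor's note: the sorted_token_ids / expert_ids / num_tokens_post_padded kwargs above are the standard expert-alignment metadata that fused MoE kernels consume. As a rough illustration only (a plain-PyTorch sketch, not vLLM's fused moe_align_block_size kernel; the helper name align_tokens_by_expert is invented here), this is the shape of what those tensors encode:

import torch

def align_tokens_by_expert(
    topk_ids: torch.Tensor,  # (num_tokens, top_k) expert id per routed slot
    num_experts: int,
    block_size: int,
):
    """Group the flattened (token, top-k) slots by expert and pad each
    expert's region to a multiple of block_size, mirroring the tensors
    the benchmark passes as kwargs above."""
    flat = topk_ids.flatten()
    total = flat.numel()  # num_tokens * top_k
    counts = torch.bincount(flat, minlength=num_experts)
    padded = ((counts + block_size - 1) // block_size) * block_size
    num_tokens_post_padded = int(padded.sum())
    # `total` is used as a sentinel value marking padding slots
    sorted_token_ids = torch.full(
        (num_tokens_post_padded,), total, dtype=torch.int32
    )
    pos = 0
    for e in range(num_experts):
        slots = (flat == e).nonzero(as_tuple=True)[0]
        sorted_token_ids[pos : pos + slots.numel()] = slots.to(torch.int32)
        pos += int(padded[e])
    # expert that owns each block_size-row tile of the padded layout
    expert_ids = torch.repeat_interleave(
        torch.arange(num_experts, dtype=torch.int32), padded // block_size
    )
    return sorted_token_ids, expert_ids, num_tokens_post_padded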
@@ -915,6 +916,7 @@ class BenchmarkTensors:
     "sorted_token_ids": sorted_token_ids,
     "expert_ids": expert_ids,
     "num_tokens_post_padded": num_tokens_post_padded,
+    "token_lora_mapping": self.lora_kernel_meta.token_lora_mapping,
     "top_k_num": ctx.top_k_num,
     "device": self.input.device,
     "N": lora_rank,
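Editor's note: "unpermute-aware" in the commit title refers to recovering the original token order from the expert-sorted layout so each row can be matched to its LoRA adapter. Below is a minimal sketch of that index arithmetic, assuming the sentinel-padding convention from the previous sketch; the helper name lora_ids_for_sorted_tokens is hypothetical, not vLLM API.

import torch

def lora_ids_for_sorted_tokens(
    sorted_token_ids: torch.Tensor,   # expert-sorted (token, slot) indices
    token_lora_mapping: torch.Tensor, # (num_tokens,) LoRA adapter id per token
    top_k_num: int,
) -> torch.Tensor:
    """Unpermute: each entry of sorted_token_ids indexes a flattened
    (token, top_k) slot, so integer division by top_k_num recovers the
    original token row, and that row selects the LoRA adapter id."""
    token_idx = sorted_token_ids.long() // top_k_num
    # Padding slots decode to num_tokens, which is out of range; clamp
    # them here. A real kernel would mask these rows out instead.
    token_idx = token_idx.clamp(max=token_lora_mapping.numel() - 1)
    return token_lora_mapping[token_idx]

This is why the kwargs dict gains token_lora_mapping alongside top_k_num: the kernel needs both to map an expert-sorted row back to its per-token adapter.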