Add unpermute-aware fused MoE LoRA path (#32655)

Signed-off-by: Runkai Tao <rt572@physics.rutgers.edu>
This commit is contained in:
Runkai Tao
2026-02-01 20:46:09 -05:00
committed by GitHub
parent cf0a99f84d
commit 7320ca3942
6 changed files with 472 additions and 117 deletions

View File

@@ -842,6 +842,7 @@ class BenchmarkTensors:
"sorted_token_ids": sorted_token_ids,
"expert_ids": expert_ids,
"num_tokens_post_padded": num_tokens_post_padded,
"token_lora_mapping": self.lora_kernel_meta.token_lora_mapping,
"top_k_num": ctx.top_k_num,
"device": self.input.device,
"N": lora_rank,
@@ -915,6 +916,7 @@ class BenchmarkTensors:
"sorted_token_ids": sorted_token_ids,
"expert_ids": expert_ids,
"num_tokens_post_padded": num_tokens_post_padded,
"token_lora_mapping": self.lora_kernel_meta.token_lora_mapping,
"top_k_num": ctx.top_k_num,
"device": self.input.device,
"N": lora_rank,