[Model][gpt-oss] Support DP+EP for GPT-OSS with FlashInfer trtllm-gen MoE (#23819)
Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
This commit is contained in:
committed by
GitHub
parent
1f096f9b95
commit
95089607fa
@@ -623,8 +623,6 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
|
||||
if should_use_flashinfer_mxfp4():
|
||||
from flashinfer import mxfp8_quantize, trtllm_fp4_block_scale_moe
|
||||
- assert not self.moe.use_ep, (
|
||||
- "EP is not supported for flashinfer mxfp4 moe backend yet.")
|
||||
if _should_use_flashinfer_mxfp4_bf16():
|
||||
assert x.dtype == torch.bfloat16
|
||||
x_quant = x
|
||||
@@ -650,12 +648,12 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
None, # output1_scale_scalar
|
||||
None, # output1_scale_gate_scalar
|
||||
None, # output2_scale_scalar
|
||||
- self.num_experts,
|
||||
+ global_num_experts,
|
||||
top_k,
|
||||
None, # n_group
|
||||
None, # topk_group
|
||||
self.intermediate_size, # padded to multiple of 256
|
||||
- 0, # local_expert_offset
|
||||
+ layer.ep_rank * layer.local_num_experts, # local_expert_offset
|
||||
self.num_experts, # local num experts
|
||||
None,
|
||||
self._get_tile_tokens_dim(x, top_k),
|
||||
|
||||
Reference in New Issue
Block a user