Bump Flashinfer to v0.6.1 (#30993)
Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com>
This commit is contained in:
@@ -982,8 +982,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
||||
self.intermediate_size, # padded to multiple of 256
|
||||
layer.ep_rank * layer.local_num_experts, # local_expert_offset
|
||||
self.num_experts, # local num experts
|
||||
None,
|
||||
None,
|
||||
None, # routed_scaling_factor
|
||||
1 if layer.renormalize else 0, # routing_method_type, renormalize
|
||||
True, # do finalize
|
||||
tune_max_num_tokens=max(self.max_capture_size, 1),
|
||||
|
||||
Reference in New Issue
Block a user