[Performance][Fix] update nvfp4 code to support renorm routing (#28569)
Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -291,5 +291,8 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:
|
||||
|
||||
def is_flashinfer_supporting_global_sf(backend: FlashinferMoeBackend | None) -> bool:
    """Return whether ``backend`` is one of the backends listed as supporting global sf.

    Args:
        backend: The FlashInfer MoE backend to check, or ``None``.

    Returns:
        ``True`` when ``backend`` is ``FlashinferMoeBackend.CUTLASS`` or
        ``FlashinferMoeBackend.TENSORRT_LLM``; ``False`` otherwise, including
        when ``backend`` is ``None``.
    """
    # NOTE(review): "sf" presumably stands for "scale factor" -- confirm.
    # TODO(shuw@nvidia): Update when new backends are added.
    backends_supporting_global_sf = (
        FlashinferMoeBackend.CUTLASS,
        FlashinferMoeBackend.TENSORRT_LLM,
    )
    return backend in backends_supporting_global_sf
|
||||
|
||||
Reference in New Issue
Block a user