[BugFix] Fix invalid flashinfer_fused_moe_blockscale_fp8 op registration (#32855)
Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
This commit is contained in:
@@ -131,7 +131,7 @@ def flashinfer_fused_moe_blockscale_fp8(
     expert_offset: int,
     local_num_experts: int,
     block_shape: list[int],
-    routing_method_type: int = RoutingMethodType.DeepSeekV3,
+    routing_method_type: int = int(RoutingMethodType.DeepSeekV3),
     routed_scaling: float | None = 1.0,
 ) -> torch.Tensor:
     from vllm.utils.flashinfer import flashinfer_trtllm_fp8_block_scale_moe
Reference in New Issue
Block a user