[BugFix] Fix invalid flashinfer_fused_moe_blockscale_fp8 op registration (#32855)
Signed-off-by: Fadi Arafeh <fadi.arafeh@arm.com>
This commit is contained in:
@@ -131,7 +131,7 @@ def flashinfer_fused_moe_blockscale_fp8(
     expert_offset: int,
     local_num_experts: int,
     block_shape: list[int],
-    routing_method_type: int = RoutingMethodType.DeepSeekV3,
+    routing_method_type: int = int(RoutingMethodType.DeepSeekV3),
     routed_scaling: float | None = 1.0,
 ) -> torch.Tensor:
     from vllm.utils.flashinfer import flashinfer_trtllm_fp8_block_scale_moe
Reference in New Issue
Block a user