diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py index ad9eb0d40..647108cc4 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py @@ -131,7 +131,7 @@ def flashinfer_fused_moe_blockscale_fp8( expert_offset: int, local_num_experts: int, block_shape: list[int], - routing_method_type: int = RoutingMethodType.DeepSeekV3, + routing_method_type: int = int(RoutingMethodType.DeepSeekV3), routed_scaling: float | None = 1.0, ) -> torch.Tensor: from vllm.utils.flashinfer import flashinfer_trtllm_fp8_block_scale_moe