From 510ed1e8d3b1da814d3f2b516008b8a0bc2ee464 Mon Sep 17 00:00:00 2001 From: XiongfeiWei Date: Mon, 26 Jan 2026 15:46:11 -0800 Subject: [PATCH] [Bugfix][TPU] Return a Default fp8 MoE Backend (#32908) Signed-off-by: Xiongfei Wei Signed-off-by: Robert Shaw Co-authored-by: Robert Shaw Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> --- vllm/model_executor/layers/fused_moe/oracle/fp8.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py index cdf2d291b..dd0ad4523 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py +++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py @@ -35,6 +35,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import ( from vllm.model_executor.layers.quantization.utils.quant_utils import ( QuantKey, ) +from vllm.platforms import current_platform logger = init_logger(__name__) @@ -330,9 +331,16 @@ def select_fp8_moe_backend( else: logger.debug_once(_make_log_unsupported(backend, reason), scope="local") - raise NotImplementedError( - "No FP8 MoE backend supports the deployment configuration." - ) + # TODO(rob): per discussion with TPU team, we need a way to register + # MoE backends by OOT plugins, rather than having an explicit list + # of AVAILABLE_BACKENDS. Enabling returning `Fp8MoeBackend.NONE` is + # a temporary measure until these register APIs are complete. + if current_platform.is_cuda() or current_platform.is_rocm(): + raise NotImplementedError( + "No FP8 MoE backend supports the deployment configuration." + ) + + return Fp8MoeBackend.NONE, None def convert_to_fp8_moe_kernel_format(