From 510ed1e8d3b1da814d3f2b516008b8a0bc2ee464 Mon Sep 17 00:00:00 2001
From: XiongfeiWei <isaacwxf23@gmail.com>
Date: Mon, 26 Jan 2026 15:46:11 -0800
Subject: [PATCH] [Bugfix][TPU] Return a Default fp8 MoE Backend (#32908)

Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
---
 vllm/model_executor/layers/fused_moe/oracle/fp8.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
index cdf2d291b..dd0ad4523 100644
--- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
@@ -35,6 +35,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     QuantKey,
 )
+from vllm.platforms import current_platform
 
 logger = init_logger(__name__)
 
@@ -330,9 +331,16 @@ def select_fp8_moe_backend(
         else:
             logger.debug_once(_make_log_unsupported(backend, reason), scope="local")
 
-    raise NotImplementedError(
-        "No FP8 MoE backend supports the deployment configuration."
-    )
+    # TODO(rob): per discussion with the TPU team, we need a way for OOT
+    # plugins to register MoE backends, rather than having an explicit list
+    # of AVAILABLE_BACKENDS. Returning `Fp8MoeBackend.NONE` here is
+    # a temporary measure until these registration APIs are complete.
+    if current_platform.is_cuda() or current_platform.is_rocm():
+        raise NotImplementedError(
+            "No FP8 MoE backend supports the deployment configuration."
+        )
+
+    return Fp8MoeBackend.NONE, None
 
 
 def convert_to_fp8_moe_kernel_format(