[Bugfix][TPU] Return a Default fp8 MoE Backend (#32908)

Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com> Signed-off-by: Robert Shaw <robshaw@redhat.com> Co-authored-by: Robert Shaw <robshaw@redhat.com> Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
2026-01-26 15:46:11 -08:00
parent 8caffd92df
commit 510ed1e8d3
1 changed files with 11 additions and 3 deletions
--- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py
+++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py
@@ -35,6 +35,7 @@ from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import (
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
    QuantKey,
 )
+from vllm.platforms import current_platform

 logger = init_logger(__name__)

@@ -330,9 +331,16 @@ def select_fp8_moe_backend(
        else:
            logger.debug_once(_make_log_unsupported(backend, reason), scope="local")

-    raise NotImplementedError(
-        "No FP8 MoE backend supports the deployment configuration."
-    )
+    # TODO(rob): per discussion with TPU team, we need a way to register
+    # MoE backends by OOT plugins, rather than having an explicit list
+    # of AVAILABLE_BACKENDS. Returning `Fp8MoeBackend.NONE` here is
+    # a temporary measure until these registration APIs are complete.
+    if current_platform.is_cuda() or current_platform.is_rocm():
+        raise NotImplementedError(
+            "No FP8 MoE backend supports the deployment configuration."
+        )
+
+    return Fp8MoeBackend.NONE, None


 def convert_to_fp8_moe_kernel_format(