diff --git a/vllm/model_executor/layers/fused_moe/oracle/unquantized.py b/vllm/model_executor/layers/fused_moe/oracle/unquantized.py index ed182613e..2213461a7 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/unquantized.py +++ b/vllm/model_executor/layers/fused_moe/oracle/unquantized.py @@ -32,6 +32,7 @@ class UnquantizedMoeBackend(Enum): TRITON = "TRITON" CPU = "CPU" XPU = "XPU" + TPU = "TPU" # NOTE(zyongye): Unsupported backend means backend @@ -40,6 +41,7 @@ class UnquantizedMoeBackend(Enum): UNSUPPORTED_BACKEND = [ UnquantizedMoeBackend.CPU, UnquantizedMoeBackend.XPU, + UnquantizedMoeBackend.TPU, ] @@ -91,6 +93,8 @@ def select_unquantized_moe_backend( backend = UnquantizedMoeBackend.XPU if current_platform.is_cpu(): backend = UnquantizedMoeBackend.CPU + if current_platform.is_tpu(): + backend = UnquantizedMoeBackend.TPU logger.info_once(_make_log_backend(backend), scope="local") return backend