[Hardware] Initial TPU integration (#5292)

2024-06-12 11:53:03 -07:00
parent 847cdcca1c
commit 1a8bfd92d5
22 changed files with 1322 additions and 28 deletions
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -375,6 +375,9 @@ class AsyncLLMEngine:
        if engine_config.device_config.device_type == "neuron":
            from vllm.executor.neuron_executor import NeuronExecutorAsync
            executor_class = NeuronExecutorAsync
+        elif engine_config.device_config.device_type == "tpu":
+            from vllm.executor.tpu_executor import TPUExecutorAsync
+            executor_class = TPUExecutorAsync
        elif engine_config.device_config.device_type == "cpu":
            assert distributed_executor_backend is None, (
                "Distributed execution is not supported with the CPU backend.")