[Frontend] Enable support for CPU backend in AsyncLLMEngine. (#3993)

Signed-off-by: Tao He <sighingnow@gmail.com>
This commit is contained in:
Tao He
2024-04-22 17:19:51 +08:00
committed by GitHub
parent e73ed0f1c6
commit 077f0a2e8a
2 changed files with 30 additions and 2 deletions

View File

@@ -343,6 +343,11 @@ class AsyncLLMEngine:
if engine_config.device_config.device_type == "neuron":
from vllm.executor.neuron_executor import NeuronExecutorAsync
executor_class = NeuronExecutorAsync
elif engine_config.device_config.device_type == "cpu":
assert not engine_config.parallel_config.worker_use_ray, (
"Ray is not supported with the CPU backend.")
from vllm.executor.cpu_executor import CPUExecutorAsync
executor_class = CPUExecutorAsync
elif engine_config.parallel_config.worker_use_ray:
initialize_ray_cluster(engine_config.parallel_config)
from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync